[llvm] [AMDGPU] propagate uniform/divergent in DAG combine (PR #184940)
Zeng Wu via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 5 19:06:05 PST 2026
https://github.com/zwu-2025 created https://github.com/llvm/llvm-project/pull/184940
The generic DAG combiner rewrites `(uint_to_fp (setcc x, y, cc))` into a `select` node; with this change, the uniform/divergent information of that `select` node is updated in the custom combiner.
>From 34dbc2d0a4328bf6b3e4a8343b1de12789b65873 Mon Sep 17 00:00:00 2001
From: root <zengwu13 at amd.com>
Date: Fri, 6 Mar 2026 03:02:55 +0000
Subject: [PATCH] [AMDGPU] propagate uniform/divergent in DAG combine
---
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 8 ++++
llvm/lib/Target/AMDGPU/SOPInstructions.td | 2 +-
.../AMDGPU/llvm.amdgcn.uniform.combine.ll | 47 +++++++++++++++++++
3 files changed, 56 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.uniform.combine.ll
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 301f2fc8dab45..dc11714f3d519 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16868,6 +16868,14 @@ SDValue SITargetLowering::performSelectCombine(SDNode *N,
if (Cond.getOpcode() != ISD::SETCC)
return SDValue();
+ if (DCI.isAfterLegalizeDAG()) {
+ // in generic combini, (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y,
+ // cc), 1.0, 0.0)
+ if (N->getValueType(0).isVector()) {
+ DCI.DAG.updateDivergence(N);
+ }
+ }
+
SDValue LHS = Cond.getOperand(0);
SDValue RHS = Cond.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 1931e0be15152..ef09aa4dede3c 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -2005,7 +2005,7 @@ let AddedComplexity = 20 in {
// TODO: The predicate should not be necessary, but enabling this pattern for
// all subtargets generates worse code in some cases.
- let OtherPredicates = [HasPseudoScalarTrans] in
+ // let OtherPredicates = [HasPseudoScalarTrans] in
def : GCNPat<
(f32 (UniformSelect f32:$src0, f32:$src1)),
(S_CSELECT_B32 SSrc_b32:$src0, SSrc_b32:$src1)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.uniform.combine.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.uniform.combine.ll
new file mode 100644
index 0000000000000..67f3e344a2e38
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.uniform.combine.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -%s --check-prefixes=GFX950
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -%s --check-prefixes=GFX1250
+; RUN: llc -mtriple=amdgcn < %s | FileCheck -%s --check-prefixes=DEFALUT
+
+; CHECK-LABEL: f:
+; GFX950 s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950 s_mov_b64 s[0:1], 0
+; GFX950 s_load_dword s0, s[0:1], 0x0
+; GFX950 s_waitcnt lgkmcnt(0)
+; GFX950 s_cmp_lg_u32 s0, 0
+; GFX950 s_cselect_b32 s0, 1.0, 0
+; GFX950 v_mov_b32_e32 v0, s0
+; GFX950 s_setpc_b64 s[30:31]
+
+; GFX1250 s_wait_loadcnt_dscnt 0x0
+; GFX1250 s_wait_kmcnt 0x0
+; GFX1250 s_mov_b64 s[0:1], 0
+; GFX1250 s_load_b32 s0, s[0:1], 0x0
+; GFX1250 s_wait_kmcnt 0x0
+; GFX1250 s_cmp_lg_u32 s0, 0
+; GFX1250 s_cselect_b32 s0, 1.0, 0
+; GFX1250 s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250 v_mov_b32_e32 v0, s0
+; GFX1250 s_set_pc_i64 s[30:31]
+
+; DEFALUT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; DEFALUT: s_mov_b64 s[4:5], 0
+; DEFALUT: s_load_dword s4, s[4:5], 0x0
+; DEFALUT: s_waitcnt lgkmcnt(0)
+; DEFALUT: s_cmp_lg_u32 s4, 0
+; DEFALUT: s_cselect_b32 s4, 1.0, 0
+; DEFALUT: v_mov_b32_e32 v0, s4
+; DEFALUT: s_setpc_b64 s[30:31]
+define float @f(i32 %arg, ptr %ptr) {
+bb:
+ %i = load <2 x i32>, ptr addrspace(4) null, align 4294967296
+ %i1 = extractelement <2 x i32> %i, i64 1
+ %i2 = extractelement <2 x i32> %i, i64 0
+ %i3 = lshr i32 %i1, 1
+ %i4 = icmp ne i32 %i2, 0
+ %i8 = uitofp i1 %i4 to float
+ ret float %i8
+}
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; MIR: {{.*}}
More information about the llvm-commits
mailing list