[llvm] [AMDGPU] using divergent/uniform information in ISel of zext (PR #174539)
Zeng Wu via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 21 18:12:24 PST 2026
https://github.com/zwu-2025 updated https://github.com/llvm/llvm-project/pull/174539
>From 07e0ee192b7640291fd0d908f1eb0035a4f2b14c Mon Sep 17 00:00:00 2001
From: Zeng Wu <Zeng.Wu2 at amd.com>
Date: Sun, 11 Jan 2026 01:11:49 -0600
Subject: [PATCH 1/2] [AMD] using divergent/uniform information in ISel of zext
---
llvm/lib/Target/AMDGPU/SIInstructions.td | 12 +++++++++++-
.../AMDGPU/llvm.amdgcn.i1.i32.uniform.zext.ll | 19 +++++++++++++++++++
2 files changed, 30 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.i1.i32.uniform.zext.ll
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index ca5a4d7301bda..46a254263c0e4 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2535,7 +2535,7 @@ def : GCNPat <
>;
class Ext32Pat <SDNode ext> : GCNPat <
- (i32 (ext i1:$src0)),
+ (i32 (ext i1:$src0)),
(V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
/*src1mod*/(i32 0), /*src1*/(i32 1), i1:$src0)
>;
@@ -3044,6 +3044,11 @@ def : GCNPat <
(S_AND_B64 $src0, $src1)
>;
+def : GCNPat <
+ (i64 (UniformUnaryFrag<zext> i1:$src)),
+ (S_AND_B64 $src, (i64 1))
+>;
+
def : GCNPat <
(i1 (or i1:$src0, i1:$src1)),
(S_OR_B64 $src0, $src1)
@@ -3083,6 +3088,11 @@ def : GCNPat <
(S_AND_B32 $src0, $src1)
>;
+def : GCNPat <
+ (i32 (UniformUnaryFrag<zext> i1:$src)),
+ (S_AND_B32 $src, (i32 1))
+>;
+
def : GCNPat <
(i1 (or i1:$src0, i1:$src1)),
(S_OR_B32 $src0, $src1)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.i1.i32.uniform.zext.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.i1.i32.uniform.zext.ll
new file mode 100644
index 0000000000000..63486534e7032
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.i1.i32.uniform.zext.ll
@@ -0,0 +1,19 @@
+; Hand-written checks: a uniform zext i1 -> i32 should select to s_and_b32 with SelectionDAG.
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GCN,GCN-OPT %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck --check-prefixes=GCN,GCN-OPT %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GCN,GCN-G_SEL %s
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+
+define amdgpu_kernel void @zext_i1_to_i32_uniform(ptr addrspace(1) %out, i1 %pred) #0 {
+entry:
+; GCN-LABEL: zext_i1_to_i32_uniform:
+; GCN-OPT: s_and_b32 s{{.*}}, s{{.*}}, 1
+; GCN-G_SEL: v_mov_b32_e32
+; GCN-OPT: s_endpgm
+ %tmp2 = zext i1 %pred to i32
+ store i32 %tmp2, ptr addrspace(1) %out
+ ret void
+}
+
+attributes #0 = { nounwind }
>From ee4318bf0edb3b6492c6108a79892cc5870e7e89 Mon Sep 17 00:00:00 2001
From: Zeng Wu <Zeng.Wu2 at amd.com>
Date: Wed, 21 Jan 2026 20:11:35 -0600
Subject: [PATCH 2/2] [AMDGPU] using divergent/uniform information in ISel of
zext
---
.../lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 25 +++++++++++++++++--
llvm/lib/Target/AMDGPU/SIInstructions.td | 19 +++++++-------
2 files changed, 33 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 4ad721bf21959..1bcc231425717 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -105,8 +105,29 @@ void InstrEmitter::EmitCopyFromReg(SDValue Op, bool IsClone, Register SrcReg,
MVT VT = Op.getSimpleValueType();
// Stick to the preferred register classes for legal types.
- if (TLI->isTypeLegal(VT))
- UseRC = TLI->getRegClassFor(VT, Op->isDivergent());
+ if (TLI->isTypeLegal(VT)) {
+ bool SrcRegIsImplicitDef = false;
+ auto Node = Op.getNode();
+ if (Node->isMachineOpcode() && SrcReg.isPhysical()) {
+ const MCInstrDesc &II = TII->get(Op.getMachineOpcode());
+ auto ImplicitDefs = II.implicit_defs();
+ auto MCSrcReg = SrcReg.asMCReg();
+
+ SrcRegIsImplicitDef = II.NumDefs == 0 && llvm::any_of(ImplicitDefs, [MCSrcReg](MCPhysReg Reg) {
+ return Reg == MCSrcReg;
+ });
+ }
+ if (SrcRegIsImplicitDef) {
+ // SrcReg is a physical register implicitly defined by this machine node,
+ // so EmitCopyFromReg inserts a COPY out of it into a register with a
+ // valid class. That copy may cross register classes, so prefer
+ // getCrossCopyRegClass over getRegClassFor here.
+ const llvm::TargetRegisterClass *ImplicitRC = TRI->getMinimalPhysRegClass(SrcReg);
+ UseRC = TRI->getCrossCopyRegClass(ImplicitRC);
+ }
+ else
+ UseRC = TLI->getRegClassFor(VT, Op->isDivergent());
+ }
for (SDNode *User : Op->users()) {
bool Match = true;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 46a254263c0e4..e165f3bcf2b48 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3038,16 +3038,22 @@ def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_NEG_ONE, i64, f64, fp_to_sint>;
// instructions resulting in the copies from SCC to these instructions
// will be moved to the VALU.
+def : GCNPat <
+ (i64 (UniformUnaryFrag<zext> i1:$src)),
+ (S_AND_B64 $src, (i64 1))
+>;
+
+def : GCNPat <
+ (i32 (UniformUnaryFrag<zext> i1:$src)),
+ (S_AND_B32 $src, (i32 1))
+>;
+
let WaveSizePredicate = isWave64 in {
def : GCNPat <
(i1 (and i1:$src0, i1:$src1)),
(S_AND_B64 $src0, $src1)
>;
-def : GCNPat <
- (i64 (UniformUnaryFrag<zext> i1:$src)),
- (S_AND_B64 $src, (i64 1))
->;
def : GCNPat <
(i1 (or i1:$src0, i1:$src1)),
@@ -3088,11 +3094,6 @@ def : GCNPat <
(S_AND_B32 $src0, $src1)
>;
-def : GCNPat <
- (i32 (UniformUnaryFrag<zext> i1:$src)),
- (S_AND_B32 $src, (i32 1))
->;
-
def : GCNPat <
(i1 (or i1:$src0, i1:$src1)),
(S_OR_B32 $src0, $src1)
More information about the llvm-commits
mailing list