[llvm] ba5cde7 - [AMDGPU][GlobalISel] Fix issue with copy_scc_vcc on gfx7 (#165355)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 30 08:19:16 PDT 2025
Author: vangthao95
Date: 2025-10-30T08:19:12-07:00
New Revision: ba5cde79aa05eeaa87d45cf472f3583fa9f93bff
URL: https://github.com/llvm/llvm-project/commit/ba5cde79aa05eeaa87d45cf472f3583fa9f93bff
DIFF: https://github.com/llvm/llvm-project/commit/ba5cde79aa05eeaa87d45cf472f3583fa9f93bff.diff
LOG: [AMDGPU][GlobalISel] Fix issue with copy_scc_vcc on gfx7 (#165355)
When selecting for G_AMDGPU_COPY_SCC_VCC, we use S_CMP_LG_U64 or
S_CMP_LG_U32 for wave64 and wave32 respectively. However, on gfx7 we do
not have the S_CMP_LG_U64 instruction. Work around this issue by using
S_OR_B64 instead.
Added:
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir
Modified:
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 9ce12243016f4..aed325cf627bc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -221,12 +221,22 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
bool AMDGPUInstructionSelector::selectCOPY_SCC_VCC(MachineInstr &I) const {
const DebugLoc &DL = I.getDebugLoc();
MachineBasicBlock *BB = I.getParent();
+ Register VCCReg = I.getOperand(1).getReg();
+ MachineInstr *Cmp;
+
+ if (STI.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ unsigned CmpOpc =
+ STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32;
+ Cmp = BuildMI(*BB, &I, DL, TII.get(CmpOpc)).addReg(VCCReg).addImm(0);
+ } else {
+ // For gfx7 and earlier, S_CMP_LG_U64 doesn't exist, so we use S_OR_B64
+ // which sets SCC as a side effect.
+ Register DeadDst = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ Cmp = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_OR_B64), DeadDst)
+ .addReg(VCCReg)
+ .addReg(VCCReg);
+ }
- unsigned CmpOpc =
- STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32;
- MachineInstr *Cmp = BuildMI(*BB, &I, DL, TII.get(CmpOpc))
- .addReg(I.getOperand(1).getReg())
- .addImm(0);
if (!constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI))
return false;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll
new file mode 100644
index 0000000000000..1a7ccf0835686
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX7 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX8 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
+
+define amdgpu_kernel void @fcmp_uniform_select(float %a, i32 %b, i32 %c, ptr addrspace(1) %out) {
+; GFX7-LABEL: fcmp_uniform_select:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x9
+; GFX7-NEXT: s_load_dword s3, s[4:5], 0xb
+; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xd
+; GFX7-NEXT: s_mov_b32 s2, -1
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: v_cmp_eq_f32_e64 s[4:5], s6, 0
+; GFX7-NEXT: s_or_b64 s[4:5], s[4:5], s[4:5]
+; GFX7-NEXT: s_cselect_b32 s4, 1, 0
+; GFX7-NEXT: s_and_b32 s4, s4, 1
+; GFX7-NEXT: s_cmp_lg_u32 s4, 0
+; GFX7-NEXT: s_cselect_b32 s3, s7, s3
+; GFX7-NEXT: v_mov_b32_e32 v0, s3
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX7-NEXT: s_endpgm
+;
+; GFX8-LABEL: fcmp_uniform_select:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8-NEXT: s_load_dword s6, s[4:5], 0x2c
+; GFX8-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x34
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: v_cmp_eq_f32_e64 s[4:5], s0, 0
+; GFX8-NEXT: s_cmp_lg_u64 s[4:5], 0
+; GFX8-NEXT: s_cselect_b32 s0, 1, 0
+; GFX8-NEXT: s_and_b32 s0, s0, 1
+; GFX8-NEXT: s_cmp_lg_u32 s0, 0
+; GFX8-NEXT: s_cselect_b32 s0, s1, s6
+; GFX8-NEXT: v_mov_b32_e32 v0, s2
+; GFX8-NEXT: v_mov_b32_e32 v2, s0
+; GFX8-NEXT: v_mov_b32_e32 v1, s3
+; GFX8-NEXT: flat_store_dword v[0:1], v2
+; GFX8-NEXT: s_endpgm
+;
+; GFX11-LABEL: fcmp_uniform_select:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x2
+; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-NEXT: s_load_b32 s6, s[4:5], 0x2c
+; GFX11-NEXT: s_load_b64 s[2:3], s[4:5], 0x34
+; GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_f32_e64 s0, s0, 0
+; GFX11-NEXT: s_cmp_lg_u32 s0, 0
+; GFX11-NEXT: s_cselect_b32 s0, 1, 0
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT: s_and_b32 s0, s0, 1
+; GFX11-NEXT: s_cmp_lg_u32 s0, 0
+; GFX11-NEXT: s_cselect_b32 s0, s1, s6
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: global_store_b32 v1, v0, s[2:3]
+; GFX11-NEXT: s_endpgm
+ %cmp = fcmp oeq float %a, 0.0
+ %sel = select i1 %cmp, i32 %b, i32 %c
+ store i32 %sel, ptr addrspace(1) %out
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir
new file mode 100644
index 0000000000000..67cc0169af619
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir
@@ -0,0 +1,37 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -mcpu=gfx700 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX7 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx803 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GF8 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX11 %s
+
+---
+name: test_copy_scc_vcc
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GFX7-LABEL: name: test_copy_scc_vcc
+ ; GFX7: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GFX7-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[DEF]], [[DEF]], implicit-def $scc
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX7-NEXT: $sgpr0 = COPY [[COPY]]
+ ; GFX7-NEXT: S_ENDPGM 0, implicit $sgpr0
+ ;
+ ; GF8-LABEL: name: test_copy_scc_vcc
+ ; GF8: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GF8-NEXT: S_CMP_LG_U64 [[DEF]], 0, implicit-def $scc
+ ; GF8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GF8-NEXT: $sgpr0 = COPY [[COPY]]
+ ; GF8-NEXT: S_ENDPGM 0, implicit $sgpr0
+ ;
+ ; GFX11-LABEL: name: test_copy_scc_vcc
+ ; GFX11: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
+ ; GFX11-NEXT: S_CMP_LG_U32 [[DEF]], 0, implicit-def $scc
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX11-NEXT: $sgpr0 = COPY [[COPY]]
+ ; GFX11-NEXT: S_ENDPGM 0, implicit $sgpr0
+ %0:vcc(s1) = G_IMPLICIT_DEF
+ %1:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC %0
+ $sgpr0 = COPY %1
+ S_ENDPGM 0, implicit $sgpr0
+...
More information about the llvm-commits
mailing list