[llvm] d708bfb - AMDGPU: Fix si-fix-sgpr-copies asserting on VReg_1 phi (#128903)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 26 15:49:51 PST 2025
Author: Matt Arsenault
Date: 2025-02-27T06:49:47+07:00
New Revision: d708bfb3c0be7ffdba384eff15cd329863568453
URL: https://github.com/llvm/llvm-project/commit/d708bfb3c0be7ffdba384eff15cd329863568453
DIFF: https://github.com/llvm/llvm-project/commit/d708bfb3c0be7ffdba384eff15cd329863568453.diff
LOG: AMDGPU: Fix si-fix-sgpr-copies asserting on VReg_1 phi (#128903)
Added:
llvm/test/CodeGen/AMDGPU/i1-divergent-phi-fix-sgpr-copies-assert.mir
Modified:
llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index a99fd25477553..52d00485385c2 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -669,6 +669,9 @@ bool SIFixSGPRCopies::run(MachineFunction &MF) {
if (!MO.isReg() || !MO.getReg().isVirtual())
continue;
const TargetRegisterClass *SrcRC = MRI->getRegClass(MO.getReg());
+ if (SrcRC == &AMDGPU::VReg_1RegClass)
+ continue;
+
if (TRI->hasVectorRegisters(SrcRC)) {
const TargetRegisterClass *DestRC =
TRI->getEquivalentSGPRClass(SrcRC);
diff --git a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
index 757458363284c..0a420396f52a9 100644
--- a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
@@ -121,6 +121,68 @@ endloop: ; preds = %if1, %Flow2
ret void
}
+define amdgpu_ps void @i1_copy_assert(i1 %v4) {
+; ISA-LABEL: i1_copy_assert:
+; ISA: ; %bb.0: ; %start
+; ISA-NEXT: v_and_b32_e32 v0, 1, v0
+; ISA-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
+; ISA-NEXT: s_mov_b32 s8, 0
+; ISA-NEXT: s_mov_b64 s[0:1], 0
+; ISA-NEXT: ; implicit-def: $sgpr4_sgpr5
+; ISA-NEXT: ; implicit-def: $sgpr2_sgpr3
+; ISA-NEXT: s_branch .LBB1_3
+; ISA-NEXT: .LBB1_1: ; %endif1
+; ISA-NEXT: ; in Loop: Header=BB1_3 Depth=1
+; ISA-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
+; ISA-NEXT: s_and_b64 s[8:9], vcc, exec
+; ISA-NEXT: s_mov_b64 s[6:7], 0
+; ISA-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
+; ISA-NEXT: .LBB1_2: ; %Flow
+; ISA-NEXT: ; in Loop: Header=BB1_3 Depth=1
+; ISA-NEXT: s_and_b64 s[8:9], exec, s[4:5]
+; ISA-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
+; ISA-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
+; ISA-NEXT: s_and_b64 s[6:7], s[6:7], exec
+; ISA-NEXT: s_mov_b32 s8, 1
+; ISA-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7]
+; ISA-NEXT: s_andn2_b64 exec, exec, s[0:1]
+; ISA-NEXT: s_cbranch_execz .LBB1_5
+; ISA-NEXT: .LBB1_3: ; %loop
+; ISA-NEXT: ; =>This Inner Loop Header: Depth=1
+; ISA-NEXT: s_or_b64 s[4:5], s[4:5], exec
+; ISA-NEXT: s_cmp_lg_u32 s8, 0
+; ISA-NEXT: s_cbranch_scc1 .LBB1_1
+; ISA-NEXT: ; %bb.4: ; in Loop: Header=BB1_3 Depth=1
+; ISA-NEXT: s_mov_b64 s[6:7], -1
+; ISA-NEXT: s_branch .LBB1_2
+; ISA-NEXT: .LBB1_5: ; %Flow2
+; ISA-NEXT: s_or_b64 exec, exec, s[0:1]
+; ISA-NEXT: v_mov_b32_e32 v0, 0
+; ISA-NEXT: v_cndmask_b32_e64 v1, 0, 1.0, s[2:3]
+; ISA-NEXT: exp mrt0 off, off, off, off
+; ISA-NEXT: s_endpgm
+start:
+ br label %loop
+
+loop: ; preds = %Flow, %start
+ %v1 = phi i32 [ 0, %start ], [ 1, %Flow ]
+ %v2 = icmp ugt i32 %v1, 0
+ br i1 %v2, label %endif1, label %Flow
+
+Flow2: ; preds = %Flow
+ %spec.select = select i1 %i1, float 1.000000e+00, float 0.000000e+00
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float %spec.select, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, i1 false, i1 false)
+ ret void
+
+endif1: ; preds = %loop
+ br label %Flow
+
+Flow: ; preds = %endif1, %loop
+ %i = phi i1 [ %v4, %endif1 ], [ true, %loop ]
+ %i1 = phi i1 [ false, %endif1 ], [ true, %loop ]
+ br i1 %i, label %Flow2, label %loop
+}
+
; Function Attrs: nounwind readnone speculatable willreturn
declare float @llvm.sqrt.f32(float) #0
diff --git a/llvm/test/CodeGen/AMDGPU/i1-divergent-phi-fix-sgpr-copies-assert.mir b/llvm/test/CodeGen/AMDGPU/i1-divergent-phi-fix-sgpr-copies-assert.mir
new file mode 100644
index 0000000000000..1b5999c689178
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/i1-divergent-phi-fix-sgpr-copies-assert.mir
@@ -0,0 +1,97 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=si-fix-sgpr-copies,si-i1-copies -o - %s | FileCheck %s
+
+# Make sure SIFixSGPRCopies does not assert on a phi with vreg_1
+# inputs.
+
+---
+name: i1_copy_assert
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: i1_copy_assert
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[COPY]], 1, implicit $exec
+ ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %14, %bb.4
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:sreg_64 = PHI [[DEF]], %bb.0, %8, %bb.4
+ ; CHECK-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_MOV_B64_1]]
+ ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[PHI]], $exec, implicit-def $scc
+ ; CHECK-NEXT: S_CMP_LG_U32 [[DEF2]], killed [[S_MOV_B32_]], implicit-def $scc
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit $scc
+ ; CHECK-NEXT: S_BRANCH %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY %8
+ ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, [[V_MOV_B32_e32_]], [[COPY2]], implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit killed [[V_CNDMASK_B32_e64_]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; CHECK-NEXT: [[S_ANDN2_B64_:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[S_OR_B64_]], $exec, implicit-def $scc
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_EQ_U32_e64_]], $exec, implicit-def $scc
+ ; CHECK-NEXT: [[S_OR_B64_1:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_ANDN2_B64_]], [[S_AND_B64_]], implicit-def $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI2:%[0-9]+]]:sreg_64 = PHI [[S_OR_B64_]], %bb.1, [[S_OR_B64_1]], %bb.3
+ ; CHECK-NEXT: [[PHI3:%[0-9]+]]:sreg_64 = PHI [[COPY1]], %bb.1, [[S_MOV_B64_2]], %bb.3
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY [[PHI2]]
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[S_ANDN2_B64_1:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[PHI1]], $exec, implicit-def $scc
+ ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[PHI3]], $exec, implicit-def $scc
+ ; CHECK-NEXT: [[S_OR_B64_2:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_ANDN2_B64_1]], [[S_AND_B64_1]], implicit-def $scc
+ ; CHECK-NEXT: SI_LOOP [[DEF3]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ bb.0:
+ liveins: $vgpr0
+
+ %0:vgpr_32 = COPY $vgpr0
+ %1:sreg_64 = V_CMP_EQ_U32_e64 killed %0, 1, implicit $exec
+ %2:sreg_64 = S_MOV_B64 0
+ %3:vreg_1 = COPY %1
+
+ bb.1:
+ %4:sreg_64 = S_MOV_B64 -1
+ %5:vreg_1 = COPY %4
+ %6:sreg_32 = S_MOV_B32 0
+ %7:sreg_32 = IMPLICIT_DEF
+ S_CMP_LG_U32 %7, killed %6, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.3, implicit $scc
+ S_BRANCH %bb.4
+
+ bb.2:
+ %8:vreg_1 = PHI %9, %bb.4
+ %10:sreg_64_xexec = COPY %8
+ %11:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
+ %12:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, %11, %10, implicit $exec
+ S_ENDPGM 0, implicit killed %12
+
+ bb.3:
+ %13:sreg_64 = S_MOV_B64 0
+
+ bb.4:
+ %14:vreg_1 = PHI %5, %bb.1, %3, %bb.3
+ %9:sreg_64 = PHI %5, %bb.1, %13, %bb.3
+ %15:sreg_64 = COPY %14
+ %16:sreg_64 = IMPLICIT_DEF
+ SI_LOOP %16, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.2
+
+...
More information about the llvm-commits mailing list