[llvm] AMDGPU: Fix si-fix-sgpr-copies asserting on VReg_1 phi (PR #128903)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 26 08:30:28 PST 2025
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/128903
We should just need to leave these alone for the si-i1-copies
pass to deal with them. Note that the IR test did not fail
previously; it only establishes a baseline result for a pending
patch which hit the failure.
>From 4d536cf2a4bc33e585202456f2c77770e6599b39 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 26 Feb 2025 21:44:51 +0700
Subject: [PATCH] AMDGPU: Fix si-fix-sgpr-copies asserting on VReg_1 phi
We should just need to leave these alone for the si-i1-copies
pass to deal with them. Note that the IR test did not fail
previously; it only establishes a baseline result for a pending
patch which hit the failure.
---
llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp | 3 +
.../divergent-branch-uniform-condition.ll | 62 ++++++++++++
...1-divergent-phi-fix-sgpr-copies-assert.mir | 97 +++++++++++++++++++
3 files changed, 162 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/i1-divergent-phi-fix-sgpr-copies-assert.mir
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index a99fd25477553..52d00485385c2 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -669,6 +669,9 @@ bool SIFixSGPRCopies::run(MachineFunction &MF) {
if (!MO.isReg() || !MO.getReg().isVirtual())
continue;
const TargetRegisterClass *SrcRC = MRI->getRegClass(MO.getReg());
+ if (SrcRC == &AMDGPU::VReg_1RegClass)
+ continue;
+
if (TRI->hasVectorRegisters(SrcRC)) {
const TargetRegisterClass *DestRC =
TRI->getEquivalentSGPRClass(SrcRC);
diff --git a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
index 757458363284c..0a420396f52a9 100644
--- a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
@@ -121,6 +121,68 @@ endloop: ; preds = %if1, %Flow2
ret void
}
+define amdgpu_ps void @i1_copy_assert(i1 %v4) {
+; ISA-LABEL: i1_copy_assert:
+; ISA: ; %bb.0: ; %start
+; ISA-NEXT: v_and_b32_e32 v0, 1, v0
+; ISA-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
+; ISA-NEXT: s_mov_b32 s8, 0
+; ISA-NEXT: s_mov_b64 s[0:1], 0
+; ISA-NEXT: ; implicit-def: $sgpr4_sgpr5
+; ISA-NEXT: ; implicit-def: $sgpr2_sgpr3
+; ISA-NEXT: s_branch .LBB1_3
+; ISA-NEXT: .LBB1_1: ; %endif1
+; ISA-NEXT: ; in Loop: Header=BB1_3 Depth=1
+; ISA-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
+; ISA-NEXT: s_and_b64 s[8:9], vcc, exec
+; ISA-NEXT: s_mov_b64 s[6:7], 0
+; ISA-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
+; ISA-NEXT: .LBB1_2: ; %Flow
+; ISA-NEXT: ; in Loop: Header=BB1_3 Depth=1
+; ISA-NEXT: s_and_b64 s[8:9], exec, s[4:5]
+; ISA-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
+; ISA-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
+; ISA-NEXT: s_and_b64 s[6:7], s[6:7], exec
+; ISA-NEXT: s_mov_b32 s8, 1
+; ISA-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7]
+; ISA-NEXT: s_andn2_b64 exec, exec, s[0:1]
+; ISA-NEXT: s_cbranch_execz .LBB1_5
+; ISA-NEXT: .LBB1_3: ; %loop
+; ISA-NEXT: ; =>This Inner Loop Header: Depth=1
+; ISA-NEXT: s_or_b64 s[4:5], s[4:5], exec
+; ISA-NEXT: s_cmp_lg_u32 s8, 0
+; ISA-NEXT: s_cbranch_scc1 .LBB1_1
+; ISA-NEXT: ; %bb.4: ; in Loop: Header=BB1_3 Depth=1
+; ISA-NEXT: s_mov_b64 s[6:7], -1
+; ISA-NEXT: s_branch .LBB1_2
+; ISA-NEXT: .LBB1_5: ; %Flow2
+; ISA-NEXT: s_or_b64 exec, exec, s[0:1]
+; ISA-NEXT: v_mov_b32_e32 v0, 0
+; ISA-NEXT: v_cndmask_b32_e64 v1, 0, 1.0, s[2:3]
+; ISA-NEXT: exp mrt0 off, off, off, off
+; ISA-NEXT: s_endpgm
+start:
+ br label %loop
+
+loop: ; preds = %Flow, %start
+ %v1 = phi i32 [ 0, %start ], [ 1, %Flow ]
+ %v2 = icmp ugt i32 %v1, 0
+ br i1 %v2, label %endif1, label %Flow
+
+Flow2: ; preds = %Flow
+ %spec.select = select i1 %i1, float 1.000000e+00, float 0.000000e+00
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float %spec.select, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, i1 false, i1 false)
+ ret void
+
+endif1: ; preds = %loop
+ br label %Flow
+
+Flow: ; preds = %endif1, %loop
+ %i = phi i1 [ %v4, %endif1 ], [ true, %loop ]
+ %i1 = phi i1 [ false, %endif1 ], [ true, %loop ]
+ br i1 %i, label %Flow2, label %loop
+}
+
; Function Attrs: nounwind readnone speculatable willreturn
declare float @llvm.sqrt.f32(float) #0
diff --git a/llvm/test/CodeGen/AMDGPU/i1-divergent-phi-fix-sgpr-copies-assert.mir b/llvm/test/CodeGen/AMDGPU/i1-divergent-phi-fix-sgpr-copies-assert.mir
new file mode 100644
index 0000000000000..1b5999c689178
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/i1-divergent-phi-fix-sgpr-copies-assert.mir
@@ -0,0 +1,97 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=si-fix-sgpr-copies,si-i1-copies -o - %s | FileCheck %s
+
+# Make sure SIFixSGPRCopies does not assert on a phi with vreg_1
+# inputs.
+
+---
+name: i1_copy_assert
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: i1_copy_assert
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[COPY]], 1, implicit $exec
+ ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %14, %bb.4
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:sreg_64 = PHI [[DEF]], %bb.0, %8, %bb.4
+ ; CHECK-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_MOV_B64_1]]
+ ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[PHI]], $exec, implicit-def $scc
+ ; CHECK-NEXT: S_CMP_LG_U32 [[DEF2]], killed [[S_MOV_B32_]], implicit-def $scc
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit $scc
+ ; CHECK-NEXT: S_BRANCH %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY %8
+ ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, [[V_MOV_B32_e32_]], [[COPY2]], implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit killed [[V_CNDMASK_B32_e64_]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; CHECK-NEXT: [[S_ANDN2_B64_:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[S_OR_B64_]], $exec, implicit-def $scc
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_EQ_U32_e64_]], $exec, implicit-def $scc
+ ; CHECK-NEXT: [[S_OR_B64_1:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_ANDN2_B64_]], [[S_AND_B64_]], implicit-def $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI2:%[0-9]+]]:sreg_64 = PHI [[S_OR_B64_]], %bb.1, [[S_OR_B64_1]], %bb.3
+ ; CHECK-NEXT: [[PHI3:%[0-9]+]]:sreg_64 = PHI [[COPY1]], %bb.1, [[S_MOV_B64_2]], %bb.3
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY [[PHI2]]
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[S_ANDN2_B64_1:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[PHI1]], $exec, implicit-def $scc
+ ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[PHI3]], $exec, implicit-def $scc
+ ; CHECK-NEXT: [[S_OR_B64_2:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_ANDN2_B64_1]], [[S_AND_B64_1]], implicit-def $scc
+ ; CHECK-NEXT: SI_LOOP [[DEF3]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ bb.0:
+ liveins: $vgpr0
+
+ %0:vgpr_32 = COPY $vgpr0
+ %1:sreg_64 = V_CMP_EQ_U32_e64 killed %0, 1, implicit $exec
+ %2:sreg_64 = S_MOV_B64 0
+ %3:vreg_1 = COPY %1
+
+ bb.1:
+ %4:sreg_64 = S_MOV_B64 -1
+ %5:vreg_1 = COPY %4
+ %6:sreg_32 = S_MOV_B32 0
+ %7:sreg_32 = IMPLICIT_DEF
+ S_CMP_LG_U32 %7, killed %6, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.3, implicit $scc
+ S_BRANCH %bb.4
+
+ bb.2:
+ %8:vreg_1 = PHI %9, %bb.4
+ %10:sreg_64_xexec = COPY %8
+ %11:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
+ %12:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, %11, %10, implicit $exec
+ S_ENDPGM 0, implicit killed %12
+
+ bb.3:
+ %13:sreg_64 = S_MOV_B64 0
+
+ bb.4:
+ %14:vreg_1 = PHI %5, %bb.1, %3, %bb.3
+ %9:sreg_64 = PHI %5, %bb.1, %13, %bb.3
+ %15:sreg_64 = COPY %14
+ %16:sreg_64 = IMPLICIT_DEF
+ SI_LOOP %16, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.2
+
+...
More information about the llvm-commits
mailing list