[llvm] [AMDGCN][SIWholeQuadMode] Handle case when SI_KILL_I1_TERMINATOR -1,0 is not the only terminator (PR #122922)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 14 07:40:29 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Juan Manuel Martinez Caamaño (jmmartinez)
<details>
<summary>Changes</summary>
The `SI_KILL_I1_TERMINATOR -1,0` instruction does not have any effect, so we lowered it to an unconditional branch.
However, there may be more than a single terminator in the block (after the `SI_KILL_I1_TERMINATOR`). This resulted in an assertion being triggered later in the pipeline.
To handle this case, we simply remove the `SI_KILL_I1_TERMINATOR -1, 0` when it is not the last terminator.
Solves SWDEV-508819
---
Full diff: https://github.com/llvm/llvm-project/pull/122922.diff
2 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp (+8-3)
- (added) llvm/test/CodeGen/AMDGPU/kill-true-in-return-block.ll (+41)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 9fbb847da2af1c..2795f371de32cb 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -947,9 +947,14 @@ MachineInstr *SIWholeQuadMode::lowerKillI1(MachineBasicBlock &MBB,
LIS->RemoveMachineInstrFromMaps(MI);
} else {
assert(MBB.succ_size() == 1);
- NewTerm = BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_BRANCH))
- .addMBB(*MBB.succ_begin());
- LIS->ReplaceMachineInstrInMaps(MI, *NewTerm);
+ bool IsLastTerminator = MI.getReverseIterator() == MBB.rbegin();
+ if (IsLastTerminator) {
+ NewTerm = BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_BRANCH))
+ .addMBB(*MBB.succ_begin());
+ LIS->ReplaceMachineInstrInMaps(MI, *NewTerm);
+ } else {
+ LIS->RemoveMachineInstrFromMaps(MI);
+ }
}
MBB.remove(&MI);
return NewTerm;
diff --git a/llvm/test/CodeGen/AMDGPU/kill-true-in-return-block.ll b/llvm/test/CodeGen/AMDGPU/kill-true-in-return-block.ll
new file mode 100644
index 00000000000000..021c845d5ea6bb
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/kill-true-in-return-block.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a %s -o - | FileCheck %s
+
+define amdgpu_ps float @kill_true(i1 %.not) {
+; CHECK-LABEL: kill_true:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: s_mov_b64 s[0:1], exec
+; CHECK-NEXT: s_wqm_b64 exec, exec
+; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
+; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
+; CHECK-NEXT: s_xor_b64 s[4:5], vcc, -1
+; CHECK-NEXT: s_and_saveexec_b64 s[2:3], s[4:5]
+; CHECK-NEXT: s_cbranch_execz .LBB0_2
+; CHECK-NEXT: ; %bb.1: ; %if1
+; CHECK-NEXT: s_mov_b32 s4, 0
+; CHECK-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $exec
+; CHECK-NEXT: v_pk_mov_b32 v[0:1], 0, 0
+; CHECK-NEXT: v_mov_b32_e32 v2, s4
+; CHECK-NEXT: flat_store_dword v[0:1], v2
+; CHECK-NEXT: .LBB0_2: ; %endif1
+; CHECK-NEXT: s_or_b64 exec, exec, s[2:3]
+; CHECK-NEXT: s_and_b64 exec, exec, s[0:1]
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ; return to shader part epilog
+entry:
+ br i1 %.not, label %endif1, label %if1
+
+if1:
+ %C = call float @llvm.amdgcn.wqm.f32(float 0.000000e+00)
+ store float %C, ptr null, align 4
+ br label %endif1
+
+endif1:
+ call void @llvm.amdgcn.kill(i1 true)
+ ret float 0.000000e+00
+}
+
+declare void @llvm.amdgcn.kill(i1)
+
+declare float @llvm.amdgcn.wqm.f32(float)
``````````
</details>
https://github.com/llvm/llvm-project/pull/122922
More information about the llvm-commits
mailing list