[llvm] d4e46f0 - [AMDGPU] Fix machine verification failure from INIT_EXEC lowering (#98333)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 11 01:18:54 PDT 2024


Author: Jay Foad
Date: 2024-07-11T09:18:50+01:00
New Revision: d4e46f0e864e37085da0c5e56e4f6f278e2f7aee

URL: https://github.com/llvm/llvm-project/commit/d4e46f0e864e37085da0c5e56e4f6f278e2f7aee
DIFF: https://github.com/llvm/llvm-project/commit/d4e46f0e864e37085da0c5e56e4f6f278e2f7aee.diff

LOG: [AMDGPU] Fix machine verification failure from INIT_EXEC lowering (#98333)

Fix a machine verification failure from INIT_EXEC lowering that has
occurred since the lowering was moved from SILowerControlFlow to
SIWholeQuadMode in #94452.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
    llvm/test/CodeGen/AMDGPU/wqm.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 9f064493f5047..ae91cb31590cf 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -1676,6 +1676,8 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
   if (!(GlobalFlags & (StateWQM | StateStrict)) && LowerToCopyInstrs.empty() &&
       LowerToMovInstrs.empty() && KillInstrs.empty()) {
     lowerLiveMaskQueries();
+    if (!InitExecInstrs.empty())
+      LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
     return !InitExecInstrs.empty() || !LiveMaskQueries.empty();
   }
 
@@ -1717,7 +1719,7 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
   LIS->removeAllRegUnitsForPhysReg(AMDGPU::SCC);
 
   // If we performed any kills then recompute EXEC
-  if (!KillInstrs.empty())
+  if (!KillInstrs.empty() || !InitExecInstrs.empty())
     LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
 
   return true;

diff  --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll
index c621904ff727b..11003c4c9edfd 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm.ll
+++ b/llvm/test/CodeGen/AMDGPU/wqm.ll
@@ -3463,6 +3463,81 @@ bb:
   ret void
 }
 
+; Test a case that failed machine verification.
+define amdgpu_gs void @wqm_init_exec_switch(i32 %arg) {
+; GFX9-W64-LABEL: wqm_init_exec_switch:
+; GFX9-W64:       ; %bb.0:
+; GFX9-W64-NEXT:    s_mov_b64 exec, 0
+; GFX9-W64-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v0
+; GFX9-W64-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX9-W64-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX9-W64-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX9-W64-NEXT:    s_endpgm
+;
+; GFX10-W32-LABEL: wqm_init_exec_switch:
+; GFX10-W32:       ; %bb.0:
+; GFX10-W32-NEXT:    s_mov_b32 exec_lo, 0
+; GFX10-W32-NEXT:    s_mov_b32 s0, exec_lo
+; GFX10-W32-NEXT:    v_cmpx_lt_i32_e32 0, v0
+; GFX10-W32-NEXT:    s_xor_b32 s0, exec_lo, s0
+; GFX10-W32-NEXT:    s_andn2_saveexec_b32 s0, s0
+; GFX10-W32-NEXT:    s_endpgm
+  call void @llvm.amdgcn.init.exec(i64 0)
+  switch i32 %arg, label %bb1 [
+    i32 0, label %bb3
+    i32 1, label %bb2
+  ]
+bb1:
+  ret void
+bb2:
+  ret void
+bb3:
+  ret void
+}
+
+define amdgpu_gs void @wqm_init_exec_wwm() {
+; GFX9-W64-LABEL: wqm_init_exec_wwm:
+; GFX9-W64:       ; %bb.0:
+; GFX9-W64-NEXT:    s_mov_b64 exec, 0
+; GFX9-W64-NEXT:    s_mov_b32 s1, 0
+; GFX9-W64-NEXT:    s_mov_b32 s0, s1
+; GFX9-W64-NEXT:    s_cmp_lg_u64 exec, 0
+; GFX9-W64-NEXT:    s_cselect_b64 s[2:3], -1, 0
+; GFX9-W64-NEXT:    s_cmp_lg_u64 s[0:1], 0
+; GFX9-W64-NEXT:    s_cselect_b64 s[0:1], -1, 0
+; GFX9-W64-NEXT:    s_xor_b64 s[0:1], s[2:3], s[0:1]
+; GFX9-W64-NEXT:    v_cndmask_b32_e64 v0, 0, 1.0, s[0:1]
+; GFX9-W64-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-W64-NEXT:    exp mrt0 off, off, off, off
+; GFX9-W64-NEXT:    s_endpgm
+;
+; GFX10-W32-LABEL: wqm_init_exec_wwm:
+; GFX10-W32:       ; %bb.0:
+; GFX10-W32-NEXT:    s_mov_b32 exec_lo, 0
+; GFX10-W32-NEXT:    s_mov_b32 s1, 0
+; GFX10-W32-NEXT:    s_cmp_lg_u64 exec, 0
+; GFX10-W32-NEXT:    s_mov_b32 s0, s1
+; GFX10-W32-NEXT:    s_cselect_b32 s2, -1, 0
+; GFX10-W32-NEXT:    s_cmp_lg_u64 s[0:1], 0
+; GFX10-W32-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10-W32-NEXT:    s_cselect_b32 s0, -1, 0
+; GFX10-W32-NEXT:    s_xor_b32 s0, s2, s0
+; GFX10-W32-NEXT:    v_cndmask_b32_e64 v0, 0, 1.0, s0
+; GFX10-W32-NEXT:    exp mrt0 off, off, off, off
+; GFX10-W32-NEXT:    s_endpgm
+  call void @llvm.amdgcn.init.exec(i64 0)
+  %i = call i64 @llvm.amdgcn.ballot.i64(i1 true)
+  %i1 = call i32 @llvm.amdgcn.wwm.i32(i32 0)
+  %i2 = insertelement <2 x i32> zeroinitializer, i32 %i1, i64 0
+  %i3 = bitcast <2 x i32> %i2 to i64
+  %i4 = icmp ne i64 %i, 0
+  %i5 = icmp ne i64 %i3, 0
+  %i6 = xor i1 %i4, %i5
+  %i7 = uitofp i1 %i6 to float
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float %i7, float 0.0, float 0.0, float 0.0, i1 false, i1 false)
+  ret void
+}
+
 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
 declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #1
 


        


More information about the llvm-commits mailing list