[llvm] 017ef78 - AMDGPU: Mark scc defs dead in SGPR to VMEM path for no free SGPRs

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 8 15:57:39 PST 2021


Author: Matt Arsenault
Date: 2021-12-08T18:40:49-05:00
New Revision: 017ef7854972e8ad9386bf8eaf4bde7a8123d0ff

URL: https://github.com/llvm/llvm-project/commit/017ef7854972e8ad9386bf8eaf4bde7a8123d0ff
DIFF: https://github.com/llvm/llvm-project/commit/017ef7854972e8ad9386bf8eaf4bde7a8123d0ff.diff

LOG: AMDGPU: Mark scc defs dead in SGPR to VMEM path for no free SGPRs

This introduces verifier errors into this broken situation which we do
not handle correctly, which is better than being silently
miscompiled. For the emergency stack slot, the scavenger likes to move
the restore instruction as late as possible, which ends up separating
the SCC def from the conditional branch.

Added: 
    llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll

Modified: 
    llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index b6b495de9b4f..cc4bf05ef6a1 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -210,6 +210,7 @@ struct SGPRSpillBuilder {
       auto I = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
       if (!TmpVGPRLive)
         I.addReg(TmpVGPR, RegState::ImplicitDefine);
+      I->getOperand(2).setIsDead(true); // Mark SCC as dead.
       TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
     }
   }
@@ -242,9 +243,10 @@ struct SGPRSpillBuilder {
       TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
                                   /*IsKill*/ false);
       auto I = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
-      if (!TmpVGPRLive) {
+      if (!TmpVGPRLive)
         I.addReg(TmpVGPR, RegState::ImplicitKill);
-      }
+      I->getOperand(2).setIsDead(true); // Mark SCC as dead.
+
       // Restore active lanes
       if (TmpVGPRLive)
         TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true);
@@ -267,9 +269,11 @@ struct SGPRSpillBuilder {
       TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad,
                                   /*IsKill*/ false);
       // Spill inactive lanes
-      BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
+      auto Not0 = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
+      Not0->getOperand(2).setIsDead(); // Mark SCC as dead.
       TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
-      BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
+      auto Not1 = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
+      Not1->getOperand(2).setIsDead(); // Mark SCC as dead.
     }
   }
 

diff --git a/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll b/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll
new file mode 100644
index 000000000000..959bc7f33426
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll
@@ -0,0 +1,77 @@
+; RUN: not --crash llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s
+
+; This ends up needing to spill SGPRs to memory, and also does not
+; have any free SGPRs available to save the exec mask when doing so.
+; The register scavenger also needs to use the emergency stack slot,
+; which tries to place the scavenged register restore instruction as
+far down the block as possible, near the terminator. This places a
+; restore instruction between the condition and the conditional
+; branch, which gets expanded into a sequence involving s_not_b64 on
+; the exec mask, clobbering SCC value before the branch. We probably
+; have to stop relying on being able to flip and restore the exec
+; mask, and always require a free SGPR for saving exec.
+
+; CHECK: *** Bad machine code: Using an undefined physical register ***
+; CHECK-NEXT: - function:    kernel0
+; CHECK-NEXT: - basic block: %bb.0
+; CHECK-NEXT: - instruction: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+; CHECK-NEXT: - operand 1:   implicit killed $scc
+define amdgpu_kernel void @kernel0(i32 addrspace(1)* %out, i32 %in) #1 {
+  call void asm sideeffect "", "~{v[0:7]}" () #0
+  call void asm sideeffect "", "~{v[8:15]}" () #0
+  call void asm sideeffect "", "~{v[16:19]}"() #0
+  call void asm sideeffect "", "~{v[20:21]}"() #0
+  call void asm sideeffect "", "~{v22}"() #0
+
+  %val0 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
+  %val1 = call <4 x i32> asm sideeffect "; def $0", "=s" () #0
+  %val2 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
+  %val3 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
+  %val4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
+  %val5 = call <4 x i32> asm sideeffect "; def $0", "=s" () #0
+  %val6 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
+  %val7 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
+  %val8 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
+  %val9 = call <4 x i32> asm sideeffect "; def $0", "=s" () #0
+  %val10 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
+  %val11 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
+  %val12 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
+  %val13 = call <4 x i32> asm sideeffect "; def $0", "=s" () #0
+  %val14 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
+  %val15 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
+  %val16 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
+  %val17 = call <4 x i32> asm sideeffect "; def $0", "=s" () #0
+  %val18 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
+  %val19 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
+  %cmp = icmp eq i32 %in, 0
+  br i1 %cmp, label %bb0, label %ret
+
+bb0:
+  call void asm sideeffect "; use $0", "s"(<2 x i32> %val0) #0
+  call void asm sideeffect "; use $0", "s"(<4 x i32> %val1) #0
+  call void asm sideeffect "; use $0", "s"(<8 x i32> %val2) #0
+  call void asm sideeffect "; use $0", "s"(<16 x i32> %val3) #0
+  call void asm sideeffect "; use $0", "s"(<2 x i32> %val4) #0
+  call void asm sideeffect "; use $0", "s"(<4 x i32> %val5) #0
+  call void asm sideeffect "; use $0", "s"(<8 x i32> %val6) #0
+  call void asm sideeffect "; use $0", "s"(<16 x i32> %val7) #0
+  call void asm sideeffect "; use $0", "s"(<2 x i32> %val8) #0
+  call void asm sideeffect "; use $0", "s"(<4 x i32> %val9) #0
+  call void asm sideeffect "; use $0", "s"(<8 x i32> %val10) #0
+  call void asm sideeffect "; use $0", "s"(<16 x i32> %val11) #0
+  call void asm sideeffect "; use $0", "s"(<2 x i32> %val12) #0
+  call void asm sideeffect "; use $0", "s"(<4 x i32> %val13) #0
+  call void asm sideeffect "; use $0", "s"(<8 x i32> %val14) #0
+  call void asm sideeffect "; use $0", "s"(<16 x i32> %val15) #0
+  call void asm sideeffect "; use $0", "s"(<2 x i32> %val16) #0
+  call void asm sideeffect "; use $0", "s"(<4 x i32> %val17) #0
+  call void asm sideeffect "; use $0", "s"(<8 x i32> %val18) #0
+  call void asm sideeffect "; use $0", "s"(<16 x i32> %val19) #0
+  br label %ret
+
+ret:
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind "amdgpu-waves-per-eu"="10,10" }


        


More information about the llvm-commits mailing list