[PATCH] D95509: [AMDGPU] Mark V_SET_INACTIVE as defining SCC

Carl Ritson via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 28 16:47:29 PST 2021


This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG0e8f50595e22: [AMDGPU] Mark V_SET_INACTIVE as defining SCC (authored by critson).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D95509/new/

https://reviews.llvm.org/D95509

Files:
  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
  llvm/lib/Target/AMDGPU/SIInstructions.td
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll
  llvm/test/CodeGen/AMDGPU/wqm.mir


Index: llvm/test/CodeGen/AMDGPU/wqm.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/wqm.mir
+++ llvm/test/CodeGen/AMDGPU/wqm.mir
@@ -99,7 +99,7 @@
     %8:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %6, %7, 0, 0, 0, 0, 0, 0, implicit $exec
     %16:vgpr_32 = COPY %8.sub1
     %11:vgpr_32 = COPY %16
-    %10:vgpr_32 = V_SET_INACTIVE_B32 %11, undef %12:sreg_32, implicit $exec
+    %10:vgpr_32 = V_SET_INACTIVE_B32 %11, undef %12:sreg_32, implicit $exec, implicit-def $scc
     %14:vgpr_32 = COPY %7
     %13:vgpr_32 = V_MOV_B32_dpp %14, killed %10, 323, 12, 15, 0, implicit $exec
     early-clobber %15:vgpr_32 = WWM killed %13, implicit $exec
Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll
@@ -45,17 +45,17 @@
 define amdgpu_kernel void @set_inactive_scc(i32 addrspace(1)* %out, i32 %in, <4 x i32> inreg %desc) {
 ; GCN-LABEL: set_inactive_scc:
 ; GCN:       ; %bb.0:
-; GCN-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x34
 ; GCN-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
-; GCN-NEXT:    s_load_dword s0, s[0:1], 0x2c
-; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_buffer_load_dword s1, s[8:11], 0x0
-; GCN-NEXT:    v_mov_b32_e32 v0, s0
+; GCN-NEXT:    s_load_dword s6, s[0:1], 0x2c
+; GCN-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x34
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_cmp_lg_u32 s1, 56
+; GCN-NEXT:    v_mov_b32_e32 v0, s6
+; GCN-NEXT:    s_buffer_load_dword s0, s[0:3], 0x0
 ; GCN-NEXT:    s_not_b64 exec, exec
 ; GCN-NEXT:    v_mov_b32_e32 v0, 42
 ; GCN-NEXT:    s_not_b64 exec, exec
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_cmp_lg_u32 s0, 56
 ; GCN-NEXT:    s_mov_b64 s[0:1], -1
 ; GCN-NEXT:    s_cbranch_scc1 BB2_3
 ; GCN-NEXT:  ; %bb.1: ; %Flow
Index: llvm/lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstructions.td
+++ llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -145,6 +145,7 @@
 
 // Invert the exec mask and overwrite the inactive lanes of dst with inactive,
 // restoring it after we're done.
+let Defs = [SCC] in {
 def V_SET_INACTIVE_B32 : VPseudoInstSI <(outs VGPR_32:$vdst),
   (ins VGPR_32: $src, VSrc_b32:$inactive),
   [(set i32:$vdst, (int_amdgcn_set_inactive i32:$src, i32:$inactive))]> {
@@ -156,6 +157,7 @@
   [(set i64:$vdst, (int_amdgcn_set_inactive i64:$src, i64:$inactive))]> {
   let Constraints = "$src = $vdst";
 }
+} // End Defs = [SCC]
 
 let usesCustomInserter = 1, Defs = [VCC, EXEC] in {
 def V_ADD_U64_PSEUDO : VPseudoInstSI <
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1590,6 +1590,7 @@
 }
 
 bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
   MachineBasicBlock &MBB = *MI.getParent();
   DebugLoc DL = MBB.findDebugLoc(MI);
   switch (MI.getOpcode()) {
@@ -1675,8 +1676,8 @@
   case AMDGPU::V_SET_INACTIVE_B32: {
     unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
     unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
-    BuildMI(MBB, MI, DL, get(NotOpc), Exec)
-      .addReg(Exec);
+    auto FirstNot = BuildMI(MBB, MI, DL, get(NotOpc), Exec).addReg(Exec);
+    FirstNot->addRegisterDead(AMDGPU::SCC, TRI); // SCC is overwritten
     BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), MI.getOperand(0).getReg())
       .add(MI.getOperand(2));
     BuildMI(MBB, MI, DL, get(NotOpc), Exec)
@@ -1687,8 +1688,8 @@
   case AMDGPU::V_SET_INACTIVE_B64: {
     unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
     unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
-    BuildMI(MBB, MI, DL, get(NotOpc), Exec)
-      .addReg(Exec);
+    auto FirstNot = BuildMI(MBB, MI, DL, get(NotOpc), Exec).addReg(Exec);
+    FirstNot->addRegisterDead(AMDGPU::SCC, TRI); // SCC is overwritten
     MachineInstr *Copy = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO),
                                  MI.getOperand(0).getReg())
       .add(MI.getOperand(2));


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D95509.319999.patch
Type: text/x-patch
Size: 4365 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210129/6d12ae68/attachment.bin>


More information about the llvm-commits mailing list