[llvm] 9d72c0a - [AMDGPU] Mark waterfall loops as SI_WATERFALL_LOOP

Sebastian Neubauer via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 13 03:15:25 PDT 2021


Author: Sebastian Neubauer
Date: 2021-07-13T12:15:08+02:00
New Revision: 9d72c0ad43e720ef2394a23a2f4c58f79d753f03

URL: https://github.com/llvm/llvm-project/commit/9d72c0ad43e720ef2394a23a2f4c58f79d753f03
DIFF: https://github.com/llvm/llvm-project/commit/9d72c0ad43e720ef2394a23a2f4c58f79d753f03.diff

LOG: [AMDGPU] Mark waterfall loops as SI_WATERFALL_LOOP

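Emit the branch that closes a waterfall loop as a dedicated SI_WATERFALL_LOOP
pseudo instruction, which SILowerControlFlow later rewrites to
S_CBRANCH_EXECNZ. This way, waterfall loops can be detected later, e.g. by the
SIOptimizeVGPRLiveRange pass.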

Differential Revision: https://reviews.llvm.org/D105467

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/lib/Target/AMDGPU/SIInstructions.td
    llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
    llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 8dc74898e4e96..d0f5b2d661893 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5278,7 +5278,7 @@ emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI,
       .addReg(Exec)
       .addReg(SaveExec);
 
-  BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_CBRANCH_EXECNZ)).addMBB(&LoopBB);
+  BuildMI(LoopBB, I, DL, TII.get(AMDGPU::SI_WATERFALL_LOOP)).addMBB(&LoopBB);
 }
 
 // Build a waterfall loop around \p MI, replacing the VGPR \p Rsrc register

diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 0475963066a26..fbf4634bfc94c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -318,6 +318,14 @@ def SI_ELSE : CFPseudoInstSI <
   let hasSideEffects = 1;
 }
 
+def SI_WATERFALL_LOOP : CFPseudoInstSI <
+  (outs),
+  (ins brtarget:$target), [], 1> {
+  let Size = 8;
+  let isBranch = 1;
+  let Defs = [];
+}
+
 def SI_LOOP : CFPseudoInstSI <
   (outs), (ins SReg_1:$saved, brtarget:$target),
   [(AMDGPUloop i1:$saved, bb:$target)], 1, 1> {

diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index edfafd6cd8aa5..0f2836e1e7fb1 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -600,6 +600,10 @@ MachineBasicBlock *SILowerControlFlow::process(MachineInstr &MI) {
     emitLoop(MI);
     break;
 
+  case AMDGPU::SI_WATERFALL_LOOP:
+    MI.setDesc(TII->get(AMDGPU::S_CBRANCH_EXECNZ));
+    break;
+
   case AMDGPU::SI_END_CF:
     SplitBB = emitEndCf(MI);
     break;
@@ -840,6 +844,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
       case AMDGPU::SI_IF:
       case AMDGPU::SI_ELSE:
       case AMDGPU::SI_IF_BREAK:
+      case AMDGPU::SI_WATERFALL_LOOP:
       case AMDGPU::SI_LOOP:
       case AMDGPU::SI_END_CF:
         SplitMBB = process(MI);

diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir
index 256c1509d27f8..8e33900804764 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir
@@ -30,7 +30,7 @@
 # W64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
 # W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
 # W64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
-# W64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+# W64: SI_WATERFALL_LOOP %bb.1, implicit $exec
 # W64-LABEL:  bb.2:
 # W64: $exec = S_MOV_B64 [[SAVEEXEC]]
 
@@ -55,7 +55,7 @@
 # W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
 # TODO: S_XOR_B32_term should be `implicit-def $scc`
 # W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]]
-# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+# W32: SI_WATERFALL_LOOP %bb.1, implicit $exec
 # W32-LABEL:  bb.2:
 # W32: $exec_lo = S_MOV_B32 [[SAVEEXEC]]
 ---
@@ -103,7 +103,7 @@ body:             |
 # W64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
 # W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
 # W64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
-# W64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+# W64: SI_WATERFALL_LOOP %bb.1, implicit $exec
 # W64-LABEL:  bb.2:
 # W64: $exec = S_MOV_B64 [[SAVEEXEC]]
 
@@ -128,7 +128,7 @@ body:             |
 # W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
 # TODO: S_XOR_B32_term should be `implicit-def $scc`
 # W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]]
-# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+# W32: SI_WATERFALL_LOOP %bb.1, implicit $exec
 # W32-LABEL:  bb.2:
 # W32: $exec_lo = S_MOV_B32 [[SAVEEXEC]]
 ---
@@ -176,7 +176,7 @@ body:             |
 # W64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
 # W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
 # W64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
-# W64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+# W64: SI_WATERFALL_LOOP %bb.1, implicit $exec
 # W64-LABEL:  bb.2:
 # W64: $exec = S_MOV_B64 [[SAVEEXEC]]
 
@@ -201,7 +201,7 @@ body:             |
 # W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
 # TODO: S_XOR_B32_term should be `implicit-def $scc`
 # W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]]
-# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+# W32: SI_WATERFALL_LOOP %bb.1, implicit $exec
 # W32-LABEL:  bb.2:
 # W32: $exec_lo = S_MOV_B32 [[SAVEEXEC]]
 ---
@@ -286,7 +286,7 @@ body:             |
 # W64-NO-ADDR64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
 # W64-NO-ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
 # W64-NO-ADDR64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
-# W64-NO-ADDR64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+# W64-NO-ADDR64: SI_WATERFALL_LOOP %bb.1, implicit $exec
 # W64-NO-ADDR64-LABEL:  bb.2:
 # W64-NO-ADDR64: $exec = S_MOV_B64 [[SAVEEXEC]]
 
@@ -309,7 +309,7 @@ body:             |
 # W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
 # TODO: S_XOR_B32_term should be `implicit-def $scc`
 # W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]]
-# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+# W32: SI_WATERFALL_LOOP %bb.1, implicit $exec
 # W32-LABEL:  bb.2:
 # W32: $exec_lo = S_MOV_B32 [[SAVEEXEC]]
 


        


More information about the llvm-commits mailing list