[llvm] 9d72c0a - [AMDGPU] Mark waterfall loops as SI_WATERFALL_LOOP
Sebastian Neubauer via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 13 03:15:25 PDT 2021
Author: Sebastian Neubauer
Date: 2021-07-13T12:15:08+02:00
New Revision: 9d72c0ad43e720ef2394a23a2f4c58f79d753f03
URL: https://github.com/llvm/llvm-project/commit/9d72c0ad43e720ef2394a23a2f4c58f79d753f03
DIFF: https://github.com/llvm/llvm-project/commit/9d72c0ad43e720ef2394a23a2f4c58f79d753f03.diff
LOG: [AMDGPU] Mark waterfall loops as SI_WATERFALL_LOOP
This way, waterfall loops can be detected later, e.g. by the
SIOptimizeVGPRLiveRange pass.
Differential Revision: https://reviews.llvm.org/D105467
Added:
Modified:
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 8dc74898e4e96..d0f5b2d661893 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5278,7 +5278,7 @@ emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI,
.addReg(Exec)
.addReg(SaveExec);
- BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_CBRANCH_EXECNZ)).addMBB(&LoopBB);
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::SI_WATERFALL_LOOP)).addMBB(&LoopBB);
}
// Build a waterfall loop around \p MI, replacing the VGPR \p Rsrc register
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 0475963066a26..fbf4634bfc94c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -318,6 +318,14 @@ def SI_ELSE : CFPseudoInstSI <
let hasSideEffects = 1;
}
+def SI_WATERFALL_LOOP : CFPseudoInstSI <
+ (outs),
+ (ins brtarget:$target), [], 1> {
+ let Size = 8;
+ let isBranch = 1;
+ let Defs = [];
+}
+
def SI_LOOP : CFPseudoInstSI <
(outs), (ins SReg_1:$saved, brtarget:$target),
[(AMDGPUloop i1:$saved, bb:$target)], 1, 1> {
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index edfafd6cd8aa5..0f2836e1e7fb1 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -600,6 +600,10 @@ MachineBasicBlock *SILowerControlFlow::process(MachineInstr &MI) {
emitLoop(MI);
break;
+ case AMDGPU::SI_WATERFALL_LOOP:
+ MI.setDesc(TII->get(AMDGPU::S_CBRANCH_EXECNZ));
+ break;
+
case AMDGPU::SI_END_CF:
SplitBB = emitEndCf(MI);
break;
@@ -840,6 +844,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
case AMDGPU::SI_IF:
case AMDGPU::SI_ELSE:
case AMDGPU::SI_IF_BREAK:
+ case AMDGPU::SI_WATERFALL_LOOP:
case AMDGPU::SI_LOOP:
case AMDGPU::SI_END_CF:
SplitMBB = process(MI);
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir
index 256c1509d27f8..8e33900804764 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir
@@ -30,7 +30,7 @@
# W64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
# W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
# W64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
-# W64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+# W64: SI_WATERFALL_LOOP %bb.1, implicit $exec
# W64-LABEL: bb.2:
# W64: $exec = S_MOV_B64 [[SAVEEXEC]]
@@ -55,7 +55,7 @@
# W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
# TODO: S_XOR_B32_term should be `implicit-def $scc`
# W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]]
-# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+# W32: SI_WATERFALL_LOOP %bb.1, implicit $exec
# W32-LABEL: bb.2:
# W32: $exec_lo = S_MOV_B32 [[SAVEEXEC]]
---
@@ -103,7 +103,7 @@ body: |
# W64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
# W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
# W64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
-# W64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+# W64: SI_WATERFALL_LOOP %bb.1, implicit $exec
# W64-LABEL: bb.2:
# W64: $exec = S_MOV_B64 [[SAVEEXEC]]
@@ -128,7 +128,7 @@ body: |
# W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
# TODO: S_XOR_B32_term should be `implicit-def $scc`
# W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]]
-# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+# W32: SI_WATERFALL_LOOP %bb.1, implicit $exec
# W32-LABEL: bb.2:
# W32: $exec_lo = S_MOV_B32 [[SAVEEXEC]]
---
@@ -176,7 +176,7 @@ body: |
# W64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
# W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
# W64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
-# W64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+# W64: SI_WATERFALL_LOOP %bb.1, implicit $exec
# W64-LABEL: bb.2:
# W64: $exec = S_MOV_B64 [[SAVEEXEC]]
@@ -201,7 +201,7 @@ body: |
# W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
# TODO: S_XOR_B32_term should be `implicit-def $scc`
# W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]]
-# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+# W32: SI_WATERFALL_LOOP %bb.1, implicit $exec
# W32-LABEL: bb.2:
# W32: $exec_lo = S_MOV_B32 [[SAVEEXEC]]
---
@@ -286,7 +286,7 @@ body: |
# W64-NO-ADDR64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
# W64-NO-ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
# W64-NO-ADDR64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
-# W64-NO-ADDR64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+# W64-NO-ADDR64: SI_WATERFALL_LOOP %bb.1, implicit $exec
# W64-NO-ADDR64-LABEL: bb.2:
# W64-NO-ADDR64: $exec = S_MOV_B64 [[SAVEEXEC]]
@@ -309,7 +309,7 @@ body: |
# W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
# TODO: S_XOR_B32_term should be `implicit-def $scc`
# W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]]
-# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+# W32: SI_WATERFALL_LOOP %bb.1, implicit $exec
# W32-LABEL: bb.2:
# W32: $exec_lo = S_MOV_B32 [[SAVEEXEC]]
More information about the llvm-commits
mailing list