[llvm] 12a3243 - [AMDGPU] Limit endcf-collapse to simple if
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 7 10:27:33 PDT 2020
Author: Stanislav Mekhanoshin
Date: 2020-04-07T10:27:23-07:00
New Revision: 12a324393d61a0ef602feeb9705ccb5b61563c27
URL: https://github.com/llvm/llvm-project/commit/12a324393d61a0ef602feeb9705ccb5b61563c27
DIFF: https://github.com/llvm/llvm-project/commit/12a324393d61a0ef602feeb9705ccb5b61563c27.diff
LOG: [AMDGPU] Limit endcf-collapse to simple if
We can only collapse adjacent SI_END_CF instructions if the outer statement
belongs to a simple SI_IF; otherwise the correct mask is not in the register
we expect, but is instead an argument of an S_XOR instruction.
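For illustration, a rough sketch of the two lowerings (register names are
made up; compare the GCN check lines in the test added below). A simple
SI_IF saves the original mask as a plain COPY of exec:

  ; Simple SI_IF: %saved holds the full original exec mask, so a later
  ; SI_END_CF can restore it with a single S_OR of %saved into exec.
  %saved:sreg_64 = COPY $exec, implicit-def $exec
  %masked:sreg_64 = S_AND_B64 %saved, %cond, implicit-def dead $scc
  $exec = S_MOV_B64_term killed %masked

whereas in the non-simple form the register that SI_END_CF restores is
defined by an S_XOR, and the full original mask is only one of its
operands:

  ; Non-simple SI_IF: SI_END_CF restores %inv, which only re-enables the
  ; lanes disabled by this if; the full mask %saved is merely an S_XOR
  ; operand, so the collapse cannot assume the restored register holds it.
  %saved:sreg_64 = COPY $exec, implicit-def $exec
  %masked:sreg_64 = S_AND_B64 %saved, %cond, implicit-def dead $scc
  %inv:sreg_64 = S_XOR_B64 %masked, %saved, implicit-def dead $scc
  $exec = S_MOV_B64_term killed %masked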
Even if an SI_IF is simple, it might still be lowered using S_XOR, because
the lowering depends on basic block layout. An SI_IF is not considered
simple if the instruction consuming its output is not an SI_END_CF; since
that SI_END_CF might have already been lowered to an S_OR, the
isSimpleIf() check may return false.
This situation is an opportunity for a further optimization of SI_IF
lowering, but that is a separate change. In the meantime, move the
SI_END_CF collapsing to after the lowering, when we already know how the
rest of the CFG was lowered, since the non-simple SI_IF case still needs
to be handled.
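For reference, a minimal before/after sketch of the collapse itself
(illustrative names): when the outer saved mask is a full COPY of exec,
OR-ing it back re-enables the inner if's lanes as well, so the inner
restore is dead:

  ; Before: two adjacent exec restores from lowered SI_END_CFs.
  $exec = S_OR_B64 $exec, %inner_saved, implicit-def $scc
  $exec = S_OR_B64 $exec, %outer_saved, implicit-def $scc

  ; After optimizeEndCf(): only the outer restore remains. This is valid
  ; only if %outer_saved was defined by a lowered simple SI_IF (a COPY of
  ; exec); an S_XOR-defined mask would leave the inner lanes disabled.
  $exec = S_OR_B64 $exec, %outer_saved, implicit-def $scc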
Differential Revision: https://reviews.llvm.org/D77610
Added:
Modified:
llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
llvm/test/CodeGen/AMDGPU/collapse-endcf.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index caf21086cc37..1e90e6ba5418 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -51,6 +51,7 @@
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
@@ -86,7 +87,7 @@ class SILowerControlFlow : public MachineFunctionPass {
const SIInstrInfo *TII = nullptr;
LiveIntervals *LIS = nullptr;
MachineRegisterInfo *MRI = nullptr;
- DenseSet<const MachineInstr*> LoweredEndCf;
+ SetVector<MachineInstr*> LoweredEndCf;
DenseSet<Register> LoweredIf;
const TargetRegisterClass *BoolRC = nullptr;
@@ -117,6 +118,9 @@ class SILowerControlFlow : public MachineFunctionPass {
skipIgnoreExecInstsTrivialSucc(MachineBasicBlock &MBB,
MachineBasicBlock::iterator It) const;
+ // Remove redundant SI_END_CF instructions.
+ void optimizeEndCf();
+
public:
static char ID;
@@ -448,29 +452,6 @@ void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
MachineInstr *Def = MRI.getUniqueVRegDef(CFMask);
const DebugLoc &DL = MI.getDebugLoc();
- // If the only instruction immediately following this END_CF is an another
- // END_CF in the only successor we can avoid emitting exec mask restore here.
- if (RemoveRedundantEndcf) {
- auto Next =
- skipIgnoreExecInstsTrivialSucc(MBB, std::next(MI.getIterator()));
- if (Next != MBB.end() && (Next->getOpcode() == AMDGPU::SI_END_CF ||
- LoweredEndCf.count(&*Next))) {
- // Only skip inner END_CF if outer ENDCF belongs to SI_IF.
- // If that belongs to SI_ELSE then saved mask has an inverted value.
- Register SavedExec = Next->getOperand(0).getReg();
- const MachineInstr *Def = MRI.getUniqueVRegDef(SavedExec);
- // A lowered SI_IF turns definition into COPY of exec.
- if (Def && (Def->getOpcode() == AMDGPU::SI_IF ||
- LoweredIf.count(SavedExec))) {
- LLVM_DEBUG(dbgs() << "Skip redundant "; MI.dump());
- if (LIS)
- LIS->RemoveMachineInstrFromMaps(MI);
- MI.eraseFromParent();
- return;
- }
- }
- }
-
MachineBasicBlock::iterator InsPt =
Def && Def->getParent() == &MBB ? std::next(MachineBasicBlock::iterator(Def))
: MBB.begin();
@@ -544,6 +525,34 @@ void SILowerControlFlow::combineMasks(MachineInstr &MI) {
MRI->getUniqueVRegDef(Reg)->eraseFromParent();
}
+void SILowerControlFlow::optimizeEndCf() {
+ // If the only instruction immediately following this END_CF is another
+ // END_CF in the only successor, we can avoid emitting the exec mask restore here.
+ if (!RemoveRedundantEndcf)
+ return;
+
+ for (MachineInstr *MI : LoweredEndCf) {
+ MachineBasicBlock &MBB = *MI->getParent();
+ auto Next =
+ skipIgnoreExecInstsTrivialSucc(MBB, std::next(MI->getIterator()));
+ if (Next == MBB.end() || !LoweredEndCf.count(&*Next))
+ continue;
+ // Only skip inner END_CF if outer ENDCF belongs to SI_IF.
+ // If that belongs to SI_ELSE then saved mask has an inverted value.
+ Register SavedExec
+ = TII->getNamedOperand(*Next, AMDGPU::OpName::src1)->getReg();
+ assert(SavedExec.isVirtual() && "Expected saved exec to be src1!");
+
+ const MachineInstr *Def = MRI->getUniqueVRegDef(SavedExec);
+ if (Def && LoweredIf.count(SavedExec)) {
+ LLVM_DEBUG(dbgs() << "Skip redundant "; MI->dump());
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(*MI);
+ MI->eraseFromParent();
+ }
+ }
+}
+
bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TII = ST.getInstrInfo();
@@ -626,6 +635,8 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
}
}
+ optimizeEndCf();
+
LoweredEndCf.clear();
LoweredIf.clear();
diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir b/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir
index 8bb1d134154d..815251e3560c 100644
--- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir
+++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir
@@ -533,3 +533,63 @@ body: |
S_ENDPGM 0
...
+
+---
+name: if_inside_loop
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; GCN-LABEL: name: if_inside_loop
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.6(0x80000000)
+ ; GCN: S_BRANCH %bb.6
+ ; GCN: bb.1:
+ ; GCN: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+ ; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
+ ; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], undef %1:sreg_64, implicit-def dead $scc
+ ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]]
+ ; GCN: S_CBRANCH_EXECZ %bb.3, implicit $exec
+ ; GCN: bb.2:
+ ; GCN: successors: %bb.6(0x80000000)
+ ; GCN: S_BRANCH %bb.6
+ ; GCN: bb.3:
+ ; GCN: successors: %bb.4(0x80000000)
+ ; GCN: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc
+ ; GCN: bb.4:
+ ; GCN: successors: %bb.5(0x80000000)
+ ; GCN: $exec = S_OR_B64 $exec, %2, implicit-def $scc
+ ; GCN: bb.5:
+ ; GCN: successors: %bb.6(0x80000000)
+ ; GCN: bb.6:
+ ; GCN: successors: %bb.4(0x40000000), %bb.0(0x40000000)
+ ; GCN: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
+ ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc
+ ; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[S_AND_B64_1]], [[COPY1]], implicit-def dead $scc
+ ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_1]]
+ ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec
+ ; GCN: S_BRANCH %bb.0
+ ; GCN: S_ENDPGM 0
+ bb.0:
+ S_BRANCH %bb.6
+
+ bb.1:
+ %0:sreg_64 = SI_IF undef %1:sreg_64, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+
+ bb.2:
+ S_BRANCH %bb.6
+
+ bb.3:
+ SI_END_CF %0:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+
+ bb.4:
+ SI_END_CF %2:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+
+ bb.5:
+
+ bb.6:
+ %2:sreg_64 = SI_IF undef %3:sreg_64, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.0
+ S_ENDPGM 0
+
+...