[llvm] 7ec6927 - Revert "[AMDGPU] Insert PS early exit at end of control flow"
Carl Ritson via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 2 21:04:33 PDT 2020
Author: Carl Ritson
Date: 2020-07-03T13:03:33+09:00
New Revision: 7ec6927badecae0adf63eb72c42194deb68a9685
URL: https://github.com/llvm/llvm-project/commit/7ec6927badecae0adf63eb72c42194deb68a9685
DIFF: https://github.com/llvm/llvm-project/commit/7ec6927badecae0adf63eb72c42194deb68a9685.diff
LOG: Revert "[AMDGPU] Insert PS early exit at end of control flow"
This reverts commit 2bfcacf0ad362956277a1c2c9ba00ddc453a42ce.
There appears to be an issue with analysis preservation.
Added:
Modified:
llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
llvm/test/CodeGen/AMDGPU/kill-infinite-loop.ll
llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
index e208385d0fb9..d4371ef4a6ca 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -447,15 +447,6 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
break;
}
- case AMDGPU::SI_KILL_CLEANUP:
- if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS &&
- dominatesAllReachable(MBB)) {
- KillInstrs.push_back(&MI);
- } else {
- MI.eraseFromParent();
- }
- break;
-
default:
break;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index ec378379ca92..2b053f8dc95e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -379,9 +379,6 @@ multiclass PseudoInstKill <dag ins> {
defm SI_KILL_I1 : PseudoInstKill <(ins SCSrc_i1:$src, i1imm:$killvalue)>;
defm SI_KILL_F32_COND_IMM : PseudoInstKill <(ins VSrc_b32:$src0, i32imm:$src1, i32imm:$cond)>;
-let Defs = [EXEC] in
-def SI_KILL_CLEANUP : SPseudoInstSI <(outs), (ins)>;
-
let Defs = [EXEC,VCC] in
def SI_ILLEGAL_COPY : SPseudoInstSI <
(outs unknown:$dst), (ins unknown:$src),
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 36d52ac3ee89..1e90e6ba5418 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -89,10 +89,8 @@ class SILowerControlFlow : public MachineFunctionPass {
MachineRegisterInfo *MRI = nullptr;
SetVector<MachineInstr*> LoweredEndCf;
DenseSet<Register> LoweredIf;
- SmallSet<MachineInstr *, 16> NeedsKillCleanup;
const TargetRegisterClass *BoolRC = nullptr;
- bool InsertKillCleanups;
unsigned AndOpc;
unsigned OrOpc;
unsigned XorOpc;
@@ -113,8 +111,6 @@ class SILowerControlFlow : public MachineFunctionPass {
void combineMasks(MachineInstr &MI);
- void process(MachineInstr &MI);
-
// Skip to the next instruction, ignoring debug instructions, and trivial
// block boundaries (blocks that have one (typically fallthrough) successor,
// and the successor has one predecessor.
@@ -164,36 +160,36 @@ static void setImpSCCDefDead(MachineInstr &MI, bool IsDead) {
char &llvm::SILowerControlFlowID = SILowerControlFlow::ID;
-static bool hasKill(const MachineBasicBlock *Begin,
- const MachineBasicBlock *End, const SIInstrInfo *TII) {
+static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI,
+ const SIInstrInfo *TII) {
+ Register SaveExecReg = MI.getOperand(0).getReg();
+ auto U = MRI->use_instr_nodbg_begin(SaveExecReg);
+
+ if (U == MRI->use_instr_nodbg_end() ||
+ std::next(U) != MRI->use_instr_nodbg_end() ||
+ U->getOpcode() != AMDGPU::SI_END_CF)
+ return false;
+
+ // Check for SI_KILL_*_TERMINATOR on path from if to endif.
+ // if there is any such terminator simplififcations are not safe.
+ auto SMBB = MI.getParent();
+ auto EMBB = U->getParent();
DenseSet<const MachineBasicBlock*> Visited;
- SmallVector<MachineBasicBlock *, 4> Worklist(Begin->succ_begin(),
- Begin->succ_end());
+ SmallVector<MachineBasicBlock*, 4> Worklist(SMBB->succ_begin(),
+ SMBB->succ_end());
while (!Worklist.empty()) {
MachineBasicBlock *MBB = Worklist.pop_back_val();
- if (MBB == End || !Visited.insert(MBB).second)
+ if (MBB == EMBB || !Visited.insert(MBB).second)
continue;
- for (auto &Term : MBB->terminators())
+ for(auto &Term : MBB->terminators())
if (TII->isKillTerminator(Term.getOpcode()))
- return true;
+ return false;
Worklist.append(MBB->succ_begin(), MBB->succ_end());
}
- return false;
-}
-
-static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI) {
- Register SaveExecReg = MI.getOperand(0).getReg();
- auto U = MRI->use_instr_nodbg_begin(SaveExecReg);
-
- if (U == MRI->use_instr_nodbg_end() ||
- std::next(U) != MRI->use_instr_nodbg_end() ||
- U->getOpcode() != AMDGPU::SI_END_CF)
- return false;
-
return true;
}
@@ -211,35 +207,7 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
// If there is only one use of save exec register and that use is SI_END_CF,
// we can optimize SI_IF by returning the full saved exec mask instead of
// just cleared bits.
- bool SimpleIf = isSimpleIf(MI, MRI);
-
- if (InsertKillCleanups) {
- // Check for SI_KILL_*_TERMINATOR on full path of control flow and
- // flag the associated SI_END_CF for insertion of a kill cleanup.
- auto UseMI = MRI->use_instr_nodbg_begin(SaveExecReg);
- while (UseMI->getOpcode() != AMDGPU::SI_END_CF) {
- assert(std::next(UseMI) == MRI->use_instr_nodbg_end());
- assert(UseMI->getOpcode() == AMDGPU::SI_ELSE);
- MachineOperand &NextExec = UseMI->getOperand(0);
- Register NextExecReg = NextExec.getReg();
- if (NextExec.isDead()) {
- assert(!SimpleIf);
- break;
- }
- UseMI = MRI->use_instr_nodbg_begin(NextExecReg);
- }
- if (UseMI->getOpcode() == AMDGPU::SI_END_CF) {
- if (hasKill(MI.getParent(), UseMI->getParent(), TII)) {
- NeedsKillCleanup.insert(&*UseMI);
- SimpleIf = false;
- }
- }
- } else if (SimpleIf) {
- // Check for SI_KILL_*_TERMINATOR on path from if to endif.
- // if there is any such terminator simplifications are not safe.
- auto UseMI = MRI->use_instr_nodbg_begin(SaveExecReg);
- SimpleIf = !hasKill(MI.getParent(), UseMI->getParent(), TII);
- }
+ bool SimpleIf = isSimpleIf(MI, MRI, TII);
// Add an implicit def of exec to discourage scheduling VALU after this which
// will interfere with trying to form s_and_saveexec_b64 later.
@@ -459,8 +427,6 @@ SILowerControlFlow::skipIgnoreExecInstsTrivialSucc(
auto E = B->end();
for ( ; It != E; ++It) {
- if (It->getOpcode() == AMDGPU::SI_KILL_CLEANUP)
- continue;
if (TII->mayReadEXEC(*MRI, *It))
break;
}
@@ -495,18 +461,8 @@ void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
LoweredEndCf.insert(NewMI);
- // If this ends control flow which contains kills (as flagged in emitIf)
- // then insert an SI_KILL_CLEANUP immediately following the exec mask
- // manipulation. This can be lowered to early termination if appropriate.
- MachineInstr *CleanUpMI = nullptr;
- if (NeedsKillCleanup.count(&MI))
- CleanUpMI = BuildMI(MBB, InsPt, DL, TII->get(AMDGPU::SI_KILL_CLEANUP));
-
- if (LIS) {
+ if (LIS)
LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
- if (CleanUpMI)
- LIS->InsertMachineInstrInMaps(*CleanUpMI);
- }
MI.eraseFromParent();
@@ -597,56 +553,6 @@ void SILowerControlFlow::optimizeEndCf() {
}
}
-void SILowerControlFlow::process(MachineInstr &MI) {
- MachineBasicBlock &MBB = *MI.getParent();
- MachineBasicBlock::iterator I(MI);
- MachineInstr *Prev = (I != MBB.begin()) ? &*(std::prev(I)) : nullptr;
-
- switch (MI.getOpcode()) {
- case AMDGPU::SI_IF:
- emitIf(MI);
- break;
-
- case AMDGPU::SI_ELSE:
- emitElse(MI);
- break;
-
- case AMDGPU::SI_IF_BREAK:
- emitIfBreak(MI);
- break;
-
- case AMDGPU::SI_LOOP:
- emitLoop(MI);
- break;
-
- case AMDGPU::SI_END_CF:
- emitEndCf(MI);
- break;
-
- default:
- assert(false && "Attempt to process unsupported instruction");
- break;
- }
-
- MachineBasicBlock::iterator Next;
- for (I = Prev ? Prev->getIterator() : MBB.begin(); I != MBB.end(); I = Next) {
- Next = std::next(I);
- MachineInstr &MaskMI = *I;
- switch (MaskMI.getOpcode()) {
- case AMDGPU::S_AND_B64:
- case AMDGPU::S_OR_B64:
- case AMDGPU::S_AND_B32:
- case AMDGPU::S_OR_B32:
- // Cleanup bit manipulations on exec mask
- combineMasks(MaskMI);
- break;
- default:
- I = MBB.end();
- break;
- }
- }
-}
-
bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TII = ST.getInstrInfo();
@@ -656,8 +562,6 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
LIS = getAnalysisIfAvailable<LiveIntervals>();
MRI = &MF.getRegInfo();
BoolRC = TRI->getBoolRC();
- InsertKillCleanups =
- MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS;
if (ST.isWave32()) {
AndOpc = AMDGPU::S_AND_B32;
@@ -679,49 +583,62 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
Exec = AMDGPU::EXEC;
}
- SmallVector<MachineInstr *, 32> Worklist;
-
MachineFunction::iterator NextBB;
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; BI = NextBB) {
NextBB = std::next(BI);
MachineBasicBlock &MBB = *BI;
- MachineBasicBlock::iterator I, Next;
- for (I = MBB.begin(); I != MBB.end(); I = Next) {
+ MachineBasicBlock::iterator I, Next, Last;
+
+ for (I = MBB.begin(), Last = MBB.end(); I != MBB.end(); I = Next) {
Next = std::next(I);
MachineInstr &MI = *I;
switch (MI.getOpcode()) {
case AMDGPU::SI_IF:
- process(MI);
+ emitIf(MI);
break;
case AMDGPU::SI_ELSE:
+ emitElse(MI);
+ break;
+
case AMDGPU::SI_IF_BREAK:
+ emitIfBreak(MI);
+ break;
+
case AMDGPU::SI_LOOP:
+ emitLoop(MI);
+ break;
+
case AMDGPU::SI_END_CF:
- // Only build worklist if SI_IF instructions must be processed first.
- if (InsertKillCleanups)
- Worklist.push_back(&MI);
- else
- process(MI);
+ emitEndCf(MI);
break;
+ case AMDGPU::S_AND_B64:
+ case AMDGPU::S_OR_B64:
+ case AMDGPU::S_AND_B32:
+ case AMDGPU::S_OR_B32:
+ // Cleanup bit manipulations on exec mask
+ combineMasks(MI);
+ Last = I;
+ continue;
+
default:
- break;
+ Last = I;
+ continue;
}
+
+ // Replay newly inserted code to combine masks
+ Next = (Last == MBB.end()) ? MBB.begin() : Last;
}
}
- for (MachineInstr *MI : Worklist)
- process(*MI);
-
optimizeEndCf();
LoweredEndCf.clear();
LoweredIf.clear();
- NeedsKillCleanup.clear();
return true;
}
diff --git a/llvm/test/CodeGen/AMDGPU/kill-infinite-loop.ll b/llvm/test/CodeGen/AMDGPU/kill-infinite-loop.ll
index 172e6bf32721..a2358f3a80f4 100644
--- a/llvm/test/CodeGen/AMDGPU/kill-infinite-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/kill-infinite-loop.ll
@@ -61,11 +61,9 @@ loop:
br label %loop
}
-; Check that the epilog is the final block
+; In case there's an epilog, we shouldn't have to do this.
; CHECK-LABEL: return_nonvoid
-; CHECK: exp null off, off, off, off done vm
-; CHECK-NEXT: s_endpgm
-; CHECK-NEXT: BB{{[0-9]+}}_{{[0-9]+}}:
+; CHECK-NOT: exp null off, off, off, off done vm
define amdgpu_ps float @return_nonvoid(float %0) #0 {
main_body:
%cmp = fcmp olt float %0, 1.000000e+01
diff --git a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
index f178259f2b6f..fee3158d4296 100644
--- a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
+++ b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
@@ -470,11 +470,7 @@ bb9: ; preds = %bb4
}
; CHECK-LABEL: {{^}}cbranch_kill:
-; CHECK: ; %bb.{{[0-9]+}}: ; %export
-; CHECK-NEXT: s_or_b64
-; CHECK-NEXT: s_cbranch_execz [[EXIT:BB[0-9]+_[0-9]+]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: exp null off, off, off, off done vm
+; CHECK-NOT: exp null off, off, off, off done vm
define amdgpu_ps void @cbranch_kill(i32 inreg %0, <2 x float> %1) {
.entry:
%val0 = extractelement <2 x float> %1, i32 0
More information about the llvm-commits
mailing list