[llvm-branch-commits] [llvm] 0feb4bc - Fix missed SI_RETURN_TO_EPILOG in pre-emit peephole
Tom Stellard via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Dec 9 08:12:06 PST 2020
Author: Carl Ritson
Date: 2020-12-09T11:11:31-05:00
New Revision: 0feb4bc5295b373876823972d4b33e62b305cd0a
URL: https://github.com/llvm/llvm-project/commit/0feb4bc5295b373876823972d4b33e62b305cd0a
DIFF: https://github.com/llvm/llvm-project/commit/0feb4bc5295b373876823972d4b33e62b305cd0a.diff
LOG: Fix missed SI_RETURN_TO_EPILOG in pre-emit peephole
SIPreEmitPeephole does not process all terminators, which means
it can fail to handle SI_RETURN_TO_EPILOG if immediately preceded
by a branch to the early exit block.
Reviewed By: rampitec
Differential Revision: https://reviews.llvm.org/D85872
(cherry picked from commit d538c5837a2cfedbf274133e29612da76003beed)
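To make the failure mode concrete: before this patch the peephole switched only on the block's first terminator, so when a conditional branch to the early-exit block came first, the SI_RETURN_TO_EPILOG behind it was never visited. The following is a minimal standalone C++ sketch of that scanning logic, not the actual backend code; toy opcodes and a std::vector stand in for LLVM's MachineBasicBlock machinery.

#include <cstdio>
#include <vector>

// Toy stand-ins for the AMDGPU opcodes named in this commit; the real
// definitions live in LLVM's AMDGPU backend.
enum Opcode { S_CBRANCH_VCCZ, S_CBRANCH_VCCNZ, SI_RETURN_TO_EPILOG, OTHER };

// Pre-fix shape: only the first terminator is inspected, so a
// SI_RETURN_TO_EPILOG sitting behind a VCC branch is silently skipped.
bool findsEpilogReturnOld(const std::vector<Opcode> &Terminators) {
  if (Terminators.empty())
    return false;
  switch (Terminators.front()) {
  case S_CBRANCH_VCCZ:
  case S_CBRANCH_VCCNZ:
    return false; // branch handled; scanning stops here
  case SI_RETURN_TO_EPILOG:
    return true;  // only found when it happens to come first
  default:
    return false;
  }
}

// Post-fix shape: after the VCC-branch check, every terminator is walked.
bool findsEpilogReturnNew(const std::vector<Opcode> &Terminators) {
  for (Opcode Op : Terminators)
    if (Op == SI_RETURN_TO_EPILOG)
      return true;
  return false;
}

int main() {
  // The case from the commit message: a branch to the early-exit block
  // immediately precedes SI_RETURN_TO_EPILOG.
  std::vector<Opcode> Block = {S_CBRANCH_VCCZ, SI_RETURN_TO_EPILOG};
  std::printf("old scan: %d\n", findsEpilogReturnOld(Block)); // 0 (missed)
  std::printf("new scan: %d\n", findsEpilogReturnNew(Block)); // 1 (found)
  return 0;
}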
Added:
Modified:
llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index f31c722db1b2..442be886a8ac 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -254,16 +254,24 @@ bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF) {
MachineBasicBlock::iterator MBBE = MBB.getFirstTerminator();
- if (MBBE != MBB.end()) {
- MachineInstr &MI = *MBBE;
+ MachineBasicBlock::iterator TermI = MBBE;
+ // Check first terminator for VCC branches to optimize
+ if (TermI != MBB.end()) {
+ MachineInstr &MI = *TermI;
switch (MI.getOpcode()) {
case AMDGPU::S_CBRANCH_VCCZ:
case AMDGPU::S_CBRANCH_VCCNZ:
Changed |= optimizeVccBranch(MI);
continue;
- case AMDGPU::SI_RETURN_TO_EPILOG:
- // FIXME: This is not an optimization and should be
- // moved somewhere else.
+ default:
+ break;
+ }
+ }
+ // Check all terminators for SI_RETURN_TO_EPILOG
+ // FIXME: This is not an optimization and should be moved somewhere else.
+ while (TermI != MBB.end()) {
+ MachineInstr &MI = *TermI;
+ if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) {
assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid());
// Graphics shaders returning non-void shouldn't contain S_ENDPGM,
@@ -281,11 +289,11 @@ bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) {
.addMBB(EmptyMBBAtEnd);
MI.eraseFromParent();
MBBE = MBB.getFirstTerminator();
+ TermI = MBBE;
+ continue;
}
- break;
- default:
- break;
}
+ TermI++;
}
if (!ST.hasVGPRIndexMode())
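One detail of the rewritten loop worth noting: expanding SI_RETURN_TO_EPILOG erases the instruction (MI.eraseFromParent()) and may append a new S_BRANCH, so the iterator is reset to MBB.getFirstTerminator() rather than advanced. Below is a toy model of that erase-and-restart pattern, assuming a plain std::list of ints in place of the machine IR.

#include <list>

// 7 plays the role of SI_RETURN_TO_EPILOG; 1 plays the role of the
// S_BRANCH inserted to the shared empty epilog block.
void expandEpilogReturns(std::list<int> &Terminators) {
  auto It = Terminators.begin();
  while (It != Terminators.end()) {
    if (*It == 7) {
      It = Terminators.erase(It); // models MI.eraseFromParent()
      Terminators.push_back(1);   // models the inserted S_BRANCH
      It = Terminators.begin();   // models TermI = MBB.getFirstTerminator()
      continue;
    }
    ++It;
  }
}

The loop terminates because each restart removes one SI_RETURN_TO_EPILOG, and restarting from the first terminator is the conservative choice: since at most one such return appears per block in practice, the extra rescans are cheap.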
diff --git a/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll b/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll
index c34a62bfc31d..416a72d51f99 100644
--- a/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll
+++ b/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll
@@ -78,4 +78,73 @@ else: ; preds = %else.if.cond
unreachable
}
+define amdgpu_ps { <4 x float> } @test_return_to_epilog_with_optimized_kill(float %val) #0 {
+ ; GCN-LABEL: name: test_return_to_epilog_with_optimized_kill
+ ; GCN: bb.0.entry:
+ ; GCN: successors: %bb.1(0x40000000), %bb.4(0x40000000)
+ ; GCN: liveins: $vgpr0
+ ; GCN: renamable $vgpr1 = nofpexcept V_RCP_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ ; GCN: nofpexcept V_CMP_NGT_F32_e32 0, killed $vgpr1, implicit-def $vcc, implicit $mode, implicit $exec
+ ; GCN: $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GCN: renamable $sgpr0_sgpr1 = S_XOR_B64 $exec, killed renamable $sgpr0_sgpr1, implicit-def dead $scc
+ ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec
+ ; GCN: bb.1.flow.preheader:
+ ; GCN: successors: %bb.2(0x80000000)
+ ; GCN: liveins: $vgpr0, $sgpr0_sgpr1
+ ; GCN: nofpexcept V_CMP_NGT_F32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $mode, implicit $exec
+ ; GCN: renamable $sgpr2_sgpr3 = S_MOV_B64 0
+ ; GCN: bb.2.flow:
+ ; GCN: successors: %bb.3(0x04000000), %bb.2(0x7c000000)
+ ; GCN: liveins: $vcc, $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; GCN: renamable $sgpr4_sgpr5 = S_AND_B64 $exec, renamable $vcc, implicit-def $scc
+ ; GCN: renamable $sgpr2_sgpr3 = S_OR_B64 killed renamable $sgpr4_sgpr5, killed renamable $sgpr2_sgpr3, implicit-def $scc
+ ; GCN: $exec = S_ANDN2_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
+ ; GCN: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GCN: bb.3.Flow:
+ ; GCN: successors: %bb.4(0x80000000)
+ ; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; GCN: $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
+ ; GCN: bb.4.Flow1:
+ ; GCN: successors: %bb.5(0x40000000), %bb.6(0x40000000)
+ ; GCN: liveins: $sgpr0_sgpr1
+ ; GCN: renamable $sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 killed renamable $sgpr0_sgpr1, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GCN: $exec = S_XOR_B64 $exec, renamable $sgpr0_sgpr1, implicit-def $scc
+ ; GCN: S_CBRANCH_EXECZ %bb.6, implicit $exec
+ ; GCN: bb.5.kill0:
+ ; GCN: successors: %bb.6(0x80000000)
+ ; GCN: liveins: $sgpr0_sgpr1
+ ; GCN: $exec = S_MOV_B64 0
+ ; GCN: bb.6.end:
+ ; GCN: successors: %bb.7(0x40000000), %bb.8(0x40000000)
+ ; GCN: liveins: $sgpr0_sgpr1
+ ; GCN: $exec = S_OR_B64 $exec, killed renamable $sgpr0_sgpr1, implicit-def $scc
+ ; GCN: S_CBRANCH_EXECZ %bb.7, implicit $exec
+ ; GCN: S_BRANCH %bb.8
+ ; GCN: bb.7:
+ ; GCN: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
+ ; GCN: S_ENDPGM 0
+ ; GCN: bb.8:
+entry:
+ %.i0 = fdiv reassoc nnan nsz arcp contract afn float 1.000000e+00, %val
+ %cmp0 = fcmp olt float %.i0, 0.000000e+00
+ br i1 %cmp0, label %kill0, label %flow
+
+kill0: ; preds = %entry
+ call void @llvm.amdgcn.kill(i1 false)
+ br label %end
+
+flow: ; preds = %entry
+ %cmp1 = fcmp olt float %val, 0.000000e+00
+ br i1 %cmp1, label %flow, label %end
+
+kill1: ; preds = %flow
+ call void @llvm.amdgcn.kill(i1 false)
+ br label %end
+
+end: ; preds = %kill0, %kill1, %flow
+ ret { <4 x float> } undef
+}
+
+declare void @llvm.amdgcn.kill(i1) #0
+
attributes #0 = { nounwind }
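The test's RUN line falls outside this hunk's context, but given the MIR-style GCN check lines, an invocation along these lines would exercise the new path (the exact flags in the checked-in file may differ, so treat this as an assumption):

llc -march=amdgcn -verify-machineinstrs -stop-after=si-pre-emit-peephole \
  llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll -o - | \
  FileCheck --check-prefix=GCN llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll

The interesting checks are in bb.6.end: the SI_RETURN_TO_EPILOG that followed S_CBRANCH_EXECZ %bb.7 (the branch to the kill-all export block) has been replaced by S_BRANCH %bb.8 to the new empty block, confirming the return is now expanded even when a branch to the early-exit block precedes it.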