[llvm] 226cda5 - [AMDGPU] Moving SI_RETURN_TO_EPILOG handling out of SIInsertSkips.
Christudasan Devadasan via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 29 08:12:34 PDT 2020
Author: Christudasan Devadasan
Date: 2020-06-29T20:41:53+05:30
New Revision: 226cda58d50555f626737c3cb9dffef0cf3d906c
URL: https://github.com/llvm/llvm-project/commit/226cda58d50555f626737c3cb9dffef0cf3d906c
DIFF: https://github.com/llvm/llvm-project/commit/226cda58d50555f626737c3cb9dffef0cf3d906c.diff
LOG: [AMDGPU] Moving SI_RETURN_TO_EPILOG handling out of SIInsertSkips.
For now, the handling is moved to SIPreEmitPeephole.
We should eventually find a more suitable place for this code.
Reviewed By: nhaehnle
Differential Revision: https://reviews.llvm.org/D77544
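For context, the relocated code rewrites any SI_RETURN_TO_EPILOG that is not the
last instruction of the function: the pseudo is replaced by an S_BRANCH to a
single empty block created at the end of the function, giving the externally
appended epilog code a unique landing point. A rough before/after sketch in
MIR-like notation (the block numbers and the $vgpr0 operand are illustrative,
not taken from the commit):

  ; Before: SI_RETURN_TO_EPILOG sits in a non-final block.
  bb.1.if:
    SI_RETURN_TO_EPILOG killed $vgpr0
  bb.2.else:
    ...

  ; After: the pseudo is replaced by a branch to a fresh empty block
  ; at the end; the epilog bytecode is appended after that block.
  bb.1.if:
    S_BRANCH %bb.3
  bb.2.else:
    ...
  bb.3: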
Added:
llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll
Modified:
llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
index de4d7904caa4..530529567e8e 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -366,7 +366,6 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
MDT = &getAnalysis<MachineDominatorTree>();
SkipThreshold = SkipThresholdFlag;
- MachineBasicBlock *EmptyMBBAtEnd = nullptr;
SmallVector<MachineInstr *, 4> KillInstrs;
bool MadeChange = false;
@@ -417,29 +416,6 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
break;
}
- case AMDGPU::SI_RETURN_TO_EPILOG:
- // FIXME: Should move somewhere else
- assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid());
-
- // Graphics shaders returning non-void shouldn't contain S_ENDPGM,
- // because external bytecode will be appended at the end.
- if (&MBB != &MF.back() || &MI != &MBB.back()) {
- // SI_RETURN_TO_EPILOG is not the last instruction. Add an empty block at
- // the end and jump there.
- if (!EmptyMBBAtEnd) {
- EmptyMBBAtEnd = MF.CreateMachineBasicBlock();
- MF.insert(MF.end(), EmptyMBBAtEnd);
- }
-
- MBB.addSuccessor(EmptyMBBAtEnd);
- BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::S_BRANCH))
- .addMBB(EmptyMBBAtEnd);
- MI.eraseFromParent();
-
- MDT->getBase().insertEdge(&MBB, EmptyMBBAtEnd);
- }
- break;
-
default:
break;
}
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index 62df8ef488c6..1bb66907f9ce 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -15,6 +15,7 @@
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Support/CommandLine.h"
@@ -198,6 +199,7 @@ bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TII = ST.getInstrInfo();
TRI = &TII->getRegisterInfo();
+ MachineBasicBlock *EmptyMBBAtEnd = nullptr;
bool Changed = false;
for (MachineBasicBlock &MBB : MF) {
@@ -209,6 +211,28 @@ bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) {
case AMDGPU::S_CBRANCH_VCCNZ:
Changed |= optimizeVccBranch(MI);
continue;
+ case AMDGPU::SI_RETURN_TO_EPILOG:
+ // FIXME: This is not an optimization and should be
+ // moved somewhere else.
+ assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid());
+
+ // Graphics shaders returning non-void shouldn't contain S_ENDPGM,
+ // because external bytecode will be appended at the end.
+ if (&MBB != &MF.back() || &MI != &MBB.back()) {
+ // SI_RETURN_TO_EPILOG is not the last instruction. Add an empty block
+ // at the end and jump there.
+ if (!EmptyMBBAtEnd) {
+ EmptyMBBAtEnd = MF.CreateMachineBasicBlock();
+ MF.insert(MF.end(), EmptyMBBAtEnd);
+ }
+
+ MBB.addSuccessor(EmptyMBBAtEnd);
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::S_BRANCH))
+ .addMBB(EmptyMBBAtEnd);
+ MI.eraseFromParent();
+ MBBE = MBB.getFirstTerminator();
+ }
+ break;
default:
break;
}
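One detail worth noting relative to the code removed from SIInsertSkips: after
MI.eraseFromParent(), the new copy refreshes MBBE via MBB.getFirstTerminator().
Erasing the old terminator invalidates the iterator bounding the instruction
walk in SIPreEmitPeephole (presumably MBBE is that bound), so it must be
recomputed before the loop continues. Conversely, the MDT->getBase().insertEdge()
update from SIInsertSkips is dropped, as SIPreEmitPeephole evidently does not
use the MachineDominatorTree.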
diff --git a/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll b/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll
new file mode 100644
index 000000000000..c34a62bfc31d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -stop-after=si-pre-emit-peephole -o - %s | FileCheck -check-prefix=GCN %s
+; If the block containing the SI_RETURN_TO_EPILOG is not the last block, insert an empty block at the end and
+; insert an unconditional jump there.
+define amdgpu_ps float @simple_test_return_to_epilog(float %a) #0 {
+ ; GCN-LABEL: name: simple_test_return_to_epilog
+ ; GCN: bb.0.entry:
+ ; GCN: liveins: $vgpr0
+ ; GCN: SI_RETURN_TO_EPILOG killed $vgpr0
+entry:
+ ret float %a
+}
+
+define amdgpu_ps float @test_return_to_epilog_into_end_block(i32 inreg %a, float %b) #0 {
+ ; GCN-LABEL: name: test_return_to_epilog_into_end_block
+ ; GCN: bb.0.entry:
+ ; GCN: successors: %bb.1(0x7fffffff), %bb.2(0x00000001)
+ ; GCN: liveins: $sgpr2, $vgpr0
+ ; GCN: S_CMP_LT_I32 killed renamable $sgpr2, 1, implicit-def $scc
+ ; GCN: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ ; GCN: bb.1.if:
+ ; GCN: successors: %bb.3(0x80000000)
+ ; GCN: liveins: $vgpr0
+ ; GCN: S_BRANCH %bb.3
+ ; GCN: bb.2.else:
+ ; GCN: successors:
+ ; GCN: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ ; GCN: bb.3:
+entry:
+ %cc = icmp sgt i32 %a, 0
+ br i1 %cc, label %if, label %else
+if: ; preds = %entry
+ ret float %b
+else: ; preds = %entry
+ store volatile i32 0, i32 addrspace(1)* undef
+ unreachable
+}
+
+define amdgpu_ps float @test_unify_return_to_epilog_into_end_block(i32 inreg %a, i32 inreg %b, float %c, float %d) #0 {
+ ; GCN-LABEL: name: test_unify_return_to_epilog_into_end_block
+ ; GCN: bb.0.entry:
+ ; GCN: successors: %bb.1(0x50000000), %bb.2(0x30000000)
+ ; GCN: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1
+ ; GCN: S_CMP_LT_I32 killed renamable $sgpr2, 1, implicit-def $scc
+ ; GCN: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ ; GCN: bb.1.if:
+ ; GCN: successors: %bb.5(0x80000000)
+ ; GCN: liveins: $vgpr0
+ ; GCN: S_BRANCH %bb.5
+ ; GCN: bb.2.else.if.cond:
+ ; GCN: successors: %bb.3(0x7fffffff), %bb.4(0x00000001)
+ ; GCN: liveins: $sgpr3, $vgpr1
+ ; GCN: S_CMP_LT_I32 killed renamable $sgpr3, 1, implicit-def $scc
+ ; GCN: S_CBRANCH_SCC1 %bb.4, implicit killed $scc
+ ; GCN: bb.3.else.if:
+ ; GCN: successors: %bb.5(0x80000000)
+ ; GCN: liveins: $vgpr1
+ ; GCN: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec, implicit $exec
+ ; GCN: S_BRANCH %bb.5
+ ; GCN: bb.4.else:
+ ; GCN: successors:
+ ; GCN: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ ; GCN: bb.5:
+entry:
+ %cc = icmp sgt i32 %a, 0
+ br i1 %cc, label %if, label %else.if.cond
+if: ; preds = %entry
+ ret float %c
+else.if.cond: ; preds = %entry
+ %cc1 = icmp sgt i32 %b, 0
+ br i1 %cc1, label %else.if, label %else
+else.if: ; preds = %else.if.cond
+ ret float %d
+else: ; preds = %else.if.cond
+ store volatile i32 0, i32 addrspace(1)* undef
+ unreachable
+}
+
+attributes #0 = { nounwind }
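Note that the RUN line stops code generation immediately after the pass under
test (-stop-after=si-pre-emit-peephole) and prints MIR, which is why the GCN
checks match machine basic blocks and pseudo instructions such as
SI_RETURN_TO_EPILOG and S_BRANCH rather than final ISA.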