[llvm] 226cda5 - [AMDGPU] Moving SI_RETURN_TO_EPILOG handling out of SIInsertSkips.

Christudasan Devadasan via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 29 08:12:34 PDT 2020


Author: Christudasan Devadasan
Date: 2020-06-29T20:41:53+05:30
New Revision: 226cda58d50555f626737c3cb9dffef0cf3d906c

URL: https://github.com/llvm/llvm-project/commit/226cda58d50555f626737c3cb9dffef0cf3d906c
DIFF: https://github.com/llvm/llvm-project/commit/226cda58d50555f626737c3cb9dffef0cf3d906c.diff

LOG: [AMDGPU] Moving SI_RETURN_TO_EPILOG handling out of SIInsertSkips.

For now, move it to SIPreEmitPeephole; the right long-term home for this
code is still to be found.
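
In effect, an SI_RETURN_TO_EPILOG that is not the last instruction of the
function is rewritten into a branch to a fresh empty block appended at the
end of the function (a minimal MIR sketch; block numbers are illustrative):

  Before:
    bb.1.if:
      liveins: $vgpr0
      SI_RETURN_TO_EPILOG killed $vgpr0
    bb.2.else:
      ...

  After:
    bb.1.if:
      successors: %bb.3
      liveins: $vgpr0
      S_BRANCH %bb.3
    bb.2.else:
      ...
    bb.3: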

Reviewed By: nhaehnle

Differential Revision: https://reviews.llvm.org/D77544

Added: 
    llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll

Modified: 
    llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
    llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
index de4d7904caa4..530529567e8e 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -366,7 +366,6 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
   MDT = &getAnalysis<MachineDominatorTree>();
   SkipThreshold = SkipThresholdFlag;
 
-  MachineBasicBlock *EmptyMBBAtEnd = nullptr;
   SmallVector<MachineInstr *, 4> KillInstrs;
   bool MadeChange = false;
 
@@ -417,29 +416,6 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
         break;
       }
 
-      case AMDGPU::SI_RETURN_TO_EPILOG:
-        // FIXME: Should move somewhere else
-        assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid());
-
-        // Graphics shaders returning non-void shouldn't contain S_ENDPGM,
-        // because external bytecode will be appended at the end.
-        if (&MBB != &MF.back() || &MI != &MBB.back()) {
-          // SI_RETURN_TO_EPILOG is not the last instruction. Add an empty block at
-          // the end and jump there.
-          if (!EmptyMBBAtEnd) {
-            EmptyMBBAtEnd = MF.CreateMachineBasicBlock();
-            MF.insert(MF.end(), EmptyMBBAtEnd);
-          }
-
-          MBB.addSuccessor(EmptyMBBAtEnd);
-          BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::S_BRANCH))
-            .addMBB(EmptyMBBAtEnd);
-          MI.eraseFromParent();
-
-          MDT->getBase().insertEdge(&MBB, EmptyMBBAtEnd);
-        }
-        break;
-
       default:
         break;
       }

diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index 62df8ef488c6..1bb66907f9ce 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -15,6 +15,7 @@
 #include "AMDGPUSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/Support/CommandLine.h"
 
@@ -198,6 +199,7 @@ bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) {
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   TII = ST.getInstrInfo();
   TRI = &TII->getRegisterInfo();
+  MachineBasicBlock *EmptyMBBAtEnd = nullptr;
   bool Changed = false;
 
   for (MachineBasicBlock &MBB : MF) {
@@ -209,6 +211,28 @@ bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) {
       case AMDGPU::S_CBRANCH_VCCNZ:
         Changed |= optimizeVccBranch(MI);
         continue;
+      case AMDGPU::SI_RETURN_TO_EPILOG:
+        // FIXME: This is not an optimization and should be
+        // moved somewhere else.
+        assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid());
+
+        // Graphics shaders returning non-void shouldn't contain S_ENDPGM,
+        // because external bytecode will be appended at the end.
+        if (&MBB != &MF.back() || &MI != &MBB.back()) {
+          // SI_RETURN_TO_EPILOG is not the last instruction. Add an empty block
+          // at the end and jump there.
+          if (!EmptyMBBAtEnd) {
+            EmptyMBBAtEnd = MF.CreateMachineBasicBlock();
+            MF.insert(MF.end(), EmptyMBBAtEnd);
+          }
+
+          MBB.addSuccessor(EmptyMBBAtEnd);
+          BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::S_BRANCH))
+              .addMBB(EmptyMBBAtEnd);
+          MI.eraseFromParent();
+          MBBE = MBB.getFirstTerminator();
+        }
+        break;
       default:
         break;
       }

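The rewrite can be observed by stopping codegen right after this pass, as
the RUN line of the new test below does (input.ll stands in for any IR
file):

  llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs \
      -stop-after=si-pre-emit-peephole -o - input.ll
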
diff --git a/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll b/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll
new file mode 100644
index 000000000000..c34a62bfc31d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -stop-after=si-pre-emit-peephole -o - %s | FileCheck -check-prefix=GCN %s
+; If the block containing the SI_RETURN_TO_EPILOG is not the last block, insert an empty block at the end and
+; insert an unconditional jump there.
+define amdgpu_ps float @simple_test_return_to_epilog(float %a) #0 {
+  ; GCN-LABEL: name: simple_test_return_to_epilog
+  ; GCN: bb.0.entry:
+  ; GCN:   liveins: $vgpr0
+  ; GCN:   SI_RETURN_TO_EPILOG killed $vgpr0
+entry:
+  ret float %a
+}
+
+define amdgpu_ps float @test_return_to_epilog_into_end_block(i32 inreg %a, float %b) #0 {
+  ; GCN-LABEL: name: test_return_to_epilog_into_end_block
+  ; GCN: bb.0.entry:
+  ; GCN:   successors: %bb.1(0x7fffffff), %bb.2(0x00000001)
+  ; GCN:   liveins: $sgpr2, $vgpr0
+  ; GCN:   S_CMP_LT_I32 killed renamable $sgpr2, 1, implicit-def $scc
+  ; GCN:   S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+  ; GCN: bb.1.if:
+  ; GCN:   successors: %bb.3(0x80000000)
+  ; GCN:   liveins: $vgpr0
+  ; GCN:   S_BRANCH %bb.3
+  ; GCN: bb.2.else:
+  ; GCN:   successors:
+  ; GCN:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN:   GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+  ; GCN: bb.3:
+entry:
+  %cc = icmp sgt i32 %a, 0
+  br i1 %cc, label %if, label %else
+if:                                               ; preds = %entry
+  ret float %b
+else:                                             ; preds = %entry
+  store volatile i32 0, i32 addrspace(1)* undef
+  unreachable
+}
+
+define amdgpu_ps float @test_unify_return_to_epilog_into_end_block(i32 inreg %a, i32 inreg %b, float %c, float %d) #0 {
+  ; GCN-LABEL: name: test_unify_return_to_epilog_into_end_block
+  ; GCN: bb.0.entry:
+  ; GCN:   successors: %bb.1(0x50000000), %bb.2(0x30000000)
+  ; GCN:   liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1
+  ; GCN:   S_CMP_LT_I32 killed renamable $sgpr2, 1, implicit-def $scc
+  ; GCN:   S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+  ; GCN: bb.1.if:
+  ; GCN:   successors: %bb.5(0x80000000)
+  ; GCN:   liveins: $vgpr0
+  ; GCN:   S_BRANCH %bb.5
+  ; GCN: bb.2.else.if.cond:
+  ; GCN:   successors: %bb.3(0x7fffffff), %bb.4(0x00000001)
+  ; GCN:   liveins: $sgpr3, $vgpr1
+  ; GCN:   S_CMP_LT_I32 killed renamable $sgpr3, 1, implicit-def $scc
+  ; GCN:   S_CBRANCH_SCC1 %bb.4, implicit killed $scc
+  ; GCN: bb.3.else.if:
+  ; GCN:   successors: %bb.5(0x80000000)
+  ; GCN:   liveins: $vgpr1
+  ; GCN:   $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec, implicit $exec
+  ; GCN:   S_BRANCH %bb.5
+  ; GCN: bb.4.else:
+  ; GCN:   successors:
+  ; GCN:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN:   GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+  ; GCN: bb.5:
+entry:
+  %cc = icmp sgt i32 %a, 0
+  br i1 %cc, label %if, label %else.if.cond
+if:                                               ; preds = %entry
+  ret float %c
+else.if.cond:                                     ; preds = %entry
+  %cc1 = icmp sgt i32 %b, 0
+  br i1 %cc1, label %else.if, label %else
+else.if:                                          ; preds = %else.if.cond
+  ret float %d
+else:                                             ; preds = %else.if.cond
+  store volatile i32 0, i32 addrspace(1)* undef
+  unreachable
+}
+
+attributes #0 = { nounwind }