[PATCH] D85872: [AMDGPU] Fix missed SI_RETURN_TO_EPILOG in pre-emit peephole

Carl Ritson via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 12 21:12:05 PDT 2020


critson updated this revision to Diff 285251.
critson added a comment.

- Rebase onto the pre-committed test.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D85872/new/

https://reviews.llvm.org/D85872

Files:
  llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
  llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll


Index: llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll
+++ llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll
@@ -115,14 +115,15 @@
   ; GCN:   liveins: $sgpr0_sgpr1
   ; GCN:   $exec = S_MOV_B64 0
   ; GCN: bb.6.end:
-  ; GCN:   successors: %bb.7(0x80000000)
+  ; GCN:   successors: %bb.7(0x40000000), %bb.8(0x40000000)
   ; GCN:   liveins: $sgpr0_sgpr1
   ; GCN:   $exec = S_OR_B64 $exec, killed renamable $sgpr0_sgpr1, implicit-def $scc
   ; GCN:   S_CBRANCH_EXECZ %bb.7, implicit $exec
-  ; GCN:   SI_RETURN_TO_EPILOG undef $vgpr0, undef $vgpr1, undef $vgpr2, undef $vgpr3
+  ; GCN:   S_BRANCH %bb.8
   ; GCN: bb.7:
   ; GCN:   EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
   ; GCN:   S_ENDPGM 0
+  ; GCN: bb.8:
 entry:
   %.i0 = fdiv reassoc nnan nsz arcp contract afn float 1.000000e+00, %val
   %cmp0 = fcmp olt float %.i0, 0.000000e+00
Index: llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -266,16 +266,24 @@
 
   for (MachineBasicBlock &MBB : MF) {
     MachineBasicBlock::iterator MBBE = MBB.getFirstTerminator();
-    if (MBBE != MBB.end()) {
-      MachineInstr &MI = *MBBE;
+    MachineBasicBlock::iterator TermI = MBBE;
+    // Check first terminator for VCC branches to optimize
+    if (TermI != MBB.end()) {
+      MachineInstr &MI = *TermI;
       switch (MI.getOpcode()) {
       case AMDGPU::S_CBRANCH_VCCZ:
       case AMDGPU::S_CBRANCH_VCCNZ:
         Changed |= optimizeVccBranch(MI);
         continue;
-      case AMDGPU::SI_RETURN_TO_EPILOG:
-        // FIXME: This is not an optimization and should be
-        // moved somewhere else.
+      default:
+        break;
+      }
+    }
+    // Check all terminators for SI_RETURN_TO_EPILOG
+    // FIXME: This is not an optimization and should be moved somewhere else.
+    while (TermI != MBB.end()) {
+      MachineInstr &MI = *TermI;
+      if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) {
         assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid());
 
         // Graphics shaders returning non-void shouldn't contain S_ENDPGM,
@@ -293,11 +301,11 @@
               .addMBB(EmptyMBBAtEnd);
           MI.eraseFromParent();
           MBBE = MBB.getFirstTerminator();
+          TermI = MBBE;
+          continue;
         }
-        break;
-      default:
-        break;
       }
+      TermI++;
     }
 
     if (!ST.hasVGPRIndexMode())


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D85872.285251.patch
Type: text/x-patch
Size: 2715 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200813/c3673501/attachment.bin>


More information about the llvm-commits mailing list