[PATCH] D72997: [AMDGPU] SIRemoveShortExecBranches should not remove branches exiting loops

Carl Ritson via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 20 19:27:07 PST 2020


critson updated this revision to Diff 239224.
critson added a comment.

Update the patch to reflect the current state of the discussion.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72997/new/

https://reviews.llvm.org/D72997

Files:
  llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
  llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll


Index: llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
+++ llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
@@ -32,6 +32,7 @@
 ; CHECK-NEXT:    s_and_b64 s[8:9], s[8:9], exec
 ; CHECK-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
 ; CHECK-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; CHECK-NEXT:    s_cbranch_execz BB0_6
 ; CHECK-NEXT:  BB0_3: ; %loop
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    s_or_b64 s[6:7], s[6:7], exec
@@ -49,7 +50,7 @@
 ; CHECK-NEXT:    s_add_i32 s0, s0, 1
 ; CHECK-NEXT:    s_xor_b64 s[6:7], exec, -1
 ; CHECK-NEXT:    s_branch BB0_1
-; CHECK-NEXT:  ; %bb.6: ; %Flow2
+; CHECK-NEXT:  BB0_6: ; %Flow2
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; CHECK-NEXT:    v_mov_b32_e32 v1, 0
 ; CHECK-NEXT:    s_and_saveexec_b64 s[0:1], s[4:5]
Index: llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
+++ llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
@@ -37,6 +37,7 @@
 class SIRemoveShortExecBranches : public MachineFunctionPass {
 private:
   const SIInstrInfo *TII = nullptr;
+
   bool getBlockDestinations(MachineBasicBlock &SrcMBB,
                             MachineBasicBlock *&TrueMBB,
                             MachineBasicBlock *&FalseMBB,
@@ -88,10 +89,10 @@
     for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
          I != E; ++I) {
       // When a uniform loop is inside non-uniform control flow, the branch
-      // leaving the loop might be an S_CBRANCH_VCCNZ, which is never taken
-      // when EXEC = 0. We should skip the loop lest it becomes infinite.
-      if (I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ ||
-          I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
+      // leaving the loop might never be taken when EXEC = 0.
+      // Hence we should retain cbranch out of the loop lest it become infinite.
+      if (I->isConditionalBranch() &&
+          I->getOpcode() != AMDGPU::S_CBRANCH_EXECNZ)
         return true;
 
       if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D72997.239224.patch
Type: text/x-patch
Size: 2268 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200121/e72b4878/attachment.bin>


More information about the llvm-commits mailing list