[PATCH] D72997: [AMDGPU] SIRemoveShortExecBranches should not remove branches exiting loops
Carl Ritson via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 20 20:11:38 PST 2020
critson updated this revision to Diff 239226.
critson added a comment.
Remove S_CBRANCH_EXECNZ special case.
This revives an s_cbranch_execz in one lit test.
Alternatively, we could special-case both exec_nz/exec_z.
However, that ceases to feel much better than simply listing all the accepted branch types.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D72997/new/
https://reviews.llvm.org/D72997
Files:
llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
llvm/test/CodeGen/AMDGPU/valu-i1.ll
Index: llvm/test/CodeGen/AMDGPU/valu-i1.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/valu-i1.ll
+++ llvm/test/CodeGen/AMDGPU/valu-i1.ll
@@ -13,6 +13,7 @@
; SI-NEXT: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, 0
; SI-NEXT: s_and_saveexec_b64 [[SAVE1:s\[[0-9]+:[0-9]+\]]], vcc
; SI-NEXT: s_xor_b64 [[SAVE2:s\[[0-9]+:[0-9]+\]]], exec, [[SAVE1]]
+; SI-NEXT: s_cbranch_execz [[FLOW_BB:BB[0-9]+_[0-9]+]]
; SI-NEXT: ; %bb.{{[0-9]+}}: ; %LeafBlock3
; SI: s_mov_b64 s[{{[0-9]:[0-9]}}], -1
@@ -20,7 +21,7 @@
; SI-NEXT: s_cbranch_execnz
; v_mov should be after exec modification
-; SI: ; %bb.{{[0-9]+}}:
+; SI: [[FLOW_BB]]:
; SI-NEXT: s_or_saveexec_b64 [[SAVE3:s\[[0-9]+:[0-9]+\]]], [[SAVE2]]
; SI-NEXT: s_xor_b64 exec, exec, [[SAVE3]]
;
Index: llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
+++ llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
@@ -32,6 +32,7 @@
; CHECK-NEXT: s_and_b64 s[8:9], s[8:9], exec
; CHECK-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; CHECK-NEXT: s_andn2_b64 exec, exec, s[2:3]
+; CHECK-NEXT: s_cbranch_execz BB0_6
; CHECK-NEXT: BB0_3: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: s_or_b64 s[6:7], s[6:7], exec
@@ -49,7 +50,7 @@
; CHECK-NEXT: s_add_i32 s0, s0, 1
; CHECK-NEXT: s_xor_b64 s[6:7], exec, -1
; CHECK-NEXT: s_branch BB0_1
-; CHECK-NEXT: ; %bb.6: ; %Flow2
+; CHECK-NEXT: BB0_6: ; %Flow2
; CHECK-NEXT: s_or_b64 exec, exec, s[2:3]
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: s_and_saveexec_b64 s[0:1], s[4:5]
Index: llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
+++ llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
@@ -37,6 +37,7 @@
class SIRemoveShortExecBranches : public MachineFunctionPass {
private:
const SIInstrInfo *TII = nullptr;
+
bool getBlockDestinations(MachineBasicBlock &SrcMBB,
MachineBasicBlock *&TrueMBB,
MachineBasicBlock *&FalseMBB,
@@ -88,10 +89,9 @@
for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
// When a uniform loop is inside non-uniform control flow, the branch
- // leaving the loop might be an S_CBRANCH_VCCNZ, which is never taken
- // when EXEC = 0. We should skip the loop lest it becomes infinite.
- if (I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ ||
- I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
+ // leaving the loop might never be taken when EXEC = 0.
+ // Hence we should retain cbranch out of the loop lest it become infinite.
+ if (I->isConditionalBranch())
return true;
if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D72997.239226.patch
Type: text/x-patch
Size: 3001 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200121/b9601c31/attachment.bin>
More information about the llvm-commits
mailing list