[llvm] a5676a3 - StructurizeCFG: Set Undef for non-predecessors in setPhiValues()
Ruiling Song via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 25 18:56:23 PDT 2022
Author: Ruiling Song
Date: 2022-09-26T09:54:47+08:00
New Revision: a5676a3a7eab3a295ae0482162089a4e366bf9d2
URL: https://github.com/llvm/llvm-project/commit/a5676a3a7eab3a295ae0482162089a4e366bf9d2
DIFF: https://github.com/llvm/llvm-project/commit/a5676a3a7eab3a295ae0482162089a4e366bf9d2.diff
LOG: StructurizeCFG: Set Undef for non-predecessors in setPhiValues()
During the structurization process, we may place non-predecessor blocks
between the predecessors of a block in the structurized CFG. Take
the typical while-break case as an example:
```
  /---A(v=...)
  |  / \
  ^ B   C
  |  \ /|
  \---L |
       \ /
        E (r = phi (v:C)...)
```
After structurization, the CFG would look like:
```
    /---A
    |   |\
    |   | C
    |   |/
    |   F1
    ^   |\
    |   | B
    |   |/
    |   F2
    |   |\
    |   | L
    \   |/
     \--F3
        |
        E
```
We can see that block B is placed between the predecessors (C/L) of E.
During phi reconstruction, to achieve the same semantics as before, we
reconstruct the PHIs as:
F1: v1 = phi (v:C), (undef:A)
F3: r = phi (v1:F2), ...
But this also implies that `v1` would be live through B, which is not
really necessary. The idea of this change is to mark the incoming value
from B as Undef for the PHI in E. With this change, the reconstructed
PHIs would be:
F1: v1 = phi (v:C), (undef:A)
F2: v2 = phi (v1:F1), (undef:B)
F3: r = phi (v2:F2), ...
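As an illustration, here is a hand-written IR sketch of the structurized
shape above (the function, block, and value names are made up for this
sketch and are not taken from the in-tree tests); the `undef` incoming
value from B in F2 is exactly what this change introduces:
```
; Illustrative sketch only; names do not come from the actual tests.
define i32 @sketch(i1 %pA, i1 %pF1, i1 %pF2, i1 %pF3, i32 %x) {
entry:
  br label %A

A:                                     ; loop header, defines v
  %v = add i32 %x, 1
  br i1 %pA, label %C, label %F1

C:                                     ; predecessor of E before structurization
  br label %F1

F1:
  %v1 = phi i32 [ %v, %C ], [ undef, %A ]
  br i1 %pF1, label %B, label %F2

B:                                     ; not a predecessor of E before structurization
  br label %F2

F2:
  %v2 = phi i32 [ %v1, %F1 ], [ undef, %B ]  ; incoming from B is now undef
  br i1 %pF2, label %L, label %F3

L:                                     ; predecessor of E before structurization
  %w = add i32 %x, 2
  br label %F3

F3:
  %r = phi i32 [ %v2, %F2 ], [ %w, %L ]      ; the "..." incoming arrives via L
  br i1 %pF3, label %E, label %A

E:
  ret i32 %r
}
```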
Reviewed by: sameerds
Differential Revision: https://reviews.llvm.org/D132450
Added:
Modified:
llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
llvm/test/CodeGen/AMDGPU/multilevel-break.ll
llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll
llvm/test/CodeGen/AMDGPU/while-break.ll
llvm/test/Transforms/StructurizeCFG/workarounds/needs-fr-ule.ll
llvm/test/Transforms/StructurizeCFG/workarounds/needs-unified-loop-exits.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index 271c89700e7f..b8ad09e037e7 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -12,6 +12,7 @@
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
@@ -246,6 +247,7 @@ class StructurizeCFG {
SmallVector<RegionNode *, 8> Order;
BBSet Visited;
+ BBSet FlowSet;
SmallVector<WeakVH, 8> AffectedPhis;
BBPhiMap DeletedPhis;
@@ -278,6 +280,9 @@ class StructurizeCFG {
void addPhiValues(BasicBlock *From, BasicBlock *To);
+ void findUndefBlocks(BasicBlock *PHIBlock,
+ const SmallSet<BasicBlock *, 8> &Incomings,
+ SmallVector<BasicBlock *> &UndefBlks) const;
void setPhiValues();
void simplifyAffectedPhis();
@@ -632,6 +637,67 @@ void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
AddedPhis[To].push_back(From);
}
+/// When we are reconstructing a PHI inside \p PHIBlock with incoming values
+/// from predecessors \p Incomings, we have a chance to mark the available value
+/// from some blocks as undefined. The function will find all such blocks
+/// and return them in \p UndefBlks.
+void StructurizeCFG::findUndefBlocks(
+ BasicBlock *PHIBlock, const SmallSet<BasicBlock *, 8> &Incomings,
+ SmallVector<BasicBlock *> &UndefBlks) const {
+ // We may get a post-structured CFG like below:
+ //
+ // | P1
+ // |/
+ // F1
+ // |\
+ // | N
+ // |/
+ // F2
+ // |\
+ // | P2
+ // |/
+ // F3
+ // |\
+ // B
+ //
+ // B is the block that has a PHI being reconstructed. P1/P2 are predecessors
+ // of B before structurization. F1/F2/F3 are flow blocks inserted during the
+ // structurization process. Block N is not a predecessor of B before
+ // structurization, but is placed between the predecessors (P1/P2) of B after
+ // structurization. This usually means that threads which went to N never take
+ // the path N->F2->F3->B. For example, the threads that take the branch F1->N
+ // may always take the branch F2->P2. So, when we are reconstructing a PHI
+ // originally in B, we can safely say the incoming value from N is undefined.
+ SmallSet<BasicBlock *, 8> VisitedBlock;
+ SmallVector<BasicBlock *, 8> Stack;
+ if (PHIBlock == ParentRegion->getExit()) {
+ for (auto P : predecessors(PHIBlock)) {
+ if (ParentRegion->contains(P))
+ Stack.push_back(P);
+ }
+ } else {
+ append_range(Stack, predecessors(PHIBlock));
+ }
+
+ // Do a backward traversal over the CFG, and stop further searching if
+ // the block is not a flow block. If a block is neither a flow block nor
+ // an incoming predecessor, then the incoming value from that block is an
+ // undefined value for the PHI being reconstructed.
+ while (!Stack.empty()) {
+ BasicBlock *Current = Stack.pop_back_val();
+ if (VisitedBlock.contains(Current))
+ continue;
+
+ VisitedBlock.insert(Current);
+ if (FlowSet.contains(Current)) {
+ for (auto P : predecessors(Current))
+ Stack.push_back(P);
+ } else if (!Incomings.contains(Current)) {
+ UndefBlks.push_back(Current);
+ }
+ }
+}
+
/// Add the real PHI value as soon as everything is set up
void StructurizeCFG::setPhiValues() {
SmallVector<PHINode *, 8> InsertedPhis;
@@ -643,6 +709,8 @@ void StructurizeCFG::setPhiValues() {
if (!DeletedPhis.count(To))
continue;
+ SmallVector<BasicBlock *> UndefBlks;
+ bool CachedUndefs = false;
PhiMap &Map = DeletedPhis[To];
for (const auto &PI : Map) {
PHINode *Phi = PI.first;
@@ -651,15 +719,30 @@ void StructurizeCFG::setPhiValues() {
Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
Updater.AddAvailableValue(To, Undef);
- NearestCommonDominator Dominator(DT);
- Dominator.addBlock(To);
+ SmallSet<BasicBlock *, 8> Incomings;
+ SmallVector<BasicBlock *> ConstantPreds;
for (const auto &VI : PI.second) {
+ Incomings.insert(VI.first);
Updater.AddAvailableValue(VI.first, VI.second);
- Dominator.addAndRememberBlock(VI.first);
+ if (isa<Constant>(VI.second))
+ ConstantPreds.push_back(VI.first);
}
- if (!Dominator.resultIsRememberedBlock())
- Updater.AddAvailableValue(Dominator.result(), Undef);
+ if (!CachedUndefs) {
+ findUndefBlocks(To, Incomings, UndefBlks);
+ CachedUndefs = true;
+ }
+
+ for (auto UB : UndefBlks) {
+ // If this undef block is dominated by any predecessor (before
+ // structurization) of the reconstructed PHI that has a constant incoming
+ // value, don't mark the available value as undefined. Setting undef for
+ // such a block would prevent optimal phi insertion.
+ if (any_of(ConstantPreds,
+ [&](BasicBlock *CP) { return DT->dominates(CP, UB); }))
+ continue;
+ Updater.AddAvailableValue(UB, Undef);
+ }
for (BasicBlock *FI : From)
Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI));
@@ -759,6 +842,7 @@ BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) {
Order.back()->getEntry();
BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
Func, Insert);
+ FlowSet.insert(Flow);
DT->addNewBlock(Flow, Dominator);
ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
return Flow;
@@ -1103,6 +1187,7 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
Loops.clear();
LoopPreds.clear();
LoopConds.clear();
+ FlowSet.clear();
return true;
}
diff --git a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
index 47ce05f1702c..d125f4304c91 100644
--- a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
+++ b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
@@ -9,34 +9,32 @@ define amdgpu_vs void @multi_else_break(<4 x float> %vec, i32 %ub, i32 %cont) {
; OPT-NEXT: main_body:
; OPT-NEXT: br label [[LOOP_OUTER:%.*]]
; OPT: LOOP.outer:
-; OPT-NEXT: [[PHI_BROKEN2:%.*]] = phi i64 [ [[TMP10:%.*]], [[FLOW1:%.*]] ], [ 0, [[MAIN_BODY:%.*]] ]
-; OPT-NEXT: [[TMP43:%.*]] = phi i32 [ 0, [[MAIN_BODY]] ], [ [[TMP4:%.*]], [[FLOW1]] ]
+; OPT-NEXT: [[PHI_BROKEN2:%.*]] = phi i64 [ [[TMP8:%.*]], [[FLOW1:%.*]] ], [ 0, [[MAIN_BODY:%.*]] ]
+; OPT-NEXT: [[TMP43:%.*]] = phi i32 [ 0, [[MAIN_BODY]] ], [ [[TMP3:%.*]], [[FLOW1]] ]
; OPT-NEXT: br label [[LOOP:%.*]]
; OPT: LOOP:
-; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP8:%.*]], [[FLOW:%.*]] ], [ 0, [[LOOP_OUTER]] ]
-; OPT-NEXT: [[TMP0:%.*]] = phi i32 [ undef, [[LOOP_OUTER]] ], [ [[TMP4]], [[FLOW]] ]
-; OPT-NEXT: [[TMP45:%.*]] = phi i32 [ [[TMP43]], [[LOOP_OUTER]] ], [ [[TMP5:%.*]], [[FLOW]] ]
+; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP6:%.*]], [[FLOW:%.*]] ], [ 0, [[LOOP_OUTER]] ]
+; OPT-NEXT: [[TMP45:%.*]] = phi i32 [ [[TMP43]], [[LOOP_OUTER]] ], [ [[TMP3]], [[FLOW]] ]
; OPT-NEXT: [[TMP48:%.*]] = icmp slt i32 [[TMP45]], [[UB:%.*]]
-; OPT-NEXT: [[TMP1:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP48]])
-; OPT-NEXT: [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP1]], 0
-; OPT-NEXT: [[TMP3:%.*]] = extractvalue { i1, i64 } [[TMP1]], 1
-; OPT-NEXT: br i1 [[TMP2]], label [[ENDIF:%.*]], label [[FLOW]]
+; OPT-NEXT: [[TMP0:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP48]])
+; OPT-NEXT: [[TMP1:%.*]] = extractvalue { i1, i64 } [[TMP0]], 0
+; OPT-NEXT: [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP0]], 1
+; OPT-NEXT: br i1 [[TMP1]], label [[ENDIF:%.*]], label [[FLOW]]
; OPT: Flow:
-; OPT-NEXT: [[TMP4]] = phi i32 [ [[TMP47:%.*]], [[ENDIF]] ], [ [[TMP0]], [[LOOP]] ]
-; OPT-NEXT: [[TMP5]] = phi i32 [ [[TMP47]], [[ENDIF]] ], [ undef, [[LOOP]] ]
-; OPT-NEXT: [[TMP6:%.*]] = phi i1 [ [[TMP51:%.*]], [[ENDIF]] ], [ true, [[LOOP]] ]
-; OPT-NEXT: [[TMP7:%.*]] = phi i1 [ [[TMP51_INV:%.*]], [[ENDIF]] ], [ true, [[LOOP]] ]
-; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP3]])
-; OPT-NEXT: [[TMP8]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP7]], i64 [[PHI_BROKEN]])
-; OPT-NEXT: [[TMP9:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP8]])
-; OPT-NEXT: [[TMP10]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP6]], i64 [[PHI_BROKEN2]])
-; OPT-NEXT: br i1 [[TMP9]], label [[FLOW1]], label [[LOOP]]
+; OPT-NEXT: [[TMP3]] = phi i32 [ [[TMP47:%.*]], [[ENDIF]] ], [ undef, [[LOOP]] ]
+; OPT-NEXT: [[TMP4:%.*]] = phi i1 [ [[TMP51:%.*]], [[ENDIF]] ], [ true, [[LOOP]] ]
+; OPT-NEXT: [[TMP5:%.*]] = phi i1 [ [[TMP51_INV:%.*]], [[ENDIF]] ], [ true, [[LOOP]] ]
+; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]])
+; OPT-NEXT: [[TMP6]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP5]], i64 [[PHI_BROKEN]])
+; OPT-NEXT: [[TMP7:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP6]])
+; OPT-NEXT: [[TMP8]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP4]], i64 [[PHI_BROKEN2]])
+; OPT-NEXT: br i1 [[TMP7]], label [[FLOW1]], label [[LOOP]]
; OPT: Flow1:
-; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP8]])
-; OPT-NEXT: [[TMP11:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP10]])
-; OPT-NEXT: br i1 [[TMP11]], label [[IF:%.*]], label [[LOOP_OUTER]]
+; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP6]])
+; OPT-NEXT: [[TMP9:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP8]])
+; OPT-NEXT: br i1 [[TMP9]], label [[IF:%.*]], label [[LOOP_OUTER]]
; OPT: IF:
-; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP10]])
+; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP8]])
; OPT-NEXT: ret void
; OPT: ENDIF:
; OPT-NEXT: [[TMP47]] = add i32 [[TMP45]], 1
@@ -156,7 +154,7 @@ define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
; OPT-NEXT: [[CMP2]] = icmp sge i32 [[TMP]], [[LOAD2]]
; OPT-NEXT: br label [[FLOW3]]
; OPT: Flow5:
-; OPT-NEXT: [[TMP9]] = phi i32 [ [[LSR_IV_NEXT]], [[CASE0]] ], [ [[TMP6]], [[LEAFBLOCK]] ]
+; OPT-NEXT: [[TMP9]] = phi i32 [ [[LSR_IV_NEXT]], [[CASE0]] ], [ undef, [[LEAFBLOCK]] ]
; OPT-NEXT: [[TMP10]] = phi i1 [ [[CMP1]], [[CASE0]] ], [ [[TMP7]], [[LEAFBLOCK]] ]
; OPT-NEXT: br label [[FLOW4]]
; OPT: bb9:
diff --git a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll
index 6c9fea15dfcc..20fdbec80b59 100644
--- a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll
+++ b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll
@@ -222,16 +222,16 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i,
; GLOBALNESS1-NEXT: v_readlane_b32 s6, v41, 40
; GLOBALNESS1-NEXT: v_readlane_b32 s7, v41, 41
; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[6:7]
-; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_29
+; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_28
; GLOBALNESS1-NEXT: .LBB1_2: ; %Flow6
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: s_or_b64 exec, exec, s[4:5]
-; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], 0
+; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], 0
+; GLOBALNESS1-NEXT: ; implicit-def: $sgpr4_sgpr5
; GLOBALNESS1-NEXT: .LBB1_3: ; %Flow19
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a63, v31
-; GLOBALNESS1-NEXT: v_readlane_b32 s4, v42, 10
-; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[6:7]
+; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[8:9]
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a62, v30
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a61, v29
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a60, v28
@@ -263,11 +263,10 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i,
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a34, v2
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a33, v1
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a32, v0
-; GLOBALNESS1-NEXT: v_readlane_b32 s5, v42, 11
-; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_30
+; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_29
; GLOBALNESS1-NEXT: .LBB1_4: ; %bb5
; GLOBALNESS1-NEXT: ; =>This Loop Header: Depth=1
-; GLOBALNESS1-NEXT: ; Child Loop BB1_17 Depth 2
+; GLOBALNESS1-NEXT: ; Child Loop BB1_16 Depth 2
; GLOBALNESS1-NEXT: v_readlane_b32 s60, v41, 0
; GLOBALNESS1-NEXT: v_readlane_b32 s61, v41, 1
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], s[60:61], s[60:61] op_sel:[0,1]
@@ -315,11 +314,10 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i,
; GLOBALNESS1-NEXT: v_readlane_b32 s91, v41, 31
; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0)
; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[52:53]
-; GLOBALNESS1-NEXT: ; implicit-def: $sgpr4_sgpr5
; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[54:55]
-; GLOBALNESS1-NEXT: ; kill: killed $sgpr4_sgpr5
-; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], -1
-; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_10
+; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], -1
+; GLOBALNESS1-NEXT: ; implicit-def: $sgpr4_sgpr5
+; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_9
; GLOBALNESS1-NEXT: ; %bb.5: ; %NodeBlock
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: s_cmp_lt_i32 s59, 1
@@ -327,23 +325,19 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i,
; GLOBALNESS1-NEXT: ; %bb.6: ; %LeafBlock3
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: s_cmp_lg_u32 s59, 1
-; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], -1
-; GLOBALNESS1-NEXT: s_cselect_b64 s[4:5], -1, 0
+; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], -1
+; GLOBALNESS1-NEXT: s_cselect_b64 s[6:7], -1, 0
; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_8
; GLOBALNESS1-NEXT: s_branch .LBB1_9
; GLOBALNESS1-NEXT: .LBB1_7: ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], 0
-; GLOBALNESS1-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], 0
+; GLOBALNESS1-NEXT: ; implicit-def: $sgpr4_sgpr5
; GLOBALNESS1-NEXT: .LBB1_8: ; %LeafBlock
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: s_cmp_lg_u32 s59, 0
-; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], 0
-; GLOBALNESS1-NEXT: s_cselect_b64 s[4:5], -1, 0
-; GLOBALNESS1-NEXT: .LBB1_9: ; %Flow18
-; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS1-NEXT: v_writelane_b32 v42, s8, 10
-; GLOBALNESS1-NEXT: v_writelane_b32 v42, s9, 11
-; GLOBALNESS1-NEXT: .LBB1_10: ; %Flow16
+; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], 0
+; GLOBALNESS1-NEXT: s_cselect_b64 s[6:7], -1, 0
+; GLOBALNESS1-NEXT: .LBB1_9: ; %Flow16
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: v_readlane_b32 s68, v41, 0
; GLOBALNESS1-NEXT: v_readlane_b32 s69, v41, 1
@@ -411,8 +405,8 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i,
; GLOBALNESS1-NEXT: s_mov_b32 s98, s57
; GLOBALNESS1-NEXT: s_mov_b32 s99, s57
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], s[68:69], s[68:69] op_sel:[0,1]
-; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], -1
-; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[4:5]
+; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], -1
+; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[6:7]
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[2:3], s[70:71], s[70:71] op_sel:[0,1]
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[4:5], s[72:73], s[72:73] op_sel:[0,1]
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[6:7], s[74:75], s[74:75] op_sel:[0,1]
@@ -429,7 +423,7 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i,
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[28:29], s[96:97], s[96:97] op_sel:[0,1]
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[30:31], s[98:99], s[98:99] op_sel:[0,1]
; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_3
-; GLOBALNESS1-NEXT: ; %bb.11: ; %baz.exit.i
+; GLOBALNESS1-NEXT: ; %bb.10: ; %baz.exit.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], 0, 0
; GLOBALNESS1-NEXT: flat_load_dword v0, v[0:1]
@@ -568,8 +562,8 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i,
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[28:29], s[88:89], s[88:89] op_sel:[0,1]
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[30:31], s[90:91], s[90:91] op_sel:[0,1]
; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[88:89], s[54:55]
-; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_26
-; GLOBALNESS1-NEXT: ; %bb.12: ; %bb33.i
+; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_25
+; GLOBALNESS1-NEXT: ; %bb.11: ; %bb33.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[2:3], 0, 0
; GLOBALNESS1-NEXT: global_load_dwordx2 v[0:1], v[2:3], off
@@ -577,12 +571,12 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i,
; GLOBALNESS1-NEXT: v_readlane_b32 s5, v41, 37
; GLOBALNESS1-NEXT: s_mov_b32 s91, s59
; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[4:5]
-; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_14
-; GLOBALNESS1-NEXT: ; %bb.13: ; %bb39.i
+; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_13
+; GLOBALNESS1-NEXT: ; %bb.12: ; %bb39.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: v_mov_b32_e32 v45, v44
; GLOBALNESS1-NEXT: global_store_dwordx2 v[2:3], v[44:45], off
-; GLOBALNESS1-NEXT: .LBB1_14: ; %bb44.lr.ph.i
+; GLOBALNESS1-NEXT: .LBB1_13: ; %bb44.lr.ph.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: v_cmp_ne_u32_e32 vcc, 0, v46
; GLOBALNESS1-NEXT: v_cndmask_b32_e32 v2, 0, v40, vcc
@@ -593,40 +587,40 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i,
; GLOBALNESS1-NEXT: v_cmp_eq_u32_e64 s[58:59], 0, v2
; GLOBALNESS1-NEXT: v_readlane_b32 s63, v41, 33
; GLOBALNESS1-NEXT: v_readlane_b32 s65, v41, 35
-; GLOBALNESS1-NEXT: s_branch .LBB1_17
-; GLOBALNESS1-NEXT: .LBB1_15: ; %Flow7
-; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_17 Depth=2
+; GLOBALNESS1-NEXT: s_branch .LBB1_16
+; GLOBALNESS1-NEXT: .LBB1_14: ; %Flow7
+; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2
; GLOBALNESS1-NEXT: s_or_b64 exec, exec, s[4:5]
-; GLOBALNESS1-NEXT: .LBB1_16: ; %bb63.i
-; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_17 Depth=2
+; GLOBALNESS1-NEXT: .LBB1_15: ; %bb63.i
+; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2
; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[50:51]
-; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_25
-; GLOBALNESS1-NEXT: .LBB1_17: ; %bb44.i
+; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_24
+; GLOBALNESS1-NEXT: .LBB1_16: ; %bb44.i
; GLOBALNESS1-NEXT: ; Parent Loop BB1_4 Depth=1
; GLOBALNESS1-NEXT: ; => This Inner Loop Header: Depth=2
; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[100:101]
-; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_16
-; GLOBALNESS1-NEXT: ; %bb.18: ; %bb46.i
-; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_17 Depth=2
+; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_15
+; GLOBALNESS1-NEXT: ; %bb.17: ; %bb46.i
+; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2
; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[46:47]
-; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_16
-; GLOBALNESS1-NEXT: ; %bb.19: ; %bb50.i
-; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_17 Depth=2
+; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_15
+; GLOBALNESS1-NEXT: ; %bb.18: ; %bb50.i
+; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2
; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[62:63]
-; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_22
-; GLOBALNESS1-NEXT: ; %bb.20: ; %bb3.i.i
-; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_17 Depth=2
+; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_21
+; GLOBALNESS1-NEXT: ; %bb.19: ; %bb3.i.i
+; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2
; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[64:65]
-; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_22
-; GLOBALNESS1-NEXT: ; %bb.21: ; %bb6.i.i
-; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_17 Depth=2
+; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_21
+; GLOBALNESS1-NEXT: ; %bb.20: ; %bb6.i.i
+; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2
; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[56:57]
-; GLOBALNESS1-NEXT: .LBB1_22: ; %spam.exit.i
-; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_17 Depth=2
+; GLOBALNESS1-NEXT: .LBB1_21: ; %spam.exit.i
+; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2
; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[48:49]
-; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_16
-; GLOBALNESS1-NEXT: ; %bb.23: ; %bb55.i
-; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_17 Depth=2
+; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_15
+; GLOBALNESS1-NEXT: ; %bb.22: ; %bb55.i
+; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2
; GLOBALNESS1-NEXT: s_add_u32 s60, s38, 40
; GLOBALNESS1-NEXT: s_addc_u32 s61, s39, 0
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[40:41]
@@ -650,14 +644,14 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i,
; GLOBALNESS1-NEXT: global_store_dwordx2 v[0:1], a[32:33], off
; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[52:53]
; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[58:59]
-; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_15
-; GLOBALNESS1-NEXT: ; %bb.24: ; %bb62.i
-; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_17 Depth=2
+; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_14
+; GLOBALNESS1-NEXT: ; %bb.23: ; %bb62.i
+; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2
; GLOBALNESS1-NEXT: v_mov_b32_e32 v45, v44
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], 0, 0
; GLOBALNESS1-NEXT: global_store_dwordx2 v[0:1], v[44:45], off
-; GLOBALNESS1-NEXT: s_branch .LBB1_15
-; GLOBALNESS1-NEXT: .LBB1_25: ; %Flow14
+; GLOBALNESS1-NEXT: s_branch .LBB1_14
+; GLOBALNESS1-NEXT: .LBB1_24: ; %Flow14
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: v_readlane_b32 s56, v41, 0
; GLOBALNESS1-NEXT: v_readlane_b32 s57, v41, 1
@@ -726,35 +720,35 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i,
; GLOBALNESS1-NEXT: v_readlane_b32 s65, v41, 9
; GLOBALNESS1-NEXT: v_readlane_b32 s66, v41, 10
; GLOBALNESS1-NEXT: v_readlane_b32 s67, v41, 11
-; GLOBALNESS1-NEXT: .LBB1_26: ; %Flow15
+; GLOBALNESS1-NEXT: .LBB1_25: ; %Flow15
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: s_or_b64 exec, exec, s[88:89]
; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[54:55]
; GLOBALNESS1-NEXT: s_mov_b64 s[54:55], s[92:93]
; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_2
-; GLOBALNESS1-NEXT: ; %bb.27: ; %bb67.i
+; GLOBALNESS1-NEXT: ; %bb.26: ; %bb67.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: v_readlane_b32 s6, v41, 38
; GLOBALNESS1-NEXT: v_readlane_b32 s7, v41, 39
; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[6:7]
; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_1
-; GLOBALNESS1-NEXT: ; %bb.28: ; %bb69.i
+; GLOBALNESS1-NEXT: ; %bb.27: ; %bb69.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: v_mov_b32_e32 v45, v44
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[32:33], 0, 0
; GLOBALNESS1-NEXT: global_store_dwordx2 v[32:33], v[44:45], off
; GLOBALNESS1-NEXT: s_branch .LBB1_1
-; GLOBALNESS1-NEXT: .LBB1_29: ; %bb73.i
+; GLOBALNESS1-NEXT: .LBB1_28: ; %bb73.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: v_mov_b32_e32 v45, v44
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[32:33], 0, 0
; GLOBALNESS1-NEXT: global_store_dwordx2 v[32:33], v[44:45], off
; GLOBALNESS1-NEXT: s_branch .LBB1_2
-; GLOBALNESS1-NEXT: .LBB1_30: ; %loop.exit.guard
+; GLOBALNESS1-NEXT: .LBB1_29: ; %loop.exit.guard
; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[4:5]
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], -1
-; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_32
-; GLOBALNESS1-NEXT: ; %bb.31: ; %bb7.i.i
+; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_31
+; GLOBALNESS1-NEXT: ; %bb.30: ; %bb7.i.i
; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40
; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[40:41]
@@ -769,10 +763,10 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i,
; GLOBALNESS1-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], 0
-; GLOBALNESS1-NEXT: .LBB1_32: ; %Flow
+; GLOBALNESS1-NEXT: .LBB1_31: ; %Flow
; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[4:5]
-; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_34
-; GLOBALNESS1-NEXT: ; %bb.33: ; %bb11.i.i
+; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_33
+; GLOBALNESS1-NEXT: ; %bb.32: ; %bb11.i.i
; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40
; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[40:41]
@@ -786,7 +780,7 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i,
; GLOBALNESS1-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
; GLOBALNESS1-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GLOBALNESS1-NEXT: .LBB1_34: ; %UnifiedUnreachableBlock
+; GLOBALNESS1-NEXT: .LBB1_33: ; %UnifiedUnreachableBlock
;
; GLOBALNESS0-LABEL: kernel:
; GLOBALNESS0: ; %bb.0: ; %bb
@@ -982,16 +976,16 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i,
; GLOBALNESS0-NEXT: v_readlane_b32 s6, v41, 40
; GLOBALNESS0-NEXT: v_readlane_b32 s7, v41, 41
; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[6:7]
-; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_29
+; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_28
; GLOBALNESS0-NEXT: .LBB1_2: ; %Flow6
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: s_or_b64 exec, exec, s[4:5]
-; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], 0
+; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], 0
+; GLOBALNESS0-NEXT: ; implicit-def: $sgpr4_sgpr5
; GLOBALNESS0-NEXT: .LBB1_3: ; %Flow19
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a63, v31
-; GLOBALNESS0-NEXT: v_readlane_b32 s4, v42, 10
-; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[6:7]
+; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[8:9]
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a62, v30
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a61, v29
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a60, v28
@@ -1023,11 +1017,10 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i,
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a34, v2
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a33, v1
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a32, v0
-; GLOBALNESS0-NEXT: v_readlane_b32 s5, v42, 11
-; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_30
+; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_29
; GLOBALNESS0-NEXT: .LBB1_4: ; %bb5
; GLOBALNESS0-NEXT: ; =>This Loop Header: Depth=1
-; GLOBALNESS0-NEXT: ; Child Loop BB1_17 Depth 2
+; GLOBALNESS0-NEXT: ; Child Loop BB1_16 Depth 2
; GLOBALNESS0-NEXT: v_readlane_b32 s60, v41, 0
; GLOBALNESS0-NEXT: v_readlane_b32 s61, v41, 1
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], s[60:61], s[60:61] op_sel:[0,1]
@@ -1075,11 +1068,10 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i,
; GLOBALNESS0-NEXT: v_readlane_b32 s91, v41, 31
; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0)
; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[100:101]
-; GLOBALNESS0-NEXT: ; implicit-def: $sgpr4_sgpr5
; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[54:55]
-; GLOBALNESS0-NEXT: ; kill: killed $sgpr4_sgpr5
-; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], -1
-; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_10
+; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], -1
+; GLOBALNESS0-NEXT: ; implicit-def: $sgpr4_sgpr5
+; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_9
; GLOBALNESS0-NEXT: ; %bb.5: ; %NodeBlock
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: s_cmp_lt_i32 s59, 1
@@ -1087,23 +1079,19 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i,
; GLOBALNESS0-NEXT: ; %bb.6: ; %LeafBlock3
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: s_cmp_lg_u32 s59, 1
-; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], -1
-; GLOBALNESS0-NEXT: s_cselect_b64 s[4:5], -1, 0
+; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], -1
+; GLOBALNESS0-NEXT: s_cselect_b64 s[6:7], -1, 0
; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_8
; GLOBALNESS0-NEXT: s_branch .LBB1_9
; GLOBALNESS0-NEXT: .LBB1_7: ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], 0
-; GLOBALNESS0-NEXT: ; implicit-def: $sgpr8_sgpr9
+; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], 0
+; GLOBALNESS0-NEXT: ; implicit-def: $sgpr4_sgpr5
; GLOBALNESS0-NEXT: .LBB1_8: ; %LeafBlock
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: s_cmp_lg_u32 s59, 0
-; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], 0
-; GLOBALNESS0-NEXT: s_cselect_b64 s[4:5], -1, 0
-; GLOBALNESS0-NEXT: .LBB1_9: ; %Flow18
-; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS0-NEXT: v_writelane_b32 v42, s8, 10
-; GLOBALNESS0-NEXT: v_writelane_b32 v42, s9, 11
-; GLOBALNESS0-NEXT: .LBB1_10: ; %Flow16
+; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], 0
+; GLOBALNESS0-NEXT: s_cselect_b64 s[6:7], -1, 0
+; GLOBALNESS0-NEXT: .LBB1_9: ; %Flow16
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: v_readlane_b32 s64, v41, 0
; GLOBALNESS0-NEXT: v_readlane_b32 s65, v41, 1
@@ -1169,8 +1157,8 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i,
; GLOBALNESS0-NEXT: s_mov_b32 s98, s57
; GLOBALNESS0-NEXT: s_mov_b32 s99, s57
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], s[68:69], s[68:69] op_sel:[0,1]
-; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], -1
-; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[4:5]
+; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], -1
+; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[6:7]
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[2:3], s[70:71], s[70:71] op_sel:[0,1]
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[4:5], s[72:73], s[72:73] op_sel:[0,1]
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[6:7], s[74:75], s[74:75] op_sel:[0,1]
@@ -1189,7 +1177,7 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i,
; GLOBALNESS0-NEXT: v_readlane_b32 s66, v41, 2
; GLOBALNESS0-NEXT: v_readlane_b32 s67, v41, 3
; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_3
-; GLOBALNESS0-NEXT: ; %bb.11: ; %baz.exit.i
+; GLOBALNESS0-NEXT: ; %bb.10: ; %baz.exit.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], 0, 0
; GLOBALNESS0-NEXT: flat_load_dword v0, v[0:1]
@@ -1328,8 +1316,8 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i,
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[28:29], s[88:89], s[88:89] op_sel:[0,1]
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[30:31], s[90:91], s[90:91] op_sel:[0,1]
; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[88:89], s[54:55]
-; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_26
-; GLOBALNESS0-NEXT: ; %bb.12: ; %bb33.i
+; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_25
+; GLOBALNESS0-NEXT: ; %bb.11: ; %bb33.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[2:3], 0, 0
; GLOBALNESS0-NEXT: global_load_dwordx2 v[0:1], v[2:3], off
@@ -1337,12 +1325,12 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i,
; GLOBALNESS0-NEXT: v_readlane_b32 s5, v41, 37
; GLOBALNESS0-NEXT: s_mov_b32 s91, s59
; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[4:5]
-; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_14
-; GLOBALNESS0-NEXT: ; %bb.13: ; %bb39.i
+; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_13
+; GLOBALNESS0-NEXT: ; %bb.12: ; %bb39.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: v_mov_b32_e32 v45, v44
; GLOBALNESS0-NEXT: global_store_dwordx2 v[2:3], v[44:45], off
-; GLOBALNESS0-NEXT: .LBB1_14: ; %bb44.lr.ph.i
+; GLOBALNESS0-NEXT: .LBB1_13: ; %bb44.lr.ph.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: v_cmp_ne_u32_e32 vcc, 0, v46
; GLOBALNESS0-NEXT: v_cndmask_b32_e32 v2, 0, v40, vcc
@@ -1353,40 +1341,40 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i,
; GLOBALNESS0-NEXT: v_cmp_eq_u32_e64 s[58:59], 0, v2
; GLOBALNESS0-NEXT: v_readlane_b32 s61, v41, 35
; GLOBALNESS0-NEXT: v_readlane_b32 s63, v41, 33
-; GLOBALNESS0-NEXT: s_branch .LBB1_17
-; GLOBALNESS0-NEXT: .LBB1_15: ; %Flow7
-; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_17 Depth=2
+; GLOBALNESS0-NEXT: s_branch .LBB1_16
+; GLOBALNESS0-NEXT: .LBB1_14: ; %Flow7
+; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2
; GLOBALNESS0-NEXT: s_or_b64 exec, exec, s[4:5]
-; GLOBALNESS0-NEXT: .LBB1_16: ; %bb63.i
-; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_17 Depth=2
+; GLOBALNESS0-NEXT: .LBB1_15: ; %bb63.i
+; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2
; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[52:53]
-; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_25
-; GLOBALNESS0-NEXT: .LBB1_17: ; %bb44.i
+; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_24
+; GLOBALNESS0-NEXT: .LBB1_16: ; %bb44.i
; GLOBALNESS0-NEXT: ; Parent Loop BB1_4 Depth=1
; GLOBALNESS0-NEXT: ; => This Inner Loop Header: Depth=2
; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[46:47]
-; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_16
-; GLOBALNESS0-NEXT: ; %bb.18: ; %bb46.i
-; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_17 Depth=2
+; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_15
+; GLOBALNESS0-NEXT: ; %bb.17: ; %bb46.i
+; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2
; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[50:51]
-; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_16
-; GLOBALNESS0-NEXT: ; %bb.19: ; %bb50.i
-; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_17 Depth=2
+; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_15
+; GLOBALNESS0-NEXT: ; %bb.18: ; %bb50.i
+; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2
; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[62:63]
-; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_22
-; GLOBALNESS0-NEXT: ; %bb.20: ; %bb3.i.i
-; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_17 Depth=2
+; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_21
+; GLOBALNESS0-NEXT: ; %bb.19: ; %bb3.i.i
+; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2
; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[60:61]
-; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_22
-; GLOBALNESS0-NEXT: ; %bb.21: ; %bb6.i.i
-; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_17 Depth=2
+; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_21
+; GLOBALNESS0-NEXT: ; %bb.20: ; %bb6.i.i
+; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2
; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[56:57]
-; GLOBALNESS0-NEXT: .LBB1_22: ; %spam.exit.i
-; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_17 Depth=2
+; GLOBALNESS0-NEXT: .LBB1_21: ; %spam.exit.i
+; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2
; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[48:49]
-; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_16
-; GLOBALNESS0-NEXT: ; %bb.23: ; %bb55.i
-; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_17 Depth=2
+; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_15
+; GLOBALNESS0-NEXT: ; %bb.22: ; %bb55.i
+; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2
; GLOBALNESS0-NEXT: s_add_u32 s64, s38, 40
; GLOBALNESS0-NEXT: s_addc_u32 s65, s39, 0
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[40:41]
@@ -1410,14 +1398,14 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i,
; GLOBALNESS0-NEXT: global_store_dwordx2 v[0:1], a[32:33], off
; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[100:101]
; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[58:59]
-; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_15
-; GLOBALNESS0-NEXT: ; %bb.24: ; %bb62.i
-; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_17 Depth=2
+; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_14
+; GLOBALNESS0-NEXT: ; %bb.23: ; %bb62.i
+; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2
; GLOBALNESS0-NEXT: v_mov_b32_e32 v45, v44
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], 0, 0
; GLOBALNESS0-NEXT: global_store_dwordx2 v[0:1], v[44:45], off
-; GLOBALNESS0-NEXT: s_branch .LBB1_15
-; GLOBALNESS0-NEXT: .LBB1_25: ; %Flow14
+; GLOBALNESS0-NEXT: s_branch .LBB1_14
+; GLOBALNESS0-NEXT: .LBB1_24: ; %Flow14
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: v_readlane_b32 s56, v41, 0
; GLOBALNESS0-NEXT: v_readlane_b32 s57, v41, 1
@@ -1486,35 +1474,35 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i,
; GLOBALNESS0-NEXT: v_readlane_b32 s61, v41, 5
; GLOBALNESS0-NEXT: v_readlane_b32 s62, v41, 6
; GLOBALNESS0-NEXT: v_readlane_b32 s63, v41, 7
-; GLOBALNESS0-NEXT: .LBB1_26: ; %Flow15
+; GLOBALNESS0-NEXT: .LBB1_25: ; %Flow15
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: s_or_b64 exec, exec, s[88:89]
; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[54:55]
; GLOBALNESS0-NEXT: s_mov_b64 s[54:55], s[92:93]
; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_2
-; GLOBALNESS0-NEXT: ; %bb.27: ; %bb67.i
+; GLOBALNESS0-NEXT: ; %bb.26: ; %bb67.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: v_readlane_b32 s6, v41, 38
; GLOBALNESS0-NEXT: v_readlane_b32 s7, v41, 39
; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[6:7]
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_1
-; GLOBALNESS0-NEXT: ; %bb.28: ; %bb69.i
+; GLOBALNESS0-NEXT: ; %bb.27: ; %bb69.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: v_mov_b32_e32 v45, v44
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[32:33], 0, 0
; GLOBALNESS0-NEXT: global_store_dwordx2 v[32:33], v[44:45], off
; GLOBALNESS0-NEXT: s_branch .LBB1_1
-; GLOBALNESS0-NEXT: .LBB1_29: ; %bb73.i
+; GLOBALNESS0-NEXT: .LBB1_28: ; %bb73.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: v_mov_b32_e32 v45, v44
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[32:33], 0, 0
; GLOBALNESS0-NEXT: global_store_dwordx2 v[32:33], v[44:45], off
; GLOBALNESS0-NEXT: s_branch .LBB1_2
-; GLOBALNESS0-NEXT: .LBB1_30: ; %loop.exit.guard
+; GLOBALNESS0-NEXT: .LBB1_29: ; %loop.exit.guard
; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[4:5]
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], -1
-; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_32
-; GLOBALNESS0-NEXT: ; %bb.31: ; %bb7.i.i
+; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_31
+; GLOBALNESS0-NEXT: ; %bb.30: ; %bb7.i.i
; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40
; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[40:41]
@@ -1529,10 +1517,10 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i,
; GLOBALNESS0-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], 0
-; GLOBALNESS0-NEXT: .LBB1_32: ; %Flow
+; GLOBALNESS0-NEXT: .LBB1_31: ; %Flow
; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[4:5]
-; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_34
-; GLOBALNESS0-NEXT: ; %bb.33: ; %bb11.i.i
+; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_33
+; GLOBALNESS0-NEXT: ; %bb.32: ; %bb11.i.i
; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40
; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[40:41]
@@ -1546,7 +1534,7 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i,
; GLOBALNESS0-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
; GLOBALNESS0-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GLOBALNESS0-NEXT: .LBB1_34: ; %UnifiedUnreachableBlock
+; GLOBALNESS0-NEXT: .LBB1_33: ; %UnifiedUnreachableBlock
bb:
store i32 0, i32 addrspace(1)* null, align 4
%tmp4 = load i32, i32 addrspace(1)* %arg1.global, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/while-break.ll b/llvm/test/CodeGen/AMDGPU/while-break.ll
index 783cb157834b..76efff1152f7 100644
--- a/llvm/test/CodeGen/AMDGPU/while-break.ll
+++ b/llvm/test/CodeGen/AMDGPU/while-break.ll
@@ -10,7 +10,6 @@ define amdgpu_ps float @while_break(i32 %z, float %v, i32 %x, i32 %y) #0 {
; GCN-NEXT: .LBB0_1: ; %Flow2
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s4
-; GCN-NEXT: v_mov_b32_e32 v1, v5
; GCN-NEXT: s_and_b32 s2, exec_lo, s3
; GCN-NEXT: s_or_b32 s0, s2, s0
; GCN-NEXT: s_andn2_b32 exec_lo, exec_lo, s0
@@ -20,22 +19,18 @@ define amdgpu_ps float @while_break(i32 %z, float %v, i32 %x, i32 %y) #0 {
; GCN-NEXT: s_add_i32 s1, s1, 1
; GCN-NEXT: s_mov_b32 s2, 0
; GCN-NEXT: v_cmp_ge_i32_e32 vcc_lo, s1, v2
-; GCN-NEXT: ; implicit-def: $vgpr4
; GCN-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GCN-NEXT: s_xor_b32 s3, exec_lo, s3
; GCN-NEXT: ; %bb.3: ; %else
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT: v_cmp_lt_i32_e32 vcc_lo, s1, v3
-; GCN-NEXT: v_mov_b32_e32 v4, v1
; GCN-NEXT: s_and_b32 s2, vcc_lo, exec_lo
; GCN-NEXT: ; %bb.4: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
-; GCN-NEXT: s_or_saveexec_b32 s3, s3
-; GCN-NEXT: v_mov_b32_e32 v5, v4
-; GCN-NEXT: s_xor_b32 exec_lo, exec_lo, s3
+; GCN-NEXT: s_andn2_saveexec_b32 s3, s3
; GCN-NEXT: ; %bb.5: ; %if
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
-; GCN-NEXT: v_add_f32_e32 v5, 1.0, v1
+; GCN-NEXT: v_add_f32_e32 v1, 1.0, v1
; GCN-NEXT: s_or_b32 s2, s2, exec_lo
; GCN-NEXT: ; %bb.6: ; %Flow1
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
@@ -46,12 +41,11 @@ define amdgpu_ps float @while_break(i32 %z, float %v, i32 %x, i32 %y) #0 {
; GCN-NEXT: ; %bb.7: ; %latch
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT: v_cmp_lt_i32_e32 vcc_lo, s1, v0
-; GCN-NEXT: v_mov_b32_e32 v4, v5
; GCN-NEXT: s_orn2_b32 s3, vcc_lo, exec_lo
; GCN-NEXT: s_branch .LBB0_1
; GCN-NEXT: .LBB0_8: ; %end
; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s0
-; GCN-NEXT: v_mov_b32_e32 v0, v4
+; GCN-NEXT: v_mov_b32_e32 v0, v1
; GCN-NEXT: ; return to shader part epilog
entry:
br label %header
diff --git a/llvm/test/Transforms/StructurizeCFG/workarounds/needs-fr-ule.ll b/llvm/test/Transforms/StructurizeCFG/workarounds/needs-fr-ule.ll
index 71347957b9f1..a8d449565bd4 100644
--- a/llvm/test/Transforms/StructurizeCFG/workarounds/needs-fr-ule.ll
+++ b/llvm/test/Transforms/StructurizeCFG/workarounds/needs-fr-ule.ll
@@ -68,7 +68,7 @@ define void @irreducible_mountain_bug(i1 %Pred0, i1 %Pred1, i1 %Pred2, i1 %Pred3
; CHECK: cond.end61:
; CHECK-NEXT: br label [[FLOW7]]
; CHECK: Flow14:
-; CHECK-NEXT: [[TMP15:%.*]] = phi i1 [ [[TMP20:%.*]], [[FLOW15:%.*]] ], [ [[TMP17:%.*]], [[LOOP_EXIT_GUARD1]] ]
+; CHECK-NEXT: [[TMP15:%.*]] = phi i1 [ [[TMP20:%.*]], [[FLOW15:%.*]] ], [ undef, [[LOOP_EXIT_GUARD1]] ]
; CHECK-NEXT: [[TMP16:%.*]] = phi i1 [ [[TMP21:%.*]], [[FLOW15]] ], [ [[DOTINV]], [[LOOP_EXIT_GUARD1]] ]
; CHECK-NEXT: br label [[FLOW13:%.*]]
; CHECK: if.then69:
@@ -102,7 +102,7 @@ define void @irreducible_mountain_bug(i1 %Pred0, i1 %Pred1, i1 %Pred2, i1 %Pred3
; CHECK: exit:
; CHECK-NEXT: ret void
; CHECK: Flow12:
-; CHECK-NEXT: [[TMP17]] = phi i1 [ true, [[LOR_RHS]] ], [ undef, [[WHILE_COND]] ]
+; CHECK-NEXT: [[TMP17:%.*]] = phi i1 [ true, [[LOR_RHS]] ], [ undef, [[WHILE_COND]] ]
; CHECK-NEXT: [[TMP18:%.*]] = phi i1 [ false, [[LOR_RHS]] ], [ true, [[WHILE_COND]] ]
; CHECK-NEXT: [[TMP19:%.*]] = phi i1 [ [[PRED9:%.*]], [[LOR_RHS]] ], [ [[PRED3]], [[WHILE_COND]] ]
; CHECK-NEXT: br i1 [[TMP19]], label [[IRR_GUARD]], label [[FLOW13]]
diff --git a/llvm/test/Transforms/StructurizeCFG/workarounds/needs-unified-loop-exits.ll b/llvm/test/Transforms/StructurizeCFG/workarounds/needs-unified-loop-exits.ll
index 8c042627b828..bac74359e9c5 100644
--- a/llvm/test/Transforms/StructurizeCFG/workarounds/needs-unified-loop-exits.ll
+++ b/llvm/test/Transforms/StructurizeCFG/workarounds/needs-unified-loop-exits.ll
@@ -38,7 +38,7 @@ define void @exiting-block(i1 %PredH1, i1 %PredB2, i1 %PredB1, i1 %PredH2) {
; CHECK: Flow:
; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ false, [[FLOW2]] ], [ undef, [[H2]] ]
; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ false, [[FLOW2]] ], [ true, [[H2]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ [[TMP6:%.*]], [[FLOW2]] ], [ true, [[H2]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ [[TMP7:%.*]], [[FLOW2]] ], [ true, [[H2]] ]
; CHECK-NEXT: br i1 [[TMP4]], label [[LOOP_EXIT_GUARD1:%.*]], label [[H2]]
; CHECK: L2:
; CHECK-NEXT: br label [[FLOW2]]
@@ -51,17 +51,18 @@ define void @exiting-block(i1 %PredH1, i1 %PredB2, i1 %PredB1, i1 %PredH2) {
; CHECK: exit:
; CHECK-NEXT: ret void
; CHECK: Flow5:
-; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ false, [[L1:%.*]] ], [ true, [[LOOP_EXIT_GUARD1]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ undef, [[L1:%.*]] ], [ [[TMP2]], [[LOOP_EXIT_GUARD1]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = phi i1 [ false, [[L1]] ], [ true, [[LOOP_EXIT_GUARD1]] ]
; CHECK-NEXT: br label [[FLOW4]]
; CHECK: loop.exit.guard:
-; CHECK-NEXT: br i1 [[TMP7:%.*]], label [[C:%.*]], label [[EXIT]]
+; CHECK-NEXT: br i1 [[TMP8:%.*]], label [[C:%.*]], label [[EXIT]]
; CHECK: Flow2:
-; CHECK-NEXT: [[TMP6]] = phi i1 [ false, [[L2]] ], [ true, [[B2]] ]
+; CHECK-NEXT: [[TMP7]] = phi i1 [ false, [[L2]] ], [ true, [[B2]] ]
; CHECK-NEXT: br label [[FLOW]]
; CHECK: Flow4:
-; CHECK-NEXT: [[TMP7]] = phi i1 [ [[TMP2]], [[FLOW5]] ], [ [[TMP0]], [[FLOW3]] ]
-; CHECK-NEXT: [[TMP8:%.*]] = phi i1 [ [[TMP5]], [[FLOW5]] ], [ true, [[FLOW3]] ]
-; CHECK-NEXT: br i1 [[TMP8]], label [[LOOP_EXIT_GUARD:%.*]], label [[H1]]
+; CHECK-NEXT: [[TMP8]] = phi i1 [ [[TMP5]], [[FLOW5]] ], [ [[TMP0]], [[FLOW3]] ]
+; CHECK-NEXT: [[TMP9:%.*]] = phi i1 [ [[TMP6]], [[FLOW5]] ], [ true, [[FLOW3]] ]
+; CHECK-NEXT: br i1 [[TMP9]], label [[LOOP_EXIT_GUARD:%.*]], label [[H1]]
; CHECK: loop.exit.guard1:
; CHECK-NEXT: br i1 [[TMP3]], label [[L1]], label [[FLOW5]]
;