[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Temporal divergence lowering i1 (PR #124299)
Petar Avramovic via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Feb 20 09:18:23 PST 2025
https://github.com/petar-avramovic updated https://github.com/llvm/llvm-project/pull/124299
From ac7971c1d2c3303685c6a35b5b08850e0da3d410 Mon Sep 17 00:00:00 2001
From: Petar Avramovic <Petar.Avramovic at amd.com>
Date: Fri, 31 Jan 2025 13:12:17 +0100
Subject: [PATCH] AMDGPU/GlobalISel: Temporal divergence lowering i1
Any use of an i1 outside of the cycle, whether uniform or divergent,
reads a lane mask (kept in an SGPR) that holds, for each lane, the i1
value from the iteration in which that lane exited the cycle.
Create a phi that merges the lane mask across all iterations.
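
For reference, a minimal sketch of the per-iteration merge this lowering
emits (plain C++, wave32 lane masks modeled as uint32_t; the function
name and types are illustrative, not part of the patch). This is the
same pattern buildMergeLaneMasks expands to with
S_ANDN2_B32/S_AND_B32/S_OR_B32, visible in the test updates below:

    #include <cstdint>

    // PrevMask: i1 values recorded for lanes that already exited the cycle.
    // CurVal:   i1 value computed by the current iteration.
    // Exec:     lanes still active inside the cycle.
    // Inactive lanes keep their recorded bit; active lanes are updated, so
    // each lane's bit is written for the last time in the iteration in
    // which that lane exits.
    uint32_t mergeLaneMasks(uint32_t PrevMask, uint32_t CurVal,
                            uint32_t Exec) {
      return (PrevMask & ~Exec) | (CurVal & Exec);
    }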
---
.../AMDGPUGlobalISelDivergenceLowering.cpp | 85 ++++
...-divergent-i1-phis-no-lane-mask-merging.ll | 30 +-
...divergent-i1-phis-no-lane-mask-merging.mir | 52 ++-
...vergence-divergent-i1-used-outside-loop.ll | 310 ++++++++------
...ergence-divergent-i1-used-outside-loop.mir | 394 ++++++++++++------
.../GlobalISel/divergence-structurizer.ll | 123 +++---
.../GlobalISel/divergence-structurizer.mir | 191 +++++----
.../divergence-temporal-divergent-i1.ll | 59 ++-
.../divergence-temporal-divergent-i1.mir | 89 ++--
9 files changed, 851 insertions(+), 482 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp
index 69e0bc63d25aa..dd4171681adaf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp
@@ -18,10 +18,16 @@
#include "AMDGPU.h"
#include "AMDGPUGlobalISelUtils.h"
#include "SILowerI1Copies.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineCycleAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
+#include "llvm/IR/CycleInfo.h"
#include "llvm/InitializePasses.h"
+#include <utility>
#define DEBUG_TYPE "amdgpu-global-isel-divergence-lowering"
@@ -80,6 +86,7 @@ class DivergenceLoweringHelper : public PhiLoweringHelper {
void constrainAsLaneMask(Incoming &In) override;
bool lowerTemporalDivergence();
+ bool lowerTemporalDivergenceI1();
};
DivergenceLoweringHelper::DivergenceLoweringHelper(
@@ -219,6 +226,78 @@ bool DivergenceLoweringHelper::lowerTemporalDivergence() {
return false;
}
+bool DivergenceLoweringHelper::lowerTemporalDivergenceI1() {
+ MachineRegisterInfo::VRegAttrs BoolS1 = {ST->getBoolRC(), LLT::scalar(1)};
+ initializeLaneMaskRegisterAttributes(BoolS1);
+ MachineSSAUpdater SSAUpdater(*MF);
+
+  struct InstCyclePair {
+    MachineInstr *Inst;
+    const MachineCycle *Cycle;
+
+    InstCyclePair() = default;
+    InstCyclePair(MachineInstr *I, const MachineCycle *C)
+        : Inst(I), Cycle(C) {}
+
+    bool operator<(const InstCyclePair &Val) const {
+      return Inst < Val.Inst || (Inst == Val.Inst && Cycle < Val.Cycle);
+    }
+  };
+
+  // Cache the merged lane mask so an Inst that is used outside its Cycle by
+  // several different UseInsts is only merged once.
+ SmallDenseMap<std::pair<MachineInstr *, const MachineCycle *>, Register>
+ InstCycleCache;
+ for (auto [Inst, UseInst, Cycle] : MUI->getTemporalDivergenceList()) {
+ if (MRI->getType(Inst->getOperand(0).getReg()) != LLT::scalar(1))
+ continue;
+
+ if (InstCycleCache.contains({Inst, Cycle}))
+ continue;
+ InstCycleCache[{Inst, Cycle}] = {};
+ }
+
+  for (const auto &InstCycleMapEntry : InstCycleCache) {
+    auto InstCycle = InstCycleMapEntry.getFirst();
+ MachineInstr *Inst = InstCycle.first;
+ const MachineCycle *Cycle = InstCycle.second;
+
+ Register Reg = Inst->getOperand(0).getReg();
+ if (MRI->getType(Reg) != LLT::scalar(1))
+ continue;
+
+ Register MergedMask = MRI->createVirtualRegister(BoolS1);
+ SSAUpdater.Initialize(MergedMask);
+
+ MachineBasicBlock *MBB = Inst->getParent();
+    SSAUpdater.AddAvailableValue(MBB, MergedMask);
+
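+    // Lanes that are not yet in the cycle have no meaningful mask value, so
+    // seed every external predecessor of the cycle entries with IMPLICIT_DEF
+    // and let the SSA updater build the phis from there.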
+ for (auto Entry : Cycle->getEntries()) {
+ for (MachineBasicBlock *Pred : Entry->predecessors()) {
+ if (!Cycle->contains(Pred)) {
+ B.setInsertPt(*Pred, Pred->getFirstTerminator());
+ auto ImplDef = B.buildInstr(AMDGPU::IMPLICIT_DEF, {BoolS1}, {});
+ SSAUpdater.AddAvailableValue(Pred, ImplDef.getReg(0));
+ }
+ }
+ }
+
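+    // At the block that defines Reg, fold the active lanes' new value into
+    // the mask carried over from previous iterations:
+    //   MergedMask = (PrevIterMask & ~exec) | (Reg & exec)
+    // where PrevIterMask is GetValueInMiddleOfBlock's phi of prior values.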
+ buildMergeLaneMasks(*MBB, MBB->getFirstTerminator(), {}, MergedMask,
+ SSAUpdater.GetValueInMiddleOfBlock(MBB), Reg);
+
+ InstCycleCache[InstCycle] = MergedMask;
+ }
+
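+  // Finally, rewrite each use outside the cycle to read the merged lane mask
+  // instead of the i1 value from the last iteration that happened to execute.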
+ for (auto [Inst, UseInst, Cycle] : MUI->getTemporalDivergenceList()) {
+ if (MRI->getType(Inst->getOperand(0).getReg()) != LLT::scalar(1))
+ continue;
+
+ replaceUsesOfRegInInstWith(Inst->getOperand(0).getReg(), UseInst,
+ InstCycleCache[{Inst, Cycle}]);
+ }
+
+ return false;
+}
+
} // End anonymous namespace.
INITIALIZE_PASS_BEGIN(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
@@ -258,6 +337,12 @@ bool AMDGPUGlobalISelDivergenceLowering::runOnMachineFunction(
// Non-i1 temporal divergence lowering.
Changed |= Helper.lowerTemporalDivergence();
+  // This covers both uniform and divergent i1 values. Lane masks are held in
+  // SGPRs and need to be updated in each iteration.
+ Changed |= Helper.lowerTemporalDivergenceI1();
+  // Temporal divergence lowering of a divergent i1 phi used outside of the
+  // cycle could also be handled by lowerPhis, but we do it in
+  // lowerTemporalDivergenceI1 since in some cases lowerPhis does unnecessary
+  // lane mask merging.
Changed |= Helper.lowerPhis();
return Changed;
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
index 65c96a3db5bbf..11acd451d98d7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
@@ -104,20 +104,25 @@ define void @divergent_i1_phi_used_inside_loop(float %val, ptr %addr) {
; GFX10-NEXT: s_mov_b32 s4, 0
; GFX10-NEXT: s_mov_b32 s5, 1
; GFX10-NEXT: s_mov_b32 s6, 0
+; GFX10-NEXT: ; implicit-def: $sgpr7
; GFX10-NEXT: .LBB2_1: ; %loop
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_cvt_f32_u32_e32 v3, s6
; GFX10-NEXT: s_xor_b32 s5, s5, 1
-; GFX10-NEXT: s_add_i32 s6, s6, 1
+; GFX10-NEXT: s_and_b32 s8, s5, 1
+; GFX10-NEXT: s_cmp_lg_u32 s8, 0
; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v3, v0
+; GFX10-NEXT: s_cselect_b32 s8, exec_lo, 0
+; GFX10-NEXT: s_add_i32 s6, s6, 1
; GFX10-NEXT: s_or_b32 s4, vcc_lo, s4
+; GFX10-NEXT: s_andn2_b32 s7, s7, exec_lo
+; GFX10-NEXT: s_and_b32 s8, exec_lo, s8
+; GFX10-NEXT: s_or_b32 s7, s7, s8
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: s_cbranch_execnz .LBB2_1
; GFX10-NEXT: ; %bb.2: ; %exit
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
-; GFX10-NEXT: s_cmp_lg_u32 s5, 0
-; GFX10-NEXT: s_cselect_b32 s4, exec_lo, 0
-; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s7
; GFX10-NEXT: flat_store_dword v[1:2], v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -147,29 +152,34 @@ define void @divergent_i1_phi_used_inside_loop_bigger_loop_body(float %val, floa
; GFX10-NEXT: v_mov_b32_e32 v1, 0x3e8
; GFX10-NEXT: s_mov_b32 s5, 0
; GFX10-NEXT: s_mov_b32 s6, 0
+; GFX10-NEXT: ; implicit-def: $sgpr7
; GFX10-NEXT: s_branch .LBB3_2
; GFX10-NEXT: .LBB3_1: ; %loop_body
; GFX10-NEXT: ; in Loop: Header=BB3_2 Depth=1
; GFX10-NEXT: v_cvt_f32_u32_e32 v8, s6
-; GFX10-NEXT: s_xor_b32 s4, s4, exec_lo
+; GFX10-NEXT: s_mov_b32 s8, exec_lo
; GFX10-NEXT: s_add_i32 s6, s6, 1
+; GFX10-NEXT: s_xor_b32 s4, s4, s8
; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v8, v0
; GFX10-NEXT: s_or_b32 s5, vcc_lo, s5
+; GFX10-NEXT: s_andn2_b32 s7, s7, exec_lo
+; GFX10-NEXT: s_and_b32 s8, exec_lo, s4
+; GFX10-NEXT: s_or_b32 s7, s7, s8
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5
; GFX10-NEXT: s_cbranch_execz .LBB3_6
; GFX10-NEXT: .LBB3_2: ; %loop_start
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: s_cmpk_le_i32 s6, 0x3e8
-; GFX10-NEXT: s_mov_b32 s7, 1
+; GFX10-NEXT: s_mov_b32 s8, 1
; GFX10-NEXT: s_cbranch_scc0 .LBB3_4
; GFX10-NEXT: ; %bb.3: ; %else
; GFX10-NEXT: ; in Loop: Header=BB3_2 Depth=1
-; GFX10-NEXT: s_mov_b32 s7, 0
+; GFX10-NEXT: s_mov_b32 s8, 0
; GFX10-NEXT: flat_store_dword v[6:7], v1
; GFX10-NEXT: .LBB3_4: ; %Flow
; GFX10-NEXT: ; in Loop: Header=BB3_2 Depth=1
-; GFX10-NEXT: s_xor_b32 s7, s7, 1
-; GFX10-NEXT: s_cmp_lg_u32 s7, 0
+; GFX10-NEXT: s_xor_b32 s8, s8, 1
+; GFX10-NEXT: s_cmp_lg_u32 s8, 0
; GFX10-NEXT: s_cbranch_scc1 .LBB3_1
; GFX10-NEXT: ; %bb.5: ; %if
; GFX10-NEXT: ; in Loop: Header=BB3_2 Depth=1
@@ -177,7 +187,7 @@ define void @divergent_i1_phi_used_inside_loop_bigger_loop_body(float %val, floa
; GFX10-NEXT: s_branch .LBB3_1
; GFX10-NEXT: .LBB3_6: ; %exit
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s7
; GFX10-NEXT: flat_store_dword v[2:3], v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir
index f244639ed9762..c0fbdb541ab9f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir
@@ -201,20 +201,27 @@ body: |
; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI [[C1]](s1), %bb.0, %11(s1), %bb.1
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %19(s1), %bb.1
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[C1]](s1), %bb.0, %11(s1), %bb.1
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI2]], [[C2]]
- ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI1]](s32)
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C2]]
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
+ ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C3]]
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32)
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]]
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI1]](s32)
+ ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY3]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.2
; GFX10-NEXT: {{ $}}
@@ -222,7 +229,7 @@ body: |
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[XOR]](s1), [[C5]], [[C4]]
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[S_OR_B32_]](s1), [[C5]], [[C4]]
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
; GFX10-NEXT: SI_RETURN
bb.0:
@@ -285,17 +292,20 @@ body: |
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C]]
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[FCMP]](s1)
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY8]](s1), %bb.0, %37(s1), %bb.5
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.5, [[C1]](s32), %bb.0
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %17(s32), %bb.5
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY8]](s1), %bb.0, %44(s1), %bb.5
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %36(s1), %bb.5
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.5, [[C1]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %17(s32), %bb.5
; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1)
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1000
- ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[PHI2]](s32), [[C2]]
+ ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[PHI3]](s32), [[C2]]
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; GFX10-NEXT: G_BRCOND [[ICMP]](s1), %bb.4
; GFX10-NEXT: G_BR %bb.2
@@ -303,9 +313,9 @@ body: |
; GFX10-NEXT: bb.2:
; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI %24(s1), %bb.4, [[C3]](s1), %bb.1
+ ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s1) = G_PHI %24(s1), %bb.4, [[C3]](s1), %bb.1
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C4]]
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI4]], [[C4]]
; GFX10-NEXT: G_BRCOND [[XOR]](s1), %bb.5
; GFX10-NEXT: G_BR %bb.3
; GFX10-NEXT: {{ $}}
@@ -329,12 +339,16 @@ body: |
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[C8:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[COPY9]], [[C8]]
- ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[XOR1]](s1)
+ ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI3]](s32)
; GFX10-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
; GFX10-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C9]]
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI1]](s32)
- ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[XOR1]](s1)
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C9]]
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI2]](s32)
+ ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[XOR1]](s1)
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.6
; GFX10-NEXT: {{ $}}
@@ -342,7 +356,7 @@ body: |
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; GFX10-NEXT: [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
; GFX10-NEXT: [[C11:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[XOR1]](s1), [[C11]], [[C10]]
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[S_OR_B32_]](s1), [[C11]], [[C10]]
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
; GFX10-NEXT: SI_RETURN
bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll
index f5499656e92ff..8ea9cda367204 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll
@@ -13,24 +13,22 @@ define void @divergent_i1_phi_used_outside_loop(float %val, float %pre.cond.val,
; GFX10-LABEL: divergent_i1_phi_used_outside_loop:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, 1.0, v1
-; GFX10-NEXT: s_andn2_b32 s5, s4, exec_lo
+; GFX10-NEXT: v_cmp_lt_f32_e64 s5, 1.0, v1
; GFX10-NEXT: s_mov_b32 s4, 0
-; GFX10-NEXT: s_and_b32 s6, exec_lo, vcc_lo
-; GFX10-NEXT: s_or_b32 s6, s5, s6
-; GFX10-NEXT: s_mov_b32 s5, 0
+; GFX10-NEXT: s_mov_b32 s6, 0
+; GFX10-NEXT: ; implicit-def: $sgpr7
; GFX10-NEXT: .LBB0_1: ; %loop
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s5
+; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s6
; GFX10-NEXT: s_mov_b32 s8, exec_lo
-; GFX10-NEXT: s_mov_b32 s7, s6
-; GFX10-NEXT: s_add_i32 s5, s5, 1
-; GFX10-NEXT: s_xor_b32 s6, s6, s8
+; GFX10-NEXT: s_add_i32 s6, s6, 1
+; GFX10-NEXT: s_xor_b32 s8, s5, s8
; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v0
; GFX10-NEXT: s_or_b32 s4, vcc_lo, s4
-; GFX10-NEXT: s_andn2_b32 s8, s7, exec_lo
-; GFX10-NEXT: s_and_b32 s6, exec_lo, s6
-; GFX10-NEXT: s_or_b32 s6, s8, s6
+; GFX10-NEXT: s_andn2_b32 s7, s7, exec_lo
+; GFX10-NEXT: s_and_b32 s9, exec_lo, s5
+; GFX10-NEXT: s_mov_b32 s5, s8
+; GFX10-NEXT: s_or_b32 s7, s7, s9
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: s_cbranch_execnz .LBB0_1
; GFX10-NEXT: ; %bb.2: ; %exit
@@ -135,22 +133,75 @@ define void @divergent_i1_xor_used_outside_loop(float %val, float %pre.cond.val,
; GFX10-LABEL: divergent_i1_xor_used_outside_loop:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_lt_f32_e64 s4, 1.0, v1
-; GFX10-NEXT: s_mov_b32 s5, 0
+; GFX10-NEXT: v_cmp_lt_f32_e64 s5, 1.0, v1
+; GFX10-NEXT: s_mov_b32 s4, 0
; GFX10-NEXT: s_mov_b32 s6, 0
+; GFX10-NEXT: ; implicit-def: $sgpr7
; GFX10-NEXT: .LBB2_1: ; %loop
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s6
+; GFX10-NEXT: s_mov_b32 s8, exec_lo
; GFX10-NEXT: s_add_i32 s6, s6, 1
-; GFX10-NEXT: s_xor_b32 s4, s4, exec_lo
+; GFX10-NEXT: s_xor_b32 s5, s5, s8
; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v0
-; GFX10-NEXT: s_or_b32 s5, vcc_lo, s5
-; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5
+; GFX10-NEXT: s_or_b32 s4, vcc_lo, s4
+; GFX10-NEXT: s_andn2_b32 s7, s7, exec_lo
+; GFX10-NEXT: s_and_b32 s8, exec_lo, s5
+; GFX10-NEXT: s_or_b32 s7, s7, s8
+; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: s_cbranch_execnz .LBB2_1
; GFX10-NEXT: ; %bb.2: ; %exit
-; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5
-; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s4
+; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s7
+; GFX10-NEXT: flat_store_dword v[2:3], v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %pre.cond = fcmp ogt float %pre.cond.val, 1.0
+ br label %loop
+
+loop:
+ %counter = phi i32 [ 0, %entry ], [ %counter.plus.1, %loop ]
+ %bool.counter = phi i1 [ %pre.cond, %entry ], [ %neg.bool.counter, %loop ]
+ %neg.bool.counter = xor i1 %bool.counter, true
+ %f.counter = uitofp i32 %counter to float
+ %cond = fcmp ogt float %f.counter, %val
+ %counter.plus.1 = add i32 %counter, 1
+ br i1 %cond, label %exit, label %loop
+
+exit:
+ %select = select i1 %neg.bool.counter, float 1.000000e+00, float 0.000000e+00
+ store float %select, ptr %addr
+ ret void
+}
+
+define void @divergent_i1_xor_used_outside_loop_twice(float %val, float %pre.cond.val, ptr %addr, ptr %addr2) {
+; GFX10-LABEL: divergent_i1_xor_used_outside_loop_twice:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_cmp_lt_f32_e64 s5, 1.0, v1
+; GFX10-NEXT: s_mov_b32 s4, 0
+; GFX10-NEXT: s_mov_b32 s7, 0
+; GFX10-NEXT: ; implicit-def: $sgpr6
+; GFX10-NEXT: .LBB3_1: ; %loop
+; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s7
+; GFX10-NEXT: s_mov_b32 s8, exec_lo
+; GFX10-NEXT: s_add_i32 s7, s7, 1
+; GFX10-NEXT: s_xor_b32 s5, s5, s8
+; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v0
+; GFX10-NEXT: s_or_b32 s4, vcc_lo, s4
+; GFX10-NEXT: s_andn2_b32 s6, s6, exec_lo
+; GFX10-NEXT: s_and_b32 s8, exec_lo, s5
+; GFX10-NEXT: s_or_b32 s6, s6, s8
+; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4
+; GFX10-NEXT: s_cbranch_execnz .LBB3_1
+; GFX10-NEXT: ; %bb.2: ; %exit
+; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v1, -1.0, 2.0, s6
; GFX10-NEXT: flat_store_dword v[2:3], v0
+; GFX10-NEXT: flat_store_dword v[4:5], v1
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
@@ -169,6 +220,8 @@ loop:
exit:
%select = select i1 %neg.bool.counter, float 1.000000e+00, float 0.000000e+00
store float %select, ptr %addr
+ %select2 = select i1 %neg.bool.counter, float 2.000000e+00, float -1.000000e+00
+ store float %select2, ptr %addr2
ret void
}
@@ -189,63 +242,67 @@ define void @divergent_i1_xor_used_outside_loop_larger_loop_body(i32 %num.elts,
; GFX10-NEXT: s_mov_b32 s6, exec_lo
; GFX10-NEXT: s_mov_b32 s8, 0
; GFX10-NEXT: s_and_saveexec_b32 s7, vcc_lo
-; GFX10-NEXT: s_cbranch_execz .LBB3_6
+; GFX10-NEXT: s_cbranch_execz .LBB4_6
; GFX10-NEXT: ; %bb.1: ; %loop.start.preheader
; GFX10-NEXT: s_mov_b32 s4, 0
-; GFX10-NEXT: ; implicit-def: $sgpr9
; GFX10-NEXT: ; implicit-def: $sgpr10
-; GFX10-NEXT: s_branch .LBB3_3
-; GFX10-NEXT: .LBB3_2: ; %Flow
-; GFX10-NEXT: ; in Loop: Header=BB3_3 Depth=1
+; GFX10-NEXT: ; implicit-def: $sgpr11
+; GFX10-NEXT: ; implicit-def: $sgpr9
+; GFX10-NEXT: s_branch .LBB4_3
+; GFX10-NEXT: .LBB4_2: ; %Flow
+; GFX10-NEXT: ; in Loop: Header=BB4_3 Depth=1
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5
-; GFX10-NEXT: s_xor_b32 s5, s10, exec_lo
-; GFX10-NEXT: s_and_b32 s11, exec_lo, s9
-; GFX10-NEXT: s_or_b32 s8, s11, s8
+; GFX10-NEXT: s_xor_b32 s5, s11, exec_lo
+; GFX10-NEXT: s_and_b32 s12, exec_lo, s10
+; GFX10-NEXT: s_or_b32 s8, s12, s8
+; GFX10-NEXT: s_andn2_b32 s9, s9, exec_lo
+; GFX10-NEXT: s_and_b32 s5, exec_lo, s5
+; GFX10-NEXT: s_or_b32 s9, s9, s5
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s8
-; GFX10-NEXT: s_cbranch_execz .LBB3_5
-; GFX10-NEXT: .LBB3_3: ; %loop.start
+; GFX10-NEXT: s_cbranch_execz .LBB4_5
+; GFX10-NEXT: .LBB4_3: ; %loop.start
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: s_ashr_i32 s5, s4, 31
-; GFX10-NEXT: s_andn2_b32 s9, s9, exec_lo
+; GFX10-NEXT: s_andn2_b32 s10, s10, exec_lo
; GFX10-NEXT: s_lshl_b64 s[12:13], s[4:5], 2
-; GFX10-NEXT: s_andn2_b32 s5, s10, exec_lo
+; GFX10-NEXT: s_andn2_b32 s5, s11, exec_lo
; GFX10-NEXT: v_mov_b32_e32 v5, s12
; GFX10-NEXT: v_mov_b32_e32 v6, s13
-; GFX10-NEXT: s_and_b32 s10, exec_lo, exec_lo
; GFX10-NEXT: s_and_b32 s11, exec_lo, exec_lo
-; GFX10-NEXT: s_or_b32 s10, s5, s10
+; GFX10-NEXT: s_and_b32 s12, exec_lo, exec_lo
+; GFX10-NEXT: s_or_b32 s11, s5, s11
; GFX10-NEXT: v_add_co_u32 v5, vcc_lo, v1, v5
; GFX10-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, v2, v6, vcc_lo
-; GFX10-NEXT: s_or_b32 s9, s9, s11
+; GFX10-NEXT: s_or_b32 s10, s10, s12
; GFX10-NEXT: global_load_dword v5, v[5:6], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v5
; GFX10-NEXT: s_and_saveexec_b32 s5, vcc_lo
-; GFX10-NEXT: s_cbranch_execz .LBB3_2
+; GFX10-NEXT: s_cbranch_execz .LBB4_2
; GFX10-NEXT: ; %bb.4: ; %loop.cond
-; GFX10-NEXT: ; in Loop: Header=BB3_3 Depth=1
+; GFX10-NEXT: ; in Loop: Header=BB4_3 Depth=1
; GFX10-NEXT: v_cmp_lt_i32_e32 vcc_lo, s4, v0
+; GFX10-NEXT: s_andn2_b32 s11, s11, exec_lo
+; GFX10-NEXT: s_and_b32 s12, exec_lo, 0
; GFX10-NEXT: s_andn2_b32 s10, s10, exec_lo
-; GFX10-NEXT: s_and_b32 s11, exec_lo, 0
-; GFX10-NEXT: s_andn2_b32 s9, s9, exec_lo
; GFX10-NEXT: s_add_i32 s4, s4, 1
-; GFX10-NEXT: s_and_b32 s12, exec_lo, vcc_lo
-; GFX10-NEXT: s_or_b32 s10, s10, s11
-; GFX10-NEXT: s_or_b32 s9, s9, s12
-; GFX10-NEXT: s_branch .LBB3_2
-; GFX10-NEXT: .LBB3_5: ; %loop.exit.guard
+; GFX10-NEXT: s_and_b32 s13, exec_lo, vcc_lo
+; GFX10-NEXT: s_or_b32 s11, s11, s12
+; GFX10-NEXT: s_or_b32 s10, s10, s13
+; GFX10-NEXT: s_branch .LBB4_2
+; GFX10-NEXT: .LBB4_5: ; %loop.exit.guard
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8
; GFX10-NEXT: s_andn2_b32 s4, s6, exec_lo
-; GFX10-NEXT: s_and_b32 s5, exec_lo, s5
+; GFX10-NEXT: s_and_b32 s5, exec_lo, s9
; GFX10-NEXT: s_or_b32 s6, s4, s5
-; GFX10-NEXT: .LBB3_6: ; %Flow1
+; GFX10-NEXT: .LBB4_6: ; %Flow1
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s7
; GFX10-NEXT: s_and_saveexec_b32 s4, s6
-; GFX10-NEXT: s_cbranch_execz .LBB3_8
+; GFX10-NEXT: s_cbranch_execz .LBB4_8
; GFX10-NEXT: ; %bb.7: ; %block.after.loop
; GFX10-NEXT: v_mov_b32_e32 v0, 5
; GFX10-NEXT: flat_store_dword v[3:4], v0
-; GFX10-NEXT: .LBB3_8: ; %exit
+; GFX10-NEXT: .LBB4_8: ; %exit
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
@@ -288,50 +345,54 @@ define void @divergent_i1_icmp_used_outside_loop(i32 %v0, i32 %v1, ptr addrspace
; GFX10-LABEL: divergent_i1_icmp_used_outside_loop:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_mov_b32 s5, 0
-; GFX10-NEXT: v_mov_b32_e32 v4, s5
-; GFX10-NEXT: s_branch .LBB4_2
-; GFX10-NEXT: .LBB4_1: ; %Flow
-; GFX10-NEXT: ; in Loop: Header=BB4_2 Depth=1
+; GFX10-NEXT: s_mov_b32 s4, 0
+; GFX10-NEXT: ; implicit-def: $sgpr5
+; GFX10-NEXT: v_mov_b32_e32 v4, s4
+; GFX10-NEXT: s_branch .LBB5_2
+; GFX10-NEXT: .LBB5_1: ; %Flow
+; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s7
-; GFX10-NEXT: s_and_b32 s4, exec_lo, s6
-; GFX10-NEXT: s_or_b32 s5, s4, s5
-; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5
-; GFX10-NEXT: s_cbranch_execz .LBB4_6
-; GFX10-NEXT: .LBB4_2: ; %cond.block.0
+; GFX10-NEXT: s_and_b32 s6, exec_lo, s6
+; GFX10-NEXT: s_or_b32 s4, s6, s4
+; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4
+; GFX10-NEXT: s_cbranch_execz .LBB5_6
+; GFX10-NEXT: .LBB5_2: ; %cond.block.0
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v4
+; GFX10-NEXT: s_andn2_b32 s5, s5, exec_lo
+; GFX10-NEXT: s_and_b32 s6, exec_lo, vcc_lo
+; GFX10-NEXT: s_or_b32 s5, s5, s6
; GFX10-NEXT: s_and_saveexec_b32 s6, vcc_lo
-; GFX10-NEXT: s_cbranch_execz .LBB4_4
+; GFX10-NEXT: s_cbranch_execz .LBB5_4
; GFX10-NEXT: ; %bb.3: ; %if.block.0
-; GFX10-NEXT: ; in Loop: Header=BB4_2 Depth=1
+; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1
; GFX10-NEXT: v_ashrrev_i32_e32 v5, 31, v4
; GFX10-NEXT: v_lshlrev_b64 v[8:9], 2, v[4:5]
-; GFX10-NEXT: v_add_co_u32 v8, s4, v2, v8
-; GFX10-NEXT: v_add_co_ci_u32_e64 v9, s4, v3, v9, s4
+; GFX10-NEXT: v_add_co_u32 v8, vcc_lo, v2, v8
+; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v3, v9, vcc_lo
; GFX10-NEXT: global_store_dword v[8:9], v4, off
-; GFX10-NEXT: .LBB4_4: ; %loop.break.block
-; GFX10-NEXT: ; in Loop: Header=BB4_2 Depth=1
+; GFX10-NEXT: .LBB5_4: ; %loop.break.block
+; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s6
-; GFX10-NEXT: v_cmp_ne_u32_e64 s4, v1, v4
+; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, v1, v4
; GFX10-NEXT: s_mov_b32 s6, exec_lo
-; GFX10-NEXT: s_and_saveexec_b32 s7, s4
-; GFX10-NEXT: s_cbranch_execz .LBB4_1
+; GFX10-NEXT: s_and_saveexec_b32 s7, vcc_lo
+; GFX10-NEXT: s_cbranch_execz .LBB5_1
; GFX10-NEXT: ; %bb.5: ; %loop.cond
-; GFX10-NEXT: ; in Loop: Header=BB4_2 Depth=1
+; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1
; GFX10-NEXT: v_add_nc_u32_e32 v4, 1, v4
-; GFX10-NEXT: s_andn2_b32 s4, s6, exec_lo
-; GFX10-NEXT: s_and_b32 s6, exec_lo, 0
-; GFX10-NEXT: s_or_b32 s6, s4, s6
-; GFX10-NEXT: s_branch .LBB4_1
-; GFX10-NEXT: .LBB4_6: ; %cond.block.1
-; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5
-; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
-; GFX10-NEXT: s_cbranch_execz .LBB4_8
+; GFX10-NEXT: s_andn2_b32 s6, s6, exec_lo
+; GFX10-NEXT: s_and_b32 s8, exec_lo, 0
+; GFX10-NEXT: s_or_b32 s6, s6, s8
+; GFX10-NEXT: s_branch .LBB5_1
+; GFX10-NEXT: .LBB5_6: ; %cond.block.1
+; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
+; GFX10-NEXT: s_and_saveexec_b32 s4, s5
+; GFX10-NEXT: s_cbranch_execz .LBB5_8
; GFX10-NEXT: ; %bb.7: ; %if.block.1
; GFX10-NEXT: global_store_dword v[6:7], v4, off
-; GFX10-NEXT: .LBB4_8: ; %exit
+; GFX10-NEXT: .LBB5_8: ; %exit
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -394,31 +455,34 @@ define amdgpu_ps void @divergent_i1_freeze_used_outside_loop(i32 %n, ptr addrspa
; GFX10-NEXT: s_mov_b32 s1, exec_lo
; GFX10-NEXT: s_mov_b32 s2, 0
; GFX10-NEXT: s_mov_b32 s0, 0
+; GFX10-NEXT: ; implicit-def: $sgpr4
; GFX10-NEXT: ; implicit-def: $sgpr3
-; GFX10-NEXT: s_branch .LBB5_2
-; GFX10-NEXT: .LBB5_1: ; %loop.cond
-; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1
-; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
+; GFX10-NEXT: s_branch .LBB6_2
+; GFX10-NEXT: .LBB6_1: ; %loop.cond
+; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
+; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5
; GFX10-NEXT: v_cmp_lt_i32_e32 vcc_lo, s0, v0
; GFX10-NEXT: s_add_i32 s0, s0, 1
; GFX10-NEXT: s_or_b32 s2, vcc_lo, s2
+; GFX10-NEXT: s_andn2_b32 s3, s3, exec_lo
+; GFX10-NEXT: s_and_b32 s5, exec_lo, s4
; GFX10-NEXT: s_andn2_b32 s1, s1, exec_lo
-; GFX10-NEXT: s_and_b32 s4, exec_lo, s3
-; GFX10-NEXT: s_or_b32 s1, s1, s4
+; GFX10-NEXT: s_or_b32 s3, s3, s5
+; GFX10-NEXT: s_or_b32 s1, s1, s5
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s2
-; GFX10-NEXT: s_cbranch_execz .LBB5_4
-; GFX10-NEXT: .LBB5_2: ; %loop.start
+; GFX10-NEXT: s_cbranch_execz .LBB6_4
+; GFX10-NEXT: .LBB6_2: ; %loop.start
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX10-NEXT: s_andn2_b32 s3, s3, exec_lo
-; GFX10-NEXT: s_and_b32 s4, exec_lo, s1
-; GFX10-NEXT: s_or_b32 s3, s3, s4
-; GFX10-NEXT: s_and_saveexec_b32 s4, s1
-; GFX10-NEXT: s_cbranch_execz .LBB5_1
+; GFX10-NEXT: s_andn2_b32 s4, s4, exec_lo
+; GFX10-NEXT: s_and_b32 s5, exec_lo, s1
+; GFX10-NEXT: s_or_b32 s4, s4, s5
+; GFX10-NEXT: s_and_saveexec_b32 s5, s1
+; GFX10-NEXT: s_cbranch_execz .LBB6_1
; GFX10-NEXT: ; %bb.3: ; %is.eq.zero
-; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1
+; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
; GFX10-NEXT: s_ashr_i32 s1, s0, 31
; GFX10-NEXT: s_lshl_b64 s[6:7], s[0:1], 2
-; GFX10-NEXT: s_andn2_b32 s1, s3, exec_lo
+; GFX10-NEXT: s_andn2_b32 s1, s4, exec_lo
; GFX10-NEXT: v_mov_b32_e32 v5, s6
; GFX10-NEXT: v_mov_b32_e32 v6, s7
; GFX10-NEXT: v_add_co_u32 v5, vcc_lo, v1, v5
@@ -426,11 +490,11 @@ define amdgpu_ps void @divergent_i1_freeze_used_outside_loop(i32 %n, ptr addrspa
; GFX10-NEXT: global_load_dword v5, v[5:6], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5
-; GFX10-NEXT: s_and_b32 s3, exec_lo, vcc_lo
-; GFX10-NEXT: s_or_b32 s3, s1, s3
+; GFX10-NEXT: s_and_b32 s4, exec_lo, vcc_lo
+; GFX10-NEXT: s_or_b32 s4, s1, s4
; GFX10-NEXT: ; implicit-def: $sgpr1
-; GFX10-NEXT: s_branch .LBB5_1
-; GFX10-NEXT: .LBB5_4: ; %exit
+; GFX10-NEXT: s_branch .LBB6_1
+; GFX10-NEXT: .LBB6_4: ; %exit
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s2
; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s3
; GFX10-NEXT: flat_store_dword v[3:4], v0
@@ -468,39 +532,43 @@ define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, ptr addrspace(1) %a
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_mov_b32 s4, 0
; GFX10-NEXT: s_mov_b32 s0, 0
-; GFX10-NEXT: ; implicit-def: $sgpr5
; GFX10-NEXT: ; implicit-def: $sgpr6
-; GFX10-NEXT: s_branch .LBB6_2
-; GFX10-NEXT: .LBB6_1: ; %Flow
-; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
+; GFX10-NEXT: ; implicit-def: $sgpr7
+; GFX10-NEXT: ; implicit-def: $sgpr5
+; GFX10-NEXT: s_branch .LBB7_2
+; GFX10-NEXT: .LBB7_1: ; %Flow
+; GFX10-NEXT: ; in Loop: Header=BB7_2 Depth=1
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s1
-; GFX10-NEXT: s_and_b32 s1, exec_lo, s5
+; GFX10-NEXT: s_and_b32 s1, exec_lo, s6
; GFX10-NEXT: s_or_b32 s4, s1, s4
+; GFX10-NEXT: s_andn2_b32 s1, s5, exec_lo
+; GFX10-NEXT: s_and_b32 s2, exec_lo, s7
+; GFX10-NEXT: s_or_b32 s5, s1, s2
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4
-; GFX10-NEXT: s_cbranch_execz .LBB6_4
-; GFX10-NEXT: .LBB6_2: ; %A
+; GFX10-NEXT: s_cbranch_execz .LBB7_4
+; GFX10-NEXT: .LBB7_2: ; %A
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: s_ashr_i32 s1, s0, 31
-; GFX10-NEXT: s_mov_b32 s7, exec_lo
+; GFX10-NEXT: s_mov_b32 s8, exec_lo
; GFX10-NEXT: s_lshl_b64 s[2:3], s[0:1], 2
-; GFX10-NEXT: s_andn2_b32 s1, s6, exec_lo
+; GFX10-NEXT: s_andn2_b32 s1, s7, exec_lo
; GFX10-NEXT: v_mov_b32_e32 v7, s3
; GFX10-NEXT: v_mov_b32_e32 v6, s2
-; GFX10-NEXT: s_and_b32 s6, exec_lo, s7
-; GFX10-NEXT: s_andn2_b32 s5, s5, exec_lo
-; GFX10-NEXT: s_and_b32 s7, exec_lo, exec_lo
-; GFX10-NEXT: s_or_b32 s6, s1, s6
+; GFX10-NEXT: s_and_b32 s7, exec_lo, s8
+; GFX10-NEXT: s_andn2_b32 s6, s6, exec_lo
+; GFX10-NEXT: s_and_b32 s8, exec_lo, exec_lo
+; GFX10-NEXT: s_or_b32 s7, s1, s7
; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v2, v6
; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v3, v7, vcc_lo
-; GFX10-NEXT: s_or_b32 s5, s5, s7
+; GFX10-NEXT: s_or_b32 s6, s6, s8
; GFX10-NEXT: global_load_dword v6, v[6:7], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6
; GFX10-NEXT: s_and_saveexec_b32 s1, vcc_lo
-; GFX10-NEXT: s_cbranch_execz .LBB6_1
+; GFX10-NEXT: s_cbranch_execz .LBB7_1
; GFX10-NEXT: ; %bb.3: ; %loop.body
-; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
+; GFX10-NEXT: ; in Loop: Header=BB7_2 Depth=1
; GFX10-NEXT: v_mov_b32_e32 v7, s3
; GFX10-NEXT: v_mov_b32_e32 v6, s2
; GFX10-NEXT: s_add_i32 s2, s0, 1
@@ -508,27 +576,27 @@ define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, ptr addrspace(1) %a
; GFX10-NEXT: s_cselect_b32 s0, exec_lo, 0
; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v0, v6
; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v1, v7, vcc_lo
-; GFX10-NEXT: s_andn2_b32 s3, s6, exec_lo
-; GFX10-NEXT: s_and_b32 s6, exec_lo, 0
-; GFX10-NEXT: s_andn2_b32 s5, s5, exec_lo
+; GFX10-NEXT: s_andn2_b32 s3, s7, exec_lo
+; GFX10-NEXT: s_and_b32 s7, exec_lo, 0
+; GFX10-NEXT: s_andn2_b32 s6, s6, exec_lo
; GFX10-NEXT: global_load_dword v8, v[6:7], off
; GFX10-NEXT: s_and_b32 s0, exec_lo, s0
-; GFX10-NEXT: s_or_b32 s6, s3, s6
-; GFX10-NEXT: s_or_b32 s5, s5, s0
+; GFX10-NEXT: s_or_b32 s7, s3, s7
+; GFX10-NEXT: s_or_b32 s6, s6, s0
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_add_nc_u32_e32 v8, 1, v8
; GFX10-NEXT: global_store_dword v[6:7], v8, off
-; GFX10-NEXT: s_branch .LBB6_1
-; GFX10-NEXT: .LBB6_4: ; %loop.exit.guard
+; GFX10-NEXT: s_branch .LBB7_1
+; GFX10-NEXT: .LBB7_4: ; %loop.exit.guard
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
-; GFX10-NEXT: s_and_saveexec_b32 s0, s6
+; GFX10-NEXT: s_and_saveexec_b32 s0, s5
; GFX10-NEXT: s_xor_b32 s0, exec_lo, s0
-; GFX10-NEXT: s_cbranch_execz .LBB6_6
+; GFX10-NEXT: s_cbranch_execz .LBB7_6
; GFX10-NEXT: ; %bb.5: ; %break.body
; GFX10-NEXT: v_mov_b32_e32 v0, 10
; GFX10-NEXT: global_store_dword v[4:5], v0, off
-; GFX10-NEXT: .LBB6_6: ; %exit
+; GFX10-NEXT: .LBB7_6: ; %exit
; GFX10-NEXT: s_endpgm
entry:
br label %A
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir
index 87e4f4b666d57..e800cb2e24a7a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir
@@ -18,33 +18,31 @@ body: |
; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C]]
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[FCMP]](s1)
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
- ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[DEF]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[FCMP]](s1)
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.0, %22(s1), %bb.1
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.1, [[C1]](s32), %bb.0
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %11(s32), %bb.1
- ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
- ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY4]](s1), %bb.0, %29(s1), %bb.1
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %21(s1), %bb.1
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.1, [[C1]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %11(s32), %bb.1
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[COPY5]](s1)
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1)
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY6]], [[C2]]
- ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
- ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY5]], [[C2]]
+ ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI3]](s32)
; GFX10-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]]
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI1]](s32)
- ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C3]]
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI2]](s32)
+ ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY6]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.2
; GFX10-NEXT: {{ $}}
@@ -52,7 +50,7 @@ body: |
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY6]](s1), [[C5]], [[C4]]
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[S_OR_B32_]](s1), [[C5]], [[C4]]
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
; GFX10-NEXT: SI_RETURN
bb.0:
@@ -242,23 +240,30 @@ body: |
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C]]
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[FCMP]](s1)
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY4]](s1), %bb.0, %22(s1), %bb.1
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.1, [[C1]](s32), %bb.0
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %11(s32), %bb.1
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY4]](s1), %bb.0, %29(s1), %bb.1
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %21(s1), %bb.1
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.1, [[C1]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %11(s32), %bb.1
; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1)
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY5]], [[C2]]
- ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
+ ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI3]](s32)
; GFX10-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]]
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI1]](s32)
- ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C3]]
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI2]](s32)
+ ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY7]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.2
; GFX10-NEXT: {{ $}}
@@ -266,7 +271,7 @@ body: |
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[XOR]](s1), [[C5]], [[C4]]
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[S_OR_B32_]](s1), [[C5]], [[C4]]
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
; GFX10-NEXT: SI_RETURN
bb.0:
@@ -307,6 +312,111 @@ body: |
SI_RETURN
...
+
+---
+name: divergent_i1_xor_used_outside_loop_twice
+legalized: true
+tracksRegLiveness: true
+body: |
+ ; GFX10-LABEL: name: divergent_i1_xor_used_outside_loop_twice
+ ; GFX10: bb.0:
+ ; GFX10-NEXT: successors: %bb.1(0x80000000)
+ ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C]]
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[FCMP]](s1)
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.1:
+ ; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY6]](s1), %bb.0, %35(s1), %bb.1
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %27(s1), %bb.1
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.1, [[C1]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %14(s32), %bb.1
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1)
+ ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY7]], [[C2]]
+ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
+ ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI3]](s32)
+ ; GFX10-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
+ ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C3]]
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI2]](s32)
+ ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
+ ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: G_BR %bb.2
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.2:
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
+ ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[S_OR_B32_]](s1), [[C5]], [[C4]]
+ ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
+ ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+00
+ ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
+ ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[S_OR_B32_]](s1), [[C7]], [[C6]]
+ ; GFX10-NEXT: G_STORE [[SELECT1]](s32), [[MV1]](p0) :: (store (s32))
+ ; GFX10-NEXT: SI_RETURN
+ bb.0:
+ successors: %bb.1(0x80000000)
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %3:_(s32) = COPY $vgpr3
+ %4:_(p0) = G_MERGE_VALUES %2(s32), %3(s32)
+ %5:_(s32) = COPY $vgpr4
+ %6:_(s32) = COPY $vgpr5
+ %7:_(p0) = G_MERGE_VALUES %5(s32), %6(s32)
+ %8:_(s32) = G_FCONSTANT float 1.000000e+00
+ %9:_(s1) = G_FCMP floatpred(ogt), %1(s32), %8
+ %10:_(s32) = G_CONSTANT i32 0
+
+ bb.1:
+ successors: %bb.2(0x04000000), %bb.1(0x7c000000)
+
+ %11:_(s32) = G_PHI %12(s32), %bb.1, %10(s32), %bb.0
+ %13:_(s32) = G_PHI %10(s32), %bb.0, %14(s32), %bb.1
+ %15:_(s1) = G_PHI %9(s1), %bb.0, %16(s1), %bb.1
+ %17:_(s1) = G_CONSTANT i1 true
+ %16:_(s1) = G_XOR %15, %17
+ %18:_(s32) = G_UITOFP %13(s32)
+ %19:_(s1) = G_FCMP floatpred(ogt), %18(s32), %0
+ %20:_(s32) = G_CONSTANT i32 1
+ %14:_(s32) = G_ADD %13, %20
+ %12:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %19(s1), %11(s32)
+ SI_LOOP %12(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+ G_BR %bb.2
+
+ bb.2:
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %12(s32)
+ %21:_(s32) = G_FCONSTANT float 0.000000e+00
+ %22:_(s32) = G_FCONSTANT float 1.000000e+00
+ %23:_(s32) = G_SELECT %16(s1), %22, %21
+ G_STORE %23(s32), %4(p0) :: (store (s32))
+ %24:_(s32) = G_FCONSTANT float -1.000000e+00
+ %25:_(s32) = G_FCONSTANT float 2.000000e+00
+ %26:_(s32) = G_SELECT %16(s1), %25, %24
+ G_STORE %26(s32), %7(p0) :: (store (s32))
+ SI_RETURN
+...
+
---
name: divergent_i1_xor_used_outside_loop_larger_loop_body
legalized: true
@@ -339,12 +449,13 @@ body: |
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: G_BR %bb.3
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.2:
; GFX10-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY5]](s1), %bb.0, %38(s1), %bb.8
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY5]](s1), %bb.0, %45(s1), %bb.8
; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1)
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY7]](s1), %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -353,13 +464,15 @@ body: |
; GFX10-NEXT: bb.3:
; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.1, %60(s1), %bb.7
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.1, %47(s1), %bb.7
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C2]](s32), %bb.1, %17(s32), %bb.7
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI %19(s32), %bb.7, [[C2]](s32), %bb.1
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF3]](s1), %bb.1, %67(s1), %bb.7
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.1, %54(s1), %bb.7
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.1, %37(s1), %bb.7
+ ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[C2]](s32), %bb.1, %17(s32), %bb.7
+ ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI %19(s32), %bb.7, [[C2]](s32), %bb.1
; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1)
; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]](s1)
- ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI4]](s32)
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1)
+ ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI5]](s32)
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C3]](s32)
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL]](s64)
@@ -367,16 +480,16 @@ body: |
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C4]]
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1)
; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1)
+ ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1)
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
- ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
+ ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
- ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1)
+ ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1)
; GFX10-NEXT: [[SI_IF2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.4
; GFX10-NEXT: {{ $}}
@@ -384,16 +497,16 @@ body: |
; GFX10-NEXT: successors: %bb.7(0x80000000)
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI4]], [[C6]]
- ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[PHI4]](s32), [[COPY]]
- ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1)
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI5]], [[C6]]
+ ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[PHI5]](s32), [[COPY]]
+ ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1)
; GFX10-NEXT: [[C7:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
- ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[C7]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY12]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY15]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[C7]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY13]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY16]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY13]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY14]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY15]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc
; GFX10-NEXT: G_BR %bb.7
; GFX10-NEXT: {{ $}}
@@ -410,16 +523,20 @@ body: |
; GFX10-NEXT: bb.7:
; GFX10-NEXT: successors: %bb.8(0x04000000), %bb.3(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_1]](s1), %bb.3, [[S_OR_B32_3]](s1), %bb.4
- ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.3, [[S_OR_B32_2]](s1), %bb.4
- ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.4, [[DEF]](s32), %bb.3
- ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1)
+ ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_1]](s1), %bb.3, [[S_OR_B32_3]](s1), %bb.4
+ ; GFX10-NEXT: [[PHI7:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.3, [[S_OR_B32_2]](s1), %bb.4
+ ; GFX10-NEXT: [[PHI8:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.4, [[DEF]](s32), %bb.3
; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32(s1) = COPY [[PHI6]](s1)
+ ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32(s1) = COPY [[PHI7]](s1)
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF2]](s32)
; GFX10-NEXT: [[C9:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY17]], [[C9]]
- ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[XOR]](s1)
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY16]](s1), [[PHI3]](s32)
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY18]], [[C9]]
+ ; GFX10-NEXT: [[COPY19:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY17]](s1), [[PHI4]](s32)
+ ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY19]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[COPY20:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_4]](s1)
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.8
; GFX10-NEXT: {{ $}}
@@ -427,9 +544,9 @@ body: |
; GFX10-NEXT: successors: %bb.2(0x80000000)
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
- ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY18]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_ANDN2_B32_5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY20]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_5]](s1), [[S_AND_B32_5]](s1), implicit-def $scc
; GFX10-NEXT: G_BR %bb.2
bb.0:
successors: %bb.1(0x40000000), %bb.2(0x40000000)
@@ -538,38 +655,45 @@ body: |
; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %11(s32), %bb.6, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %13(s32), %bb.6
- ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[PHI1]](s32), implicit $exec_lo
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %29(s1), %bb.6
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %11(s32), %bb.6, [[C]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %13(s32), %bb.6
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[PHI2]](s32), implicit $exec_lo
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.2:
; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PHI1]]
+ ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PHI2]]
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.3
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.3:
; GFX10-NEXT: successors: %bb.4(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI1]](s32)
+ ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32)
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C1]](s32)
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL]](s64)
- ; GFX10-NEXT: G_STORE [[PHI1]](s32), [[PTR_ADD]](p1) :: (store (s32), addrspace 1)
+ ; GFX10-NEXT: G_STORE [[PHI2]](s32), [[PTR_ADD]](p1) :: (store (s32), addrspace 1)
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.4:
; GFX10-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000)
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
- ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[PHI1]]
+ ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[PHI2]]
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[C2]](s1)
- ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[COPY7]](s1)
+ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C2]](s1)
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[COPY9]](s1)
; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.5
; GFX10-NEXT: {{ $}}
@@ -577,21 +701,21 @@ body: |
; GFX10-NEXT: successors: %bb.6(0x80000000)
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C3]]
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]]
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
- ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.6:
; GFX10-NEXT: successors: %bb.7(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(s1) = PHI [[COPY7]](s1), %bb.4, [[S_OR_B32_]](s1), %bb.5
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.5, [[DEF]](s32), %bb.4
- ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]](s1)
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[COPY9]](s1), %bb.4, [[S_OR_B32_1]](s1), %bb.5
+ ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.5, [[DEF]](s32), %bb.4
+ ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1)
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF1]](s32)
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY10]](s1), [[PHI]](s32)
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY12]](s1), [[PHI1]](s32)
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.7
; GFX10-NEXT: {{ $}}
@@ -599,13 +723,13 @@ body: |
; GFX10-NEXT: successors: %bb.8(0x40000000), %bb.9(0x40000000)
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
- ; GFX10-NEXT: [[SI_IF2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.9, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: [[SI_IF2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[S_OR_B32_]](s1), %bb.9, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.8
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.8:
; GFX10-NEXT: successors: %bb.9(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: G_STORE [[COPY6]](s32), [[MV1]](p1) :: (store (s32), addrspace 1)
+ ; GFX10-NEXT: G_STORE [[COPY7]](s32), [[MV1]](p1) :: (store (s32), addrspace 1)
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.9:
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF2]](s32)
@@ -709,59 +833,66 @@ body: |
; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32)
; GFX10-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[C]](s1)
; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[C]](s1)
+ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %41(s1), %bb.3
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY5]](s1), %bb.0, %31(s1), %bb.3
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %10(s32), %bb.3, [[C1]](s32), %bb.0
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %12(s32), %bb.3
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %48(s1), %bb.3
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY5]](s1), %bb.0, %38(s1), %bb.3
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %29(s1), %bb.3
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI %10(s32), %bb.3, [[C1]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %12(s32), %bb.3
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI1]](s1)
; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[COPY7]](s1)
+ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]](s1)
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
- ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY7]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.2
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.2:
; GFX10-NEXT: successors: %bb.3(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI3]](s32)
+ ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI4]](s32)
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32)
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL]](s64)
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1)
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LOAD]](s32), [[C3]]
- ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1)
- ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
- ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1)
+ ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.3:
; GFX10-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_1]](s1), %bb.2
- ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[PHI1]](s1), %bb.1, [[DEF1]](s1), %bb.2
- ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[PHI4]](s1)
- ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI5]](s1)
+ ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_1]](s1), %bb.2
+ ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[PHI1]](s1), %bb.1, [[DEF2]](s1), %bb.2
+ ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1)
+ ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI6]](s1)
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
- ; GFX10-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE [[COPY11]]
- ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[FREEZE]](s1)
+ ; GFX10-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE [[COPY12]]
+ ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[FREEZE]](s1)
+ ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[FREEZE]](s1)
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C4]]
- ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[PHI3]](s32), [[COPY]]
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ICMP1]](s1), [[PHI2]](s32)
- ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY12]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI4]], [[C4]]
+ ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[PHI4]](s32), [[COPY]]
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ICMP1]](s1), [[PHI3]](s32)
+ ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY15]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY13]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.4
; GFX10-NEXT: {{ $}}
@@ -769,7 +900,7 @@ body: |
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FREEZE]](s1), [[C6]], [[C5]]
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[S_OR_B32_2]](s1), [[C6]], [[C5]]
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV1]](p0) :: (store (s32))
; GFX10-NEXT: S_ENDPGM 0
bb.0:
@@ -849,19 +980,22 @@ body: |
; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
- ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.0, %54(s1), %bb.5
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[DEF1]](s1), %bb.0, %41(s1), %bb.5
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.5, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %14(s32), %bb.5
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF3]](s1), %bb.0, %61(s1), %bb.5
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[DEF2]](s1), %bb.0, %48(s1), %bb.5
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %38(s1), %bb.5
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.5, [[C]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %14(s32), %bb.5
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI1]](s1)
- ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI3]](s32)
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]](s1)
+ ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI4]](s32)
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C1]](s32)
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64)
@@ -869,16 +1003,16 @@ body: |
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]]
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
- ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C3]](s1)
+ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C3]](s1)
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
- ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_]](s1)
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_]](s1)
; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
- ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1)
+ ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1)
; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.3
; GFX10-NEXT: {{ $}}
@@ -899,17 +1033,17 @@ body: |
; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[C6]]
; GFX10-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD1]](p1) :: (store (s32), addrspace 1)
- ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C6]]
+ ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI4]], [[C6]]
; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 100
- ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI3]](s32), [[C7]]
- ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1)
+ ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI4]](s32), [[C7]]
+ ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1)
; GFX10-NEXT: [[C8:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
- ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C8]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C8]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY12]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc
; GFX10-NEXT: G_BR %bb.5
; GFX10-NEXT: {{ $}}
@@ -920,13 +1054,17 @@ body: |
; GFX10-NEXT: bb.5:
; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_1]](s1), %bb.1, [[S_OR_B32_3]](s1), %bb.3
- ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_2]](s1), %bb.3
- ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1
- ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[PHI4]](s1)
- ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI5]](s1)
+ ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_1]](s1), %bb.1, [[S_OR_B32_3]](s1), %bb.3
+ ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_2]](s1), %bb.3
+ ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1
+ ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1)
+ ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI6]](s1)
+ ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32(s1) = COPY [[COPY16]](s1)
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY14]](s1), [[PHI2]](s32)
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY15]](s1), [[PHI3]](s32)
+ ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY17]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.6
; GFX10-NEXT: {{ $}}
@@ -934,7 +1072,7 @@ body: |
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
- ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY15]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[S_OR_B32_4]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.2
bb.0:
successors: %bb.1(0x80000000)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll
index 9b1cc719d5567..145c430597106 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll
@@ -406,32 +406,36 @@ define amdgpu_cs void @loop_with_div_break_with_body(ptr addrspace(1) %x, ptr ad
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_mov_b32 s4, 0
; GFX10-NEXT: s_mov_b32 s0, 0
-; GFX10-NEXT: ; implicit-def: $sgpr5
; GFX10-NEXT: ; implicit-def: $sgpr6
+; GFX10-NEXT: ; implicit-def: $sgpr7
+; GFX10-NEXT: ; implicit-def: $sgpr5
; GFX10-NEXT: s_branch .LBB5_2
; GFX10-NEXT: .LBB5_1: ; %Flow
; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s1
-; GFX10-NEXT: s_and_b32 s1, exec_lo, s5
+; GFX10-NEXT: s_and_b32 s1, exec_lo, s6
; GFX10-NEXT: s_or_b32 s4, s1, s4
+; GFX10-NEXT: s_andn2_b32 s1, s5, exec_lo
+; GFX10-NEXT: s_and_b32 s2, exec_lo, s7
+; GFX10-NEXT: s_or_b32 s5, s1, s2
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: s_cbranch_execz .LBB5_4
; GFX10-NEXT: .LBB5_2: ; %A
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: s_ashr_i32 s1, s0, 31
-; GFX10-NEXT: s_mov_b32 s7, exec_lo
+; GFX10-NEXT: s_mov_b32 s8, exec_lo
; GFX10-NEXT: s_lshl_b64 s[2:3], s[0:1], 2
-; GFX10-NEXT: s_andn2_b32 s1, s6, exec_lo
+; GFX10-NEXT: s_andn2_b32 s1, s7, exec_lo
; GFX10-NEXT: v_mov_b32_e32 v7, s3
; GFX10-NEXT: v_mov_b32_e32 v6, s2
-; GFX10-NEXT: s_and_b32 s6, exec_lo, s7
-; GFX10-NEXT: s_andn2_b32 s5, s5, exec_lo
-; GFX10-NEXT: s_and_b32 s7, exec_lo, exec_lo
-; GFX10-NEXT: s_or_b32 s6, s1, s6
+; GFX10-NEXT: s_and_b32 s7, exec_lo, s8
+; GFX10-NEXT: s_andn2_b32 s6, s6, exec_lo
+; GFX10-NEXT: s_and_b32 s8, exec_lo, exec_lo
+; GFX10-NEXT: s_or_b32 s7, s1, s7
; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v2, v6
; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v3, v7, vcc_lo
-; GFX10-NEXT: s_or_b32 s5, s5, s7
+; GFX10-NEXT: s_or_b32 s6, s6, s8
; GFX10-NEXT: global_load_dword v6, v[6:7], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6
@@ -446,13 +450,13 @@ define amdgpu_cs void @loop_with_div_break_with_body(ptr addrspace(1) %x, ptr ad
; GFX10-NEXT: s_cselect_b32 s0, exec_lo, 0
; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v0, v6
; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v1, v7, vcc_lo
-; GFX10-NEXT: s_andn2_b32 s3, s6, exec_lo
-; GFX10-NEXT: s_and_b32 s6, exec_lo, 0
-; GFX10-NEXT: s_andn2_b32 s5, s5, exec_lo
+; GFX10-NEXT: s_andn2_b32 s3, s7, exec_lo
+; GFX10-NEXT: s_and_b32 s7, exec_lo, 0
+; GFX10-NEXT: s_andn2_b32 s6, s6, exec_lo
; GFX10-NEXT: global_load_dword v8, v[6:7], off
; GFX10-NEXT: s_and_b32 s0, exec_lo, s0
-; GFX10-NEXT: s_or_b32 s6, s3, s6
-; GFX10-NEXT: s_or_b32 s5, s5, s0
+; GFX10-NEXT: s_or_b32 s7, s3, s7
+; GFX10-NEXT: s_or_b32 s6, s6, s0
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_add_nc_u32_e32 v8, 1, v8
@@ -460,7 +464,7 @@ define amdgpu_cs void @loop_with_div_break_with_body(ptr addrspace(1) %x, ptr ad
; GFX10-NEXT: s_branch .LBB5_1
; GFX10-NEXT: .LBB5_4: ; %loop.exit.guard
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
-; GFX10-NEXT: s_and_saveexec_b32 s0, s6
+; GFX10-NEXT: s_and_saveexec_b32 s0, s5
; GFX10-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX10-NEXT: s_cbranch_execz .LBB5_6
; GFX10-NEXT: ; %bb.5: ; %break.body
@@ -520,74 +524,75 @@ define amdgpu_ps i32 @irreducible_cfg(i32 %x, i32 %y, i32 %a0, i32 %a1, i32 %a2,
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, v4, v1
; GFX10-NEXT: s_mov_b32 s0, exec_lo
-; GFX10-NEXT: s_mov_b32 s2, 0
-; GFX10-NEXT: s_and_b32 s3, s0, 1
-; GFX10-NEXT: s_xor_b32 s1, vcc_lo, s0
-; GFX10-NEXT: s_cmp_lg_u32 s3, 0
-; GFX10-NEXT: s_cselect_b32 s0, exec_lo, 0
+; GFX10-NEXT: s_mov_b32 s1, 0
+; GFX10-NEXT: s_and_b32 s2, s0, 1
+; GFX10-NEXT: s_xor_b32 s0, vcc_lo, s0
+; GFX10-NEXT: s_cmp_lg_u32 s2, 0
+; GFX10-NEXT: ; implicit-def: $sgpr2
+; GFX10-NEXT: s_cselect_b32 s3, exec_lo, 0
; GFX10-NEXT: s_branch .LBB6_2
; GFX10-NEXT: .LBB6_1: ; %Flow2
; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
-; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5
-; GFX10-NEXT: s_and_b32 s3, exec_lo, s4
-; GFX10-NEXT: s_or_b32 s2, s3, s2
-; GFX10-NEXT: s_and_b32 s1, s1, 1
-; GFX10-NEXT: s_cmp_lg_u32 s1, 0
-; GFX10-NEXT: s_cselect_b32 s1, exec_lo, 0
-; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s2
+; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s6
+; GFX10-NEXT: s_and_b32 s4, exec_lo, s5
+; GFX10-NEXT: s_or_b32 s1, s4, s1
+; GFX10-NEXT: s_and_b32 s0, s0, 1
+; GFX10-NEXT: s_cmp_lg_u32 s0, 0
+; GFX10-NEXT: s_cselect_b32 s0, exec_lo, 0
+; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s1
; GFX10-NEXT: s_cbranch_execz .LBB6_8
; GFX10-NEXT: .LBB6_2: ; %irr.guard
; GFX10-NEXT: ; =>This Loop Header: Depth=1
; GFX10-NEXT: ; Child Loop BB6_6 Depth 2
-; GFX10-NEXT: s_andn2_b32 s4, s0, exec_lo
-; GFX10-NEXT: s_and_b32 s0, exec_lo, s0
-; GFX10-NEXT: s_mov_b32 s3, exec_lo
-; GFX10-NEXT: s_or_b32 s0, s4, s0
-; GFX10-NEXT: s_and_saveexec_b32 s4, s1
-; GFX10-NEXT: s_xor_b32 s4, exec_lo, s4
+; GFX10-NEXT: s_mov_b32 s4, exec_lo
+; GFX10-NEXT: s_and_saveexec_b32 s5, s0
+; GFX10-NEXT: s_xor_b32 s5, exec_lo, s5
; GFX10-NEXT: ; %bb.3: ; %.loopexit
; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
-; GFX10-NEXT: v_cmp_gt_i32_e64 s1, v5, v0
-; GFX10-NEXT: s_mov_b32 s5, exec_lo
+; GFX10-NEXT: v_cmp_gt_i32_e64 s0, v5, v0
; GFX10-NEXT: s_mov_b32 s6, exec_lo
-; GFX10-NEXT: s_xor_b32 s5, vcc_lo, s5
-; GFX10-NEXT: s_andn2_b32 s0, s0, exec_lo
-; GFX10-NEXT: s_or_b32 s5, s1, s5
-; GFX10-NEXT: s_and_b32 s1, exec_lo, s1
-; GFX10-NEXT: s_xor_b32 s5, s5, s6
+; GFX10-NEXT: s_mov_b32 s7, exec_lo
+; GFX10-NEXT: s_xor_b32 s6, vcc_lo, s6
; GFX10-NEXT: s_andn2_b32 s3, s3, exec_lo
-; GFX10-NEXT: s_and_b32 s5, exec_lo, s5
-; GFX10-NEXT: s_or_b32 s0, s0, s1
-; GFX10-NEXT: s_or_b32 s3, s3, s5
+; GFX10-NEXT: s_or_b32 s6, s0, s6
+; GFX10-NEXT: s_and_b32 s0, exec_lo, s0
+; GFX10-NEXT: s_xor_b32 s6, s6, s7
+; GFX10-NEXT: s_andn2_b32 s4, s4, exec_lo
+; GFX10-NEXT: s_and_b32 s6, exec_lo, s6
+; GFX10-NEXT: s_or_b32 s3, s3, s0
+; GFX10-NEXT: s_or_b32 s4, s4, s6
; GFX10-NEXT: ; %bb.4: ; %Flow1
; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
-; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
-; GFX10-NEXT: s_mov_b32 s4, exec_lo
-; GFX10-NEXT: ; implicit-def: $sgpr1
-; GFX10-NEXT: s_and_saveexec_b32 s5, s3
+; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5
+; GFX10-NEXT: s_andn2_b32 s0, s2, exec_lo
+; GFX10-NEXT: s_and_b32 s2, exec_lo, s3
+; GFX10-NEXT: s_mov_b32 s5, exec_lo
+; GFX10-NEXT: s_or_b32 s2, s0, s2
+; GFX10-NEXT: ; implicit-def: $sgpr0
+; GFX10-NEXT: s_and_saveexec_b32 s6, s4
; GFX10-NEXT: s_cbranch_execz .LBB6_1
; GFX10-NEXT: ; %bb.5: ; %.preheader
; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
-; GFX10-NEXT: v_cmp_le_i32_e64 s1, v4, v0
-; GFX10-NEXT: s_mov_b32 s3, 0
+; GFX10-NEXT: v_cmp_le_i32_e64 s0, v4, v0
+; GFX10-NEXT: s_mov_b32 s4, 0
; GFX10-NEXT: .LBB6_6: ; %.inner_loop
; GFX10-NEXT: ; Parent Loop BB6_2 Depth=1
; GFX10-NEXT: ; => This Inner Loop Header: Depth=2
-; GFX10-NEXT: s_and_b32 s6, exec_lo, s1
-; GFX10-NEXT: s_or_b32 s3, s6, s3
-; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s3
+; GFX10-NEXT: s_and_b32 s7, exec_lo, s0
+; GFX10-NEXT: s_or_b32 s4, s7, s4
+; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: s_cbranch_execnz .LBB6_6
; GFX10-NEXT: ; %bb.7: ; %Flow
; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
-; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s3
-; GFX10-NEXT: s_andn2_b32 s3, s4, exec_lo
-; GFX10-NEXT: s_and_b32 s4, exec_lo, 0
-; GFX10-NEXT: s_mov_b32 s1, 1
-; GFX10-NEXT: s_or_b32 s4, s3, s4
+; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
+; GFX10-NEXT: s_andn2_b32 s4, s5, exec_lo
+; GFX10-NEXT: s_and_b32 s5, exec_lo, 0
+; GFX10-NEXT: s_mov_b32 s0, 1
+; GFX10-NEXT: s_or_b32 s5, s4, s5
; GFX10-NEXT: s_branch .LBB6_1
; GFX10-NEXT: .LBB6_8: ; %.exit
-; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s2
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, v3, s0
+; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s1
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, v3, s2
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
.entry:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir
index bd7140de5d617..b76d421c16172 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir
@@ -771,19 +771,22 @@ body: |
; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
- ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.0, %54(s1), %bb.5
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[DEF1]](s1), %bb.0, %41(s1), %bb.5
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.5, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %14(s32), %bb.5
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF3]](s1), %bb.0, %61(s1), %bb.5
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[DEF2]](s1), %bb.0, %48(s1), %bb.5
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %38(s1), %bb.5
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.5, [[C]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %14(s32), %bb.5
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI1]](s1)
- ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI3]](s32)
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]](s1)
+ ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI4]](s32)
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C1]](s32)
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64)
@@ -791,16 +794,16 @@ body: |
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]]
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
- ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C3]](s1)
+ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C3]](s1)
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
- ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_]](s1)
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_]](s1)
; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
- ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1)
+ ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1)
; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.3
; GFX10-NEXT: {{ $}}
@@ -821,17 +824,17 @@ body: |
; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[C6]]
; GFX10-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD1]](p1) :: (store (s32), addrspace 1)
- ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C6]]
+ ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI4]], [[C6]]
; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 100
- ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI3]](s32), [[C7]]
- ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1)
+ ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI4]](s32), [[C7]]
+ ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1)
; GFX10-NEXT: [[C8:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
- ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C8]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C8]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY12]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc
; GFX10-NEXT: G_BR %bb.5
; GFX10-NEXT: {{ $}}
@@ -842,13 +845,17 @@ body: |
; GFX10-NEXT: bb.5:
; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_1]](s1), %bb.1, [[S_OR_B32_3]](s1), %bb.3
- ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_2]](s1), %bb.3
- ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1
- ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[PHI4]](s1)
- ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI5]](s1)
+ ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_1]](s1), %bb.1, [[S_OR_B32_3]](s1), %bb.3
+ ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_2]](s1), %bb.3
+ ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1
+ ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1)
+ ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI6]](s1)
+ ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32(s1) = COPY [[COPY16]](s1)
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY14]](s1), [[PHI2]](s32)
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY15]](s1), [[PHI3]](s32)
+ ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY17]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.6
; GFX10-NEXT: {{ $}}
@@ -856,7 +863,7 @@ body: |
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
- ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY15]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[S_OR_B32_4]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.2
bb.0:
successors: %bb.1(0x80000000)
@@ -965,14 +972,18 @@ body: |
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI %51(s1), %bb.5, %50(s1), %bb.8
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI %39(s1), %bb.5, %38(s1), %bb.8
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI %52(s1), %bb.5, %51(s1), %bb.8
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI %45(s1), %bb.5, %44(s1), %bb.8
; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1)
; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1)
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[COPY9]](s1)
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %17(s32)
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[C2]](s1)
- ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[COPY10]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %40(s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[C2]](s1)
+ ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[COPY11]](s1)
; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY8]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.2
; GFX10-NEXT: {{ $}}
@@ -986,13 +997,13 @@ body: |
; GFX10-NEXT: bb.3:
; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.8(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(s1) = PHI [[COPY10]](s1), %bb.1, %58(s1), %bb.7
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(s1) = PHI [[COPY11]](s1), %bb.1, %59(s1), %bb.7
; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI %23(s1), %bb.7, [[DEF]](s1), %bb.1
- ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]](s1)
+ ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]](s1)
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY12]](s1), %27(s32)
- ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[COPY9]](s1)
- ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1)
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY13]](s1), %27(s32)
+ ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[COPY9]](s1)
+ ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1)
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.8, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.6
; GFX10-NEXT: {{ $}}
@@ -1008,24 +1019,24 @@ body: |
; GFX10-NEXT: successors: %bb.1(0x80000000)
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY5]](s32), [[COPY]]
- ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1)
+ ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1)
; GFX10-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE [[ICMP]]
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[FREEZE]], [[C4]]
; GFX10-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP2]], [[XOR1]]
; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[OR]], [[C4]]
- ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[XOR2]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %46(s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY15]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 %52(s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY16]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[XOR2]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %46(s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY16]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 %53(s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY17]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
; GFX10-NEXT: G_BR %bb.1
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.6:
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY9]](s1), [[COPY3]], [[COPY2]]
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[S_OR_B32_]](s1), [[COPY3]], [[COPY2]]
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[SELECT]](s32)
; GFX10-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
@@ -1035,32 +1046,29 @@ body: |
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT1]](s32)
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
- ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1)
+ ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1)
; GFX10-NEXT: [[C6:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY17]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY12]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY18]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc
; GFX10-NEXT: G_BR %bb.3
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.8:
; GFX10-NEXT: successors: %bb.5(0x40000000), %bb.1(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY7]](s1), %bb.0, [[COPY14]](s1), %bb.3
- ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32(s1) = PHI [[COPY6]](s1), %bb.0, [[COPY13]](s1), %bb.3
- ; GFX10-NEXT: [[PHI7:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, [[PHI1]](s1), %bb.3
+ ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY7]](s1), %bb.0, [[COPY15]](s1), %bb.3
+ ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32(s1) = PHI [[COPY6]](s1), %bb.0, [[COPY14]](s1), %bb.3
+ ; GFX10-NEXT: [[PHI7:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.3
; GFX10-NEXT: [[PHI8:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.3, [[C1]](s32), %bb.0
- ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI5]](s1)
- ; GFX10-NEXT: [[COPY19:%[0-9]+]]:sreg_32(s1) = COPY [[PHI6]](s1)
- ; GFX10-NEXT: [[COPY20:%[0-9]+]]:sreg_32(s1) = COPY [[COPY19]](s1)
+ ; GFX10-NEXT: [[COPY19:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI5]](s1)
+ ; GFX10-NEXT: [[COPY20:%[0-9]+]]:sreg_32(s1) = COPY [[PHI6]](s1)
; GFX10-NEXT: [[COPY21:%[0-9]+]]:sreg_32(s1) = COPY [[PHI7]](s1)
; GFX10-NEXT: [[C7:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY21]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY20]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc
- ; GFX10-NEXT: [[COPY22:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_3]](s1)
- ; GFX10-NEXT: [[COPY23:%[0-9]+]]:sreg_32(s1) = COPY [[C7]](s1)
- ; GFX10-NEXT: [[COPY24:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY23]](s1)
- ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY18]](s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: [[COPY22:%[0-9]+]]:sreg_32(s1) = COPY [[COPY20]](s1)
+ ; GFX10-NEXT: [[COPY23:%[0-9]+]]:sreg_32(s1) = COPY [[COPY22]](s1)
+ ; GFX10-NEXT: [[COPY24:%[0-9]+]]:sreg_32(s1) = COPY [[C7]](s1)
+ ; GFX10-NEXT: [[COPY25:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY24]](s1)
+ ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY19]](s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.5
bb.0:
successors: %bb.8(0x80000000)
@@ -1173,6 +1181,7 @@ body: |
; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY4]](s32), [[COPY1]]
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1)
; GFX10-NEXT: G_BR %bb.7
; GFX10-NEXT: {{ $}}
@@ -1186,8 +1195,8 @@ body: |
; GFX10-NEXT: bb.2:
; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI %45(s1), %bb.6, %49(s1), %bb.7
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI %34(s1), %bb.6, %33(s1), %bb.7
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI %52(s1), %bb.6, %56(s1), %bb.7
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI %41(s1), %bb.6, %40(s1), %bb.7
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI %12(s1), %bb.6, [[DEF]](s1), %bb.7
; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI2]](s1)
; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1)
@@ -1214,23 +1223,27 @@ body: |
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY5]](s32), [[COPY]]
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1)
; GFX10-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE [[ICMP]]
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C2]](s1)
+ ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C2]](s1)
; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[FREEZE]], [[C2]]
; GFX10-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP2]], [[XOR]]
; GFX10-NEXT: [[INT2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[OR]](s1), %26(s32)
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
- ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %34(s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
; GFX10-NEXT: SI_LOOP [[INT2]](s32), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.5
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.5:
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT2]](s32)
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[COPY3]], [[COPY2]]
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[S_OR_B32_1]](s1), [[COPY3]], [[COPY2]]
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[SELECT]](s32)
; GFX10-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
@@ -1240,29 +1253,31 @@ body: |
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT1]](s32)
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
- ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %41(s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
- ; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %48(s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[DEF4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
; GFX10-NEXT: G_BR %bb.2
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.7:
; GFX10-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY6]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.2, [[S_OR_B32_1]](s1), %bb.4
- ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, [[PHI1]](s1), %bb.2, [[DEF2]](s1), %bb.4
- ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[INT2]](s32), %bb.4, [[PHI6]](s32), %bb.2, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[C3]](s32), %bb.4, [[INT]](s32), %bb.2, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI4]](s1)
- ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1)
+ ; GFX10-NEXT: [[PHI4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY6]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.2, [[S_OR_B32_2]](s1), %bb.4
+ ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.0, [[PHI1]](s1), %bb.2, [[DEF3]](s1), %bb.4
+ ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, [[PHI6]](s1), %bb.2, [[S_OR_B32_1]](s1), %bb.4
+ ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[INT2]](s32), %bb.4, [[PHI7]](s32), %bb.2, [[C]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI8:%[0-9]+]]:_(s32) = G_PHI [[C3]](s32), %bb.4, [[INT]](s32), %bb.2, [[C]](s32), %bb.0
+ ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI4]](s1)
+ ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1)
+ ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI6]](s1)
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY14]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY15]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc
- ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_3]](s1)
- ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY13]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY15]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY17]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_4]](s1)
+ ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY14]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.1
bb.0:
successors: %bb.7(0x80000000)
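The S_ANDN2_B32/S_AND_B32/S_OR_B32 triples that the updated checks above
expect are the usual wave32 lane-mask merge: clear the accumulator bits of
the currently active lanes, then OR in the current value for exactly those
lanes. A minimal sketch of emitting one such merge step with MachineIRBuilder
(illustrative only; the helper name, register setup and abbreviated includes
are assumptions, not the exact code this patch adds):

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"

using namespace llvm;

// One wave32 merge step: keep the accumulator's bits for inactive lanes,
// take CurReg's bits for the lanes currently executing the iteration.
static Register buildMergeStep(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                               const TargetRegisterClass *BoolRC,
                               Register PrevReg, Register CurReg) {
  Register Keep = MRI.createVirtualRegister(BoolRC);
  Register Take = MRI.createVirtualRegister(BoolRC);
  Register Merged = MRI.createVirtualRegister(BoolRC);
  // PrevReg & ~exec_lo: bits of lanes that are no longer active.
  B.buildInstr(AMDGPU::S_ANDN2_B32, {Keep},
               {PrevReg, Register(AMDGPU::EXEC_LO)});
  // exec_lo & CurReg: the current value for the active lanes.
  B.buildInstr(AMDGPU::S_AND_B32, {Take},
               {Register(AMDGPU::EXEC_LO), CurReg});
  // Combine both halves. The implicit-def $scc on all three instructions
  // comes from the instruction descriptors, matching the checks above.
  B.buildInstr(AMDGPU::S_OR_B32, {Merged}, {Keep, Take});
  return Merged;
}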
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.ll
index 6e72c11f892a7..d374f9011d60c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.ll
@@ -6,23 +6,27 @@ define void @temporal_divergent_i1_phi(float %val, ptr %addr) {
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_mov_b32 s4, 0
-; GFX10-NEXT: s_mov_b32 s6, 1
-; GFX10-NEXT: s_mov_b32 s5, 0
+; GFX10-NEXT: s_mov_b32 s5, 1
+; GFX10-NEXT: s_mov_b32 s6, 0
+; GFX10-NEXT: ; implicit-def: $sgpr7
; GFX10-NEXT: .LBB0_1: ; %loop
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX10-NEXT: v_cvt_f32_u32_e32 v3, s5
-; GFX10-NEXT: s_mov_b32 s7, s6
-; GFX10-NEXT: s_add_i32 s5, s5, 1
-; GFX10-NEXT: s_xor_b32 s6, s6, 1
+; GFX10-NEXT: v_cvt_f32_u32_e32 v3, s6
+; GFX10-NEXT: s_and_b32 s8, s5, 1
+; GFX10-NEXT: s_cmp_lg_u32 s8, 0
+; GFX10-NEXT: s_cselect_b32 s8, exec_lo, 0
; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v3, v0
+; GFX10-NEXT: s_xor_b32 s5, s5, 1
+; GFX10-NEXT: s_add_i32 s6, s6, 1
; GFX10-NEXT: s_or_b32 s4, vcc_lo, s4
+; GFX10-NEXT: s_andn2_b32 s7, s7, exec_lo
+; GFX10-NEXT: s_and_b32 s8, exec_lo, s8
+; GFX10-NEXT: s_or_b32 s7, s7, s8
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: s_cbranch_execnz .LBB0_1
; GFX10-NEXT: ; %bb.2: ; %exit
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
-; GFX10-NEXT: s_cmp_lg_u32 s7, 0
-; GFX10-NEXT: s_cselect_b32 s4, exec_lo, 0
-; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s7
; GFX10-NEXT: flat_store_dword v[1:2], v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -51,20 +55,25 @@ define void @temporal_divergent_i1_non_phi(float %val, ptr %addr) {
; GFX10-NEXT: s_mov_b32 s4, 0
; GFX10-NEXT: s_mov_b32 s5, 1
; GFX10-NEXT: s_mov_b32 s6, 0
+; GFX10-NEXT: ; implicit-def: $sgpr7
; GFX10-NEXT: .LBB1_1: ; %loop
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_cvt_f32_u32_e32 v3, s6
; GFX10-NEXT: s_xor_b32 s5, s5, 1
-; GFX10-NEXT: s_add_i32 s6, s6, 1
+; GFX10-NEXT: s_and_b32 s8, s5, 1
+; GFX10-NEXT: s_cmp_lg_u32 s8, 0
; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v3, v0
+; GFX10-NEXT: s_cselect_b32 s8, exec_lo, 0
+; GFX10-NEXT: s_add_i32 s6, s6, 1
; GFX10-NEXT: s_or_b32 s4, vcc_lo, s4
+; GFX10-NEXT: s_andn2_b32 s7, s7, exec_lo
+; GFX10-NEXT: s_and_b32 s8, exec_lo, s8
+; GFX10-NEXT: s_or_b32 s7, s7, s8
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: s_cbranch_execnz .LBB1_1
; GFX10-NEXT: ; %bb.2: ; %exit
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
-; GFX10-NEXT: s_cmp_lg_u32 s5, 0
-; GFX10-NEXT: s_cselect_b32 s4, exec_lo, 0
-; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s7
; GFX10-NEXT: flat_store_dword v[1:2], v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -94,13 +103,14 @@ define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, i32 %x.size, ptr ad
; GFX10-NEXT: v_mov_b32_e32 v3, 0
; GFX10-NEXT: s_mov_b32 s8, 0
; GFX10-NEXT: s_mov_b32 s4, 0
+; GFX10-NEXT: ; implicit-def: $sgpr10
; GFX10-NEXT: ; implicit-def: $sgpr9
; GFX10-NEXT: s_branch .LBB2_3
; GFX10-NEXT: .LBB2_1: ; %loop.body
; GFX10-NEXT: ; in Loop: Header=BB2_3 Depth=1
; GFX10-NEXT: v_mov_b32_e32 v4, s6
; GFX10-NEXT: v_mov_b32_e32 v5, s7
-; GFX10-NEXT: s_andn2_b32 s6, s9, exec_lo
+; GFX10-NEXT: s_andn2_b32 s6, s10, exec_lo
; GFX10-NEXT: s_mov_b32 s5, 0
; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v0, v4
; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v5, vcc_lo
@@ -108,7 +118,7 @@ define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, i32 %x.size, ptr ad
; GFX10-NEXT: s_add_i32 s4, s4, 1
; GFX10-NEXT: global_load_dword v6, v[4:5], off
; GFX10-NEXT: s_and_b32 s7, exec_lo, vcc_lo
-; GFX10-NEXT: s_or_b32 s9, s6, s7
+; GFX10-NEXT: s_or_b32 s10, s6, s7
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_add_nc_u32_e32 v6, 1, v6
; GFX10-NEXT: global_store_dword v[4:5], v6, off
@@ -116,8 +126,11 @@ define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, i32 %x.size, ptr ad
; GFX10-NEXT: ; in Loop: Header=BB2_3 Depth=1
; GFX10-NEXT: s_cmp_lg_u32 s5, 0
; GFX10-NEXT: s_cselect_b32 s5, exec_lo, 0
-; GFX10-NEXT: s_and_b32 s6, exec_lo, s9
+; GFX10-NEXT: s_and_b32 s6, exec_lo, s10
; GFX10-NEXT: s_or_b32 s8, s6, s8
+; GFX10-NEXT: s_andn2_b32 s6, s9, exec_lo
+; GFX10-NEXT: s_and_b32 s5, exec_lo, s5
+; GFX10-NEXT: s_or_b32 s9, s6, s5
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s8
; GFX10-NEXT: s_cbranch_execz .LBB2_5
@@ -125,16 +138,16 @@ define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, i32 %x.size, ptr ad
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: s_ashr_i32 s5, s4, 31
; GFX10-NEXT: s_lshl_b64 s[6:7], s[4:5], 2
-; GFX10-NEXT: s_add_u32 s10, s0, s6
-; GFX10-NEXT: s_addc_u32 s11, s1, s7
-; GFX10-NEXT: global_load_dword v4, v3, s[10:11]
+; GFX10-NEXT: s_add_u32 s12, s0, s6
+; GFX10-NEXT: s_addc_u32 s13, s1, s7
+; GFX10-NEXT: global_load_dword v4, v3, s[12:13]
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s5, v4
; GFX10-NEXT: s_cmp_lg_u32 s5, 0
; GFX10-NEXT: s_cselect_b32 s5, 1, 0
-; GFX10-NEXT: s_andn2_b32 s9, s9, exec_lo
-; GFX10-NEXT: s_and_b32 s10, exec_lo, exec_lo
-; GFX10-NEXT: s_or_b32 s9, s9, s10
+; GFX10-NEXT: s_andn2_b32 s10, s10, exec_lo
+; GFX10-NEXT: s_and_b32 s11, exec_lo, exec_lo
+; GFX10-NEXT: s_or_b32 s10, s10, s11
; GFX10-NEXT: s_cmp_lg_u32 s5, 0
; GFX10-NEXT: s_cbranch_scc1 .LBB2_1
; GFX10-NEXT: ; %bb.4: ; in Loop: Header=BB2_3 Depth=1
@@ -143,7 +156,7 @@ define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, i32 %x.size, ptr ad
; GFX10-NEXT: s_branch .LBB2_2
; GFX10-NEXT: .LBB2_5: ; %loop.exit.guard
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8
-; GFX10-NEXT: s_and_saveexec_b32 s0, s5
+; GFX10-NEXT: s_and_saveexec_b32 s0, s9
; GFX10-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX10-NEXT: s_cbranch_execz .LBB2_7
; GFX10-NEXT: ; %bb.6: ; %break.body
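The hunks above also show the second half of the lowering: the use outside
the loop (the v_cndmask source here, and the G_SELECT and SI_IF operands in
the MIR tests) now reads the merged mask instead of the loop-internal value.
A hedged sketch of one way to route a value defined inside a cycle to such a
use via MachineSSAUpdater; the function name and parameters are illustrative
assumptions, not this patch's exact code:

#include "llvm/CodeGen/MachineSSAUpdater.h"

using namespace llvm;

// Hypothetical helper: make MergedMask (defined in DefMBB, inside the cycle)
// available at UseMI, which sits outside the cycle.
static void replaceUseOutsideCycle(MachineFunction &MF,
                                   MachineBasicBlock &DefMBB,
                                   Register MergedMask, Register OldVReg,
                                   MachineInstr &UseMI) {
  MachineSSAUpdater Updater(MF);
  Updater.Initialize(MergedMask); // inherit reg class/type from MergedMask
  Updater.AddAvailableValue(&DefMBB, MergedMask);
  // Rewrite each operand of the user that still reads the loop-internal
  // value; the updater materializes phis on the way out where needed.
  for (MachineOperand &Op : UseMI.uses())
    if (Op.isReg() && Op.getReg() == OldVReg)
      Updater.RewriteUse(Op);
}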
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.mir
index e9a415c3da7ee..db23c139271c0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.mir
@@ -17,20 +17,27 @@ body: |
; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI [[C1]](s1), %bb.0, %11(s1), %bb.1
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %19(s1), %bb.1
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[C1]](s1), %bb.0, %11(s1), %bb.1
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1)
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI2]], [[C2]]
- ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI1]](s32)
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C2]]
+ ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C3]]
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32)
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]]
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI1]](s32)
+ ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY4]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY3]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.2
; GFX10-NEXT: {{ $}}
@@ -38,7 +45,7 @@ body: |
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[PHI2]](s1), [[C5]], [[C4]]
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[S_OR_B32_]](s1), [[C5]], [[C4]]
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
; GFX10-NEXT: SI_RETURN
bb.0:
@@ -93,20 +100,27 @@ body: |
; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI [[C1]](s1), %bb.0, %11(s1), %bb.1
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %19(s1), %bb.1
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[C1]](s1), %bb.0, %11(s1), %bb.1
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI2]], [[C2]]
- ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI1]](s32)
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C2]]
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
+ ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C3]]
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32)
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]]
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI1]](s32)
+ ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY3]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.2
; GFX10-NEXT: {{ $}}
@@ -114,7 +128,7 @@ body: |
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[XOR]](s1), [[C5]], [[C4]]
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[S_OR_B32_]](s1), [[C5]], [[C4]]
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
; GFX10-NEXT: SI_RETURN
bb.0:
@@ -176,15 +190,18 @@ body: |
; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.3(0x50000000), %bb.5(0x30000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %40(s1), %bb.5
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %13(s32), %bb.5, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %15(s32), %bb.5
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.0, %47(s1), %bb.5
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %37(s1), %bb.5
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %13(s32), %bb.5, [[C]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %15(s32), %bb.5
; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
- ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32)
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1)
+ ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI3]](s32)
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C1]](s32)
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64)
@@ -192,11 +209,11 @@ body: |
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]]
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
+ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
- ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
; GFX10-NEXT: G_BRCOND [[ICMP]](s1), %bb.3
; GFX10-NEXT: G_BR %bb.5
; GFX10-NEXT: {{ $}}
@@ -217,12 +234,12 @@ body: |
; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[C6]]
; GFX10-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD1]](p1) :: (store (s32), addrspace 1)
- ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C6]]
- ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI2]](s32), [[COPY2]]
- ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1)
+ ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C6]]
+ ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI3]](s32), [[COPY2]]
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1)
; GFX10-NEXT: [[C7:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
- ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
; GFX10-NEXT: G_BR %bb.5
; GFX10-NEXT: {{ $}}
@@ -233,11 +250,15 @@ body: |
; GFX10-NEXT: bb.5:
; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_1]](s1), %bb.3
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1
- ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_PHI [[C7]](s1), %bb.3, [[C3]](s1), %bb.1
- ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1)
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY11]](s1), [[PHI1]](s32)
+ ; GFX10-NEXT: [[PHI4:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_1]](s1), %bb.3
+ ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1
+ ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_PHI [[C7]](s1), %bb.3, [[C3]](s1), %bb.1
+ ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[PHI4]](s1)
+ ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[PHI6]](s1)
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY12]](s1), [[PHI2]](s32)
+ ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.6
; GFX10-NEXT: {{ $}}
@@ -245,7 +266,7 @@ body: |
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
- ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[PHI5]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[S_OR_B32_2]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.2
bb.0:
successors: %bb.1(0x80000000)
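For intuition, the accumulation in the checks computes, per lane, the i1
value from that lane's last active iteration. A tiny standalone simulation in
plain C++ (independent of LLVM; the lane values are invented for the
example) demonstrates the invariant:

#include <cassert>
#include <cstdint>

// One wave32 lane-mask merge step: keep bits of inactive lanes from Prev,
// take bits of active lanes from Cur.
static uint32_t merge(uint32_t Prev, uint32_t Exec, uint32_t Cur) {
  return (Prev & ~Exec) | (Exec & Cur);
}

int main() {
  uint32_t Mask = 0; // accumulator (IMPLICIT_DEF before the loop)
  // Iteration 1: lanes 0-3 active, per-lane i1 value = 0b0110.
  Mask = merge(Mask, 0b1111, 0b0110);
  // Lanes 0 and 3 exit; iteration 2 runs with exec = 0b0110, value 0b0010.
  Mask = merge(Mask, 0b0110, 0b0010);
  // Lanes 0 and 3 kept their iteration-1 bits; lanes 1 and 2 hold their
  // iteration-2 bits.
  assert(Mask == 0b0010);
  return 0;
}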