[llvm] 3ad810e - AMDGPU/GlobalISel: Disable LCSSA pass (#124297)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 12 03:09:54 PDT 2025
Author: Petar Avramovic
Date: 2025-03-12T11:09:50+01:00
New Revision: 3ad810ea9a97bb3399d8590372a6d3a0eb40d236
URL: https://github.com/llvm/llvm-project/commit/3ad810ea9a97bb3399d8590372a6d3a0eb40d236
DIFF: https://github.com/llvm/llvm-project/commit/3ad810ea9a97bb3399d8590372a6d3a0eb40d236.diff
LOG: AMDGPU/GlobalISel: Disable LCSSA pass (#124297)
Disable the LCSSA pass in preparation for implementing temporal divergence
lowering in AMDGPU divergence lowering. This breaks all cases where SGPR or
i1 values are used outside of a cycle with a divergent exit.
Regenerate the AMDGPU divergence-lowering regression tests with LCSSA
disabled.
Update IntrinsicLaneMaskAnalyzer to stop tracking LCSSA phis that are
lane masks.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbankselect.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
index 0b18c6b0e923a..47e32c4864809 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
@@ -91,25 +91,17 @@ void IntrinsicLaneMaskAnalyzer::initLaneMaskIntrinsics(MachineFunction &MF) {
GIntrinsic *GI = dyn_cast<GIntrinsic>(&MI);
if (GI && GI->is(Intrinsic::amdgcn_if_break)) {
S32S64LaneMask.insert(MI.getOperand(3).getReg());
- findLCSSAPhi(MI.getOperand(0).getReg());
+ S32S64LaneMask.insert(MI.getOperand(0).getReg());
}
if (MI.getOpcode() == AMDGPU::SI_IF ||
MI.getOpcode() == AMDGPU::SI_ELSE) {
- findLCSSAPhi(MI.getOperand(0).getReg());
+ S32S64LaneMask.insert(MI.getOperand(0).getReg());
}
}
}
}
-void IntrinsicLaneMaskAnalyzer::findLCSSAPhi(Register Reg) {
- S32S64LaneMask.insert(Reg);
- for (const MachineInstr &LCSSAPhi : MRI.use_instructions(Reg)) {
- if (LCSSAPhi.isPHI())
- S32S64LaneMask.insert(LCSSAPhi.getOperand(0).getReg());
- }
-}
-
static LLT getReadAnyLaneSplitTy(LLT Ty) {
if (Ty.isVector()) {
LLT ElTy = Ty.getElementType();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
index 27f8fed86d647..70cfdacec700c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
@@ -47,8 +47,6 @@ class IntrinsicLaneMaskAnalyzer {
private:
void initLaneMaskIntrinsics(MachineFunction &MF);
- // This will not be needed when we turn off LCSSA for global-isel.
- void findLCSSAPhi(Register Reg);
};
void buildReadAnyLane(MachineIRBuilder &B, Register SgprDst, Register VgprSrc,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 4815f626831c0..d5d375e473d04 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1383,7 +1383,11 @@ bool GCNPassConfig::addPreISel() {
// control flow modifications.
addPass(createAMDGPURewriteUndefForPHILegacyPass());
- addPass(createLCSSAPass());
+ // SDAG requires LCSSA, GlobalISel does not. Disable LCSSA for -global-isel
+ // with -new-reg-bank-select and without any of the fallback options.
+ if (!getCGPassBuilderOption().EnableGlobalISelOption ||
+ !isGlobalISelAbortEnabled() || !NewRegBankSelect)
+ addPass(createLCSSAPass());
if (TM->getOptLevel() > CodeGenOptLevel::Less)
addPass(&AMDGPUPerfHintAnalysisLegacyID);
@@ -2087,7 +2091,9 @@ void AMDGPUCodeGenPassBuilder::addPreISel(AddIRPass &addPass) const {
// control flow modifications.
addPass(AMDGPURewriteUndefForPHIPass());
- addPass(LCSSAPass());
+ if (!getCGPassBuilderOption().EnableGlobalISelOption ||
+ !isGlobalISelAbortEnabled() || !NewRegBankSelect)
+ addPass(LCSSAPass());
if (TM.getOptLevel() > CodeGenOptLevel::Less)
addPass(AMDGPUPerfHintAnalysisPass(TM));
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir
index 6594d7f504212..6ce2f9b7a2c77 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir
@@ -6,7 +6,7 @@
define void @divergent_i1_phi_uniform_branch_simple() {ret void}
define void @divergent_i1_phi_used_inside_loop() {ret void}
define void @divergent_i1_phi_used_inside_loop_bigger_loop_body() {ret void}
- define void @_amdgpu_cs_main() #0 {ret void}
+ define void @single_lane_execution_attribute() #0 {ret void}
attributes #0 = {"amdgpu-flat-work-group-size"="1,1"}
...
@@ -60,10 +60,10 @@ body: |
; GFX10-NEXT: G_BR %bb.2
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.4:
+ ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY7]](s1)
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY7]](s1), [[C5]], [[C4]]
- ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C4]]
+ ; GFX10-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; GFX10-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1(0x30000000), %bb.2(0x50000000)
@@ -105,9 +105,9 @@ body: |
G_BR %bb.2
bb.4:
- %16:_(s32) = G_CONSTANT i32 2
- %17:_(s32) = G_CONSTANT i32 1
- %18:_(s32) = G_SELECT %13(s1), %17, %16
+ %16:_(s32) = G_SEXT %13(s1)
+ %17:_(s32) = G_CONSTANT i32 2
+ %18:_(s32) = G_ADD %16, %17
G_STORE %18(s32), %2(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
@@ -149,10 +149,10 @@ body: |
; GFX10-NEXT: bb.2:
; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY4]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.1
; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
+ ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY7]](s1)
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY7]](s1), [[C4]], [[C3]]
- ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C3]]
+ ; GFX10-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; GFX10-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1(0x30000000), %bb.2(0x50000000)
@@ -178,9 +178,9 @@ body: |
bb.2:
%11:_(s1) = G_PHI %6(s1), %bb.0, %10(s1), %bb.1
- %12:_(s32) = G_CONSTANT i32 2
- %13:_(s32) = G_CONSTANT i32 1
- %14:_(s32) = G_SELECT %11(s1), %13, %12
+ %12:_(s32) = G_SEXT %11(s1)
+ %13:_(s32) = G_CONSTANT i32 2
+ %14:_(s32) = G_ADD %12, %13
G_STORE %14(s32), %2(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
@@ -199,39 +199,30 @@ body: |
; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %22(s1), %bb.1
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.1
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[C]](s1), %bb.0, %11(s1), %bb.1
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI [[C1]](s1), %bb.0, %11(s1), %bb.1
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C2]]
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
- ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI2]], [[C2]]
+ ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI1]](s32)
; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]]
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI1]](s32)
- ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY3]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C3]]
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32)
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.2
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.2:
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.1
- ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32)
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY5]](s1), [[C5]], [[C4]]
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[XOR]](s1), [[C5]], [[C4]]
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
; GFX10-NEXT: SI_RETURN
bb.0:
@@ -242,15 +233,15 @@ body: |
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
%3:_(p0) = G_MERGE_VALUES %1(s32), %2(s32)
- %4:_(s1) = G_CONSTANT i1 true
- %5:_(s32) = G_CONSTANT i32 0
+ %4:_(s32) = G_CONSTANT i32 0
+ %5:_(s1) = G_CONSTANT i1 true
bb.1:
successors: %bb.2(0x04000000), %bb.1(0x7c000000)
- %6:_(s32) = G_PHI %7(s32), %bb.1, %5(s32), %bb.0
- %8:_(s32) = G_PHI %5(s32), %bb.0, %9(s32), %bb.1
- %10:_(s1) = G_PHI %4(s1), %bb.0, %11(s1), %bb.1
+ %6:_(s32) = G_PHI %7(s32), %bb.1, %4(s32), %bb.0
+ %8:_(s32) = G_PHI %4(s32), %bb.0, %9(s32), %bb.1
+ %10:_(s1) = G_PHI %5(s1), %bb.0, %11(s1), %bb.1
%12:_(s1) = G_CONSTANT i1 true
%11:_(s1) = G_XOR %10, %12
%13:_(s32) = G_UITOFP %8(s32)
@@ -262,13 +253,11 @@ body: |
G_BR %bb.2
bb.2:
- %16:_(s1) = G_PHI %11(s1), %bb.1
- %17:_(s32) = G_PHI %7(s32), %bb.1
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %17(s32)
- %18:_(s32) = G_FCONSTANT float 0.000000e+00
- %19:_(s32) = G_FCONSTANT float 1.000000e+00
- %20:_(s32) = G_SELECT %16(s1), %19, %18
- G_STORE %20(s32), %3(p0) :: (store (s32))
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %7(s32)
+ %16:_(s32) = G_FCONSTANT float 0.000000e+00
+ %17:_(s32) = G_FCONSTANT float 1.000000e+00
+ %18:_(s32) = G_SELECT %11(s1), %17, %16
+ G_STORE %18(s32), %3(p0) :: (store (s32))
SI_RETURN
...
@@ -293,33 +282,30 @@ body: |
; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C1]]
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C]]
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[FCMP]](s1)
- ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %42(s1), %bb.5
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[COPY8]](s1), %bb.0, %39(s1), %bb.5
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.5, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.5
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY8]](s1), %bb.0, %37(s1), %bb.5
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.5, [[C1]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %17(s32), %bb.5
; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
- ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1)
- ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1000
- ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[PHI3]](s32), [[C3]]
+ ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1000
+ ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[PHI2]](s32), [[C2]]
+ ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; GFX10-NEXT: G_BRCOND [[ICMP]](s1), %bb.4
; GFX10-NEXT: G_BR %bb.2
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.2:
; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s1) = G_PHI %24(s1), %bb.4, [[C2]](s1), %bb.1
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI %24(s1), %bb.4, [[C3]](s1), %bb.1
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI4]], [[C4]]
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C4]]
; GFX10-NEXT: G_BRCOND [[XOR]](s1), %bb.5
; GFX10-NEXT: G_BR %bb.3
; GFX10-NEXT: {{ $}}
@@ -333,36 +319,30 @@ body: |
; GFX10-NEXT: bb.4:
; GFX10-NEXT: successors: %bb.2(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
- ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1000
- ; GFX10-NEXT: G_STORE [[C7]](s32), [[MV2]](p0) :: (store (s32))
+ ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1000
+ ; GFX10-NEXT: G_STORE [[C6]](s32), [[MV2]](p0) :: (store (s32))
+ ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
; GFX10-NEXT: G_BR %bb.2
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.5:
; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[C8:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[COPY10]], [[C8]]
- ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[XOR1]](s1)
- ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI3]](s32)
+ ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[COPY9]], [[C8]]
+ ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
; GFX10-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
; GFX10-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C9]]
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI2]](s32)
- ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[XOR1]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C9]]
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI1]](s32)
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[XOR1]](s1)
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.6
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.6:
- ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.5
- ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](s32)
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; GFX10-NEXT: [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
; GFX10-NEXT: [[C11:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY13]](s1), [[C11]], [[C10]]
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[XOR1]](s1), [[C11]], [[C10]]
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
; GFX10-NEXT: SI_RETURN
bb.0:
@@ -380,26 +360,26 @@ body: |
%8:_(s32) = COPY $vgpr6
%9:_(s32) = COPY $vgpr7
%10:_(p0) = G_MERGE_VALUES %8(s32), %9(s32)
- %11:_(s32) = G_CONSTANT i32 0
- %12:_(s32) = G_FCONSTANT float 1.000000e+00
- %13:_(s1) = G_FCMP floatpred(ogt), %1(s32), %12
+ %11:_(s32) = G_FCONSTANT float 1.000000e+00
+ %12:_(s1) = G_FCMP floatpred(ogt), %1(s32), %11
+ %13:_(s32) = G_CONSTANT i32 0
bb.1:
successors: %bb.4(0x40000000), %bb.2(0x40000000)
- %14:_(s32) = G_PHI %15(s32), %bb.5, %11(s32), %bb.0
- %16:_(s32) = G_PHI %11(s32), %bb.0, %17(s32), %bb.5
- %18:_(s1) = G_PHI %13(s1), %bb.0, %19(s1), %bb.5
- %20:_(s1) = G_CONSTANT i1 true
- %21:_(s32) = G_CONSTANT i32 1000
- %22:_(s1) = G_ICMP intpred(sle), %16(s32), %21
- G_BRCOND %22(s1), %bb.4
+ %14:_(s32) = G_PHI %15(s32), %bb.5, %13(s32), %bb.0
+ %16:_(s32) = G_PHI %13(s32), %bb.0, %17(s32), %bb.5
+ %18:_(s1) = G_PHI %12(s1), %bb.0, %19(s1), %bb.5
+ %20:_(s32) = G_CONSTANT i32 1000
+ %21:_(s1) = G_ICMP intpred(sle), %16(s32), %20
+ %22:_(s1) = G_CONSTANT i1 true
+ G_BRCOND %21(s1), %bb.4
G_BR %bb.2
bb.2:
successors: %bb.3(0x40000000), %bb.5(0x40000000)
- %23:_(s1) = G_PHI %24(s1), %bb.4, %20(s1), %bb.1
+ %23:_(s1) = G_PHI %24(s1), %bb.4, %22(s1), %bb.1
%25:_(s1) = G_CONSTANT i1 true
%26:_(s1) = G_XOR %23, %25
G_BRCOND %26(s1), %bb.5
@@ -415,9 +395,9 @@ body: |
bb.4:
successors: %bb.2(0x80000000)
- %24:_(s1) = G_CONSTANT i1 false
%28:_(s32) = G_CONSTANT i32 1000
G_STORE %28(s32), %10(p0) :: (store (s32))
+ %24:_(s1) = G_CONSTANT i1 false
G_BR %bb.2
bb.5:
@@ -434,22 +414,20 @@ body: |
G_BR %bb.6
bb.6:
- %33:_(s1) = G_PHI %19(s1), %bb.5
- %34:_(s32) = G_PHI %15(s32), %bb.5
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %34(s32)
- %35:_(s32) = G_FCONSTANT float 0.000000e+00
- %36:_(s32) = G_FCONSTANT float 1.000000e+00
- %37:_(s32) = G_SELECT %33(s1), %36, %35
- G_STORE %37(s32), %4(p0) :: (store (s32))
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %15(s32)
+ %33:_(s32) = G_FCONSTANT float 0.000000e+00
+ %34:_(s32) = G_FCONSTANT float 1.000000e+00
+ %35:_(s32) = G_SELECT %19(s1), %34, %33
+ G_STORE %35(s32), %4(p0) :: (store (s32))
SI_RETURN
...
---
-name: _amdgpu_cs_main
+name: single_lane_execution_attribute
legalized: true
tracksRegLiveness: true
body: |
- ; GFX10-LABEL: name: _amdgpu_cs_main
+ ; GFX10-LABEL: name: single_lane_execution_attribute
; GFX10: bb.0:
; GFX10-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2
@@ -464,24 +442,22 @@ body: |
; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32)
; GFX10-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[ZEXT]]
; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR]](s64)
- ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[INTTOPTR]](p4) :: (load (<8 x s32>))
- ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>)
- ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s128) = G_TRUNC [[BITCAST]](s256)
- ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[TRUNC]](s128)
+ ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[INTTOPTR]](p4) :: (invariant load (<8 x s32>), align 16, addrspace 4)
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
- ; GFX10-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.lo), [[C2]](s32), [[C1]](s32)
- ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.hi), [[C2]](s32), [[INT1]](s32)
- ; GFX10-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[INT2]]
+ ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>)
+ ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; GFX10-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.lo), [[C4]](s32), [[C1]](s32)
+ ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.hi), [[C4]](s32), [[INT1]](s32)
+ ; GFX10-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[INT2]]
; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FREEZE]], [[C3]](s32)
- ; GFX10-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_BUFFER_LOAD [[BITCAST1]](<4 x s32>), [[C1]](s32), [[SHL]], [[C1]], 0, 0, 0 :: (load (s32), align 1, addrspace 8)
+ ; GFX10-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_BUFFER_LOAD [[UV]](<4 x s32>), [[C1]](s32), [[SHL]], [[C1]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 8)
; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_BUFFER_LOAD]](s32), [[C1]]
- ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[FREEZE]], [[C4]]
- ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[AND1]](s32)
+ ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[FREEZE]], [[C2]]
+ ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[AND1]](s32)
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[TRUNC1]], [[C5]]
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[TRUNC]], [[C5]]
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1)
; GFX10-NEXT: G_BRCOND [[XOR]](s1), %bb.2
; GFX10-NEXT: G_BR %bb.1
@@ -495,8 +471,8 @@ body: |
; GFX10-NEXT: bb.2:
; GFX10-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY3]](s1), %bb.0, %58(s1), %bb.4
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %30(s32), %bb.4, [[DEF]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY3]](s1), %bb.0, %56(s1), %bb.4
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %29(s32), %bb.4, [[DEF]](s32), %bb.0
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
; GFX10-NEXT: G_BRCOND [[COPY4]](s1), %bb.5
; GFX10-NEXT: G_BR %bb.6
@@ -504,11 +480,11 @@ body: |
; GFX10-NEXT: bb.3:
; GFX10-NEXT: successors: %bb.4(0x04000000), %bb.3(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %34(s32), %bb.3, [[C6]](s32), %bb.1
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI %36(s32), %bb.3, [[FREEZE]](s32), %bb.1
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI %38(s32), %bb.3, [[C6]](s32), %bb.1
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %33(s32), %bb.3, [[C6]](s32), %bb.1
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI %35(s32), %bb.3, [[FREEZE]](s32), %bb.1
+ ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI %37(s32), %bb.3, [[C6]](s32), %bb.1
; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:_(s32) = G_AMDGPU_BUFFER_LOAD [[BITCAST1]](<4 x s32>), [[C7]](s32), [[PHI2]], [[C7]], 0, 0, 0 :: (load (s32), align 1, addrspace 8)
+ ; GFX10-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:_(s32) = G_AMDGPU_BUFFER_LOAD [[UV]](<4 x s32>), [[C7]](s32), [[PHI2]], [[C7]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 8)
; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AMDGPU_BUFFER_LOAD1]], [[PHI4]]
; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C8]]
@@ -521,11 +497,10 @@ body: |
; GFX10-NEXT: bb.4:
; GFX10-NEXT: successors: %bb.2(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.3
- ; GFX10-NEXT: [[C10:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
- ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[PHI5]](s32), [[AMDGPU_BUFFER_LOAD]]
+ ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[AMDGPU_BUFFER_LOAD]]
; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP2]]
; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s1)
+ ; GFX10-NEXT: [[C10:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[C10]](s1)
; GFX10-NEXT: G_BR %bb.2
; GFX10-NEXT: {{ $}}
@@ -537,13 +512,13 @@ body: |
; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[C11]]
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.6:
- ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[PHI1]](s32), %bb.2, [[OR2]](s32), %bb.5
- ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>)
+ ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[PHI1]](s32), %bb.2, [[OR2]](s32), %bb.5
+ ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>)
; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY1]]
; GFX10-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ADD3]], [[C12]](s32)
; GFX10-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: G_AMDGPU_BUFFER_STORE [[PHI6]](s32), [[UV1]](<4 x s32>), [[C13]](s32), [[SHL1]], [[C13]], 0, 0, 0 :: (store (s32), align 1, addrspace 8)
+ ; GFX10-NEXT: G_AMDGPU_BUFFER_STORE [[PHI5]](s32), [[UV3]](<4 x s32>), [[C13]](s32), [[SHL1]], [[C13]], 0, 0, 0 :: (dereferenceable store (s32), align 1, addrspace 8)
; GFX10-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1(0x40000000), %bb.2(0x40000000)
@@ -559,82 +534,79 @@ body: |
%7:_(s64) = G_ZEXT %0(s32)
%8:_(s64) = G_OR %6, %7
%9:_(p4) = G_INTTOPTR %8(s64)
- %10:_(<8 x s32>) = G_LOAD %9(p4) :: (load (<8 x s32>))
- %11:_(s256) = G_BITCAST %10(<8 x s32>)
- %12:_(s128) = G_TRUNC %11(s256)
- %13:_(<4 x s32>) = G_BITCAST %12(s128)
- %15:_(s32) = G_CONSTANT i32 0
- %14:_(s32) = G_CONSTANT i32 -1
- %16:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.lo), %14(s32), %15(s32)
- %17:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.hi), %14(s32), %16(s32)
- %18:_(s32) = G_FREEZE %17
- %19:_(s32) = G_CONSTANT i32 2
- %20:_(s32) = G_SHL %18, %19(s32)
- %21:_(s32) = G_AMDGPU_BUFFER_LOAD %13(<4 x s32>), %15(s32), %20, %15, 0, 0, 0 :: (load (s32), align 1, addrspace 8)
- %22:_(s1) = G_ICMP intpred(eq), %21(s32), %15
- %23:_(s32) = G_CONSTANT i32 1
- %24:_(s32) = G_AND %18, %23
- %25:_(s1) = G_TRUNC %24(s32)
- %26:_(s1) = G_CONSTANT i1 true
- %27:_(s1) = G_XOR %25, %26
- G_BRCOND %27(s1), %bb.2
+ %10:_(<8 x s32>) = G_LOAD %9(p4) :: (invariant load (<8 x s32>), align 16, addrspace 4)
+ %11:_(s32) = G_CONSTANT i32 0
+ %12:_(s32) = G_CONSTANT i32 1
+ %13:_(s32) = G_CONSTANT i32 2
+ %14:_(<4 x s32>), %15:_(<4 x s32>) = G_UNMERGE_VALUES %10(<8 x s32>)
+ %16:_(s32) = G_CONSTANT i32 -1
+ %17:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.lo), %16(s32), %11(s32)
+ %18:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mbcnt.hi), %16(s32), %17(s32)
+ %19:_(s32) = G_FREEZE %18
+ %20:_(s32) = G_SHL %19, %13(s32)
+ %21:_(s32) = G_AMDGPU_BUFFER_LOAD %14(<4 x s32>), %11(s32), %20, %11, 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 8)
+ %22:_(s1) = G_ICMP intpred(eq), %21(s32), %11
+ %23:_(s32) = G_AND %19, %12
+ %24:_(s1) = G_TRUNC %23(s32)
+ %25:_(s1) = G_CONSTANT i1 true
+ %26:_(s1) = G_XOR %24, %25
+ G_BRCOND %26(s1), %bb.2
G_BR %bb.1
bb.1:
successors: %bb.3(0x80000000)
- %28:_(s32) = G_CONSTANT i32 0
+ %27:_(s32) = G_CONSTANT i32 0
G_BR %bb.3
bb.2:
successors: %bb.5(0x40000000), %bb.6(0x40000000)
- %29:_(s32) = G_PHI %30(s32), %bb.4, %3(s32), %bb.0
- %31:_(s1) = G_PHI %32(s1), %bb.4, %26(s1), %bb.0
- G_BRCOND %31(s1), %bb.5
+ %28:_(s32) = G_PHI %29(s32), %bb.4, %3(s32), %bb.0
+ %30:_(s1) = G_PHI %31(s1), %bb.4, %25(s1), %bb.0
+ G_BRCOND %30(s1), %bb.5
G_BR %bb.6
bb.3:
successors: %bb.4(0x04000000), %bb.3(0x7c000000)
- %33:_(s32) = G_PHI %34(s32), %bb.3, %28(s32), %bb.1
- %35:_(s32) = G_PHI %36(s32), %bb.3, %18(s32), %bb.1
- %37:_(s32) = G_PHI %38(s32), %bb.3, %28(s32), %bb.1
- %39:_(s32) = G_CONSTANT i32 0
- %40:_(s32) = G_AMDGPU_BUFFER_LOAD %13(<4 x s32>), %39(s32), %33, %39, 0, 0, 0 :: (load (s32), align 1, addrspace 8)
- %38:_(s32) = G_ADD %40, %37
- %41:_(s32) = G_CONSTANT i32 -1
- %36:_(s32) = G_ADD %35, %41
- %42:_(s32) = G_CONSTANT i32 4
- %34:_(s32) = G_ADD %33, %42
- %43:_(s1) = G_ICMP intpred(ne), %36(s32), %39
- G_BRCOND %43(s1), %bb.3
+ %32:_(s32) = G_PHI %33(s32), %bb.3, %27(s32), %bb.1
+ %34:_(s32) = G_PHI %35(s32), %bb.3, %19(s32), %bb.1
+ %36:_(s32) = G_PHI %37(s32), %bb.3, %27(s32), %bb.1
+ %38:_(s32) = G_CONSTANT i32 0
+ %39:_(s32) = G_AMDGPU_BUFFER_LOAD %14(<4 x s32>), %38(s32), %32, %38, 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 8)
+ %37:_(s32) = G_ADD %39, %36
+ %40:_(s32) = G_CONSTANT i32 -1
+ %35:_(s32) = G_ADD %34, %40
+ %41:_(s32) = G_CONSTANT i32 4
+ %33:_(s32) = G_ADD %32, %41
+ %42:_(s1) = G_ICMP intpred(ne), %35(s32), %38
+ G_BRCOND %42(s1), %bb.3
G_BR %bb.4
bb.4:
successors: %bb.2(0x80000000)
- %44:_(s32) = G_PHI %38(s32), %bb.3
- %32:_(s1) = G_CONSTANT i1 false
- %45:_(s1) = G_ICMP intpred(eq), %44(s32), %21
- %46:_(s1) = G_OR %22, %45
- %30:_(s32) = G_ZEXT %46(s1)
+ %43:_(s1) = G_ICMP intpred(eq), %37(s32), %21
+ %44:_(s1) = G_OR %22, %43
+ %29:_(s32) = G_ZEXT %44(s1)
+ %31:_(s1) = G_CONSTANT i1 false
G_BR %bb.2
bb.5:
successors: %bb.6(0x80000000)
- %47:_(s32) = G_ZEXT %22(s1)
- %48:_(s32) = G_CONSTANT i32 2
- %49:_(s32) = G_OR %47, %48
+ %45:_(s32) = G_ZEXT %22(s1)
+ %46:_(s32) = G_CONSTANT i32 2
+ %47:_(s32) = G_OR %45, %46
bb.6:
- %50:_(s32) = G_PHI %29(s32), %bb.2, %49(s32), %bb.5
- %51:_(<4 x s32>), %52:_(<4 x s32>) = G_UNMERGE_VALUES %10(<8 x s32>)
- %53:_(s32) = G_ADD %2, %1
- %54:_(s32) = G_CONSTANT i32 2
- %55:_(s32) = G_SHL %53, %54(s32)
- %56:_(s32) = G_CONSTANT i32 0
- G_AMDGPU_BUFFER_STORE %50(s32), %52(<4 x s32>), %56(s32), %55, %56, 0, 0, 0 :: (store (s32), align 1, addrspace 8)
+ %48:_(s32) = G_PHI %28(s32), %bb.2, %47(s32), %bb.5
+ %49:_(<4 x s32>), %50:_(<4 x s32>) = G_UNMERGE_VALUES %10(<8 x s32>)
+ %51:_(s32) = G_ADD %2, %1
+ %52:_(s32) = G_CONSTANT i32 2
+ %53:_(s32) = G_SHL %51, %52(s32)
+ %54:_(s32) = G_CONSTANT i32 0
+ G_AMDGPU_BUFFER_STORE %48(s32), %50(<4 x s32>), %54(s32), %53, %54, 0, 0, 0 :: (dereferenceable store (s32), align 1, addrspace 8)
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll
index a66c21feb1cbc..687452f58bd63 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll
@@ -178,6 +178,55 @@ exit:
ret void
}
+define void @divergent_i1_xor_used_outside_loop_twice(float %val, float %pre.cond.val, ptr %addr, ptr %addr2) {
+; GFX10-LABEL: divergent_i1_xor_used_outside_loop_twice:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_mov_b32 s4, 0
+; GFX10-NEXT: v_cmp_lt_f32_e64 s5, 1.0, v1
+; GFX10-NEXT: v_mov_b32_e32 v1, s4
+; GFX10-NEXT: ; implicit-def: $sgpr6
+; GFX10-NEXT: .LBB3_1: ; %loop
+; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX10-NEXT: v_cvt_f32_u32_e32 v6, v1
+; GFX10-NEXT: s_xor_b32 s5, s5, -1
+; GFX10-NEXT: v_add_nc_u32_e32 v1, 1, v1
+; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v6, v0
+; GFX10-NEXT: s_or_b32 s4, vcc_lo, s4
+; GFX10-NEXT: s_andn2_b32 s6, s6, exec_lo
+; GFX10-NEXT: s_and_b32 s7, exec_lo, s5
+; GFX10-NEXT: s_or_b32 s6, s6, s7
+; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4
+; GFX10-NEXT: s_cbranch_execnz .LBB3_1
+; GFX10-NEXT: ; %bb.2: ; %exit
+; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s6
+; GFX10-NEXT: v_cndmask_b32_e64 v1, -1.0, 2.0, s6
+; GFX10-NEXT: flat_store_dword v[2:3], v0
+; GFX10-NEXT: flat_store_dword v[4:5], v1
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %pre.cond = fcmp ogt float %pre.cond.val, 1.0
+ br label %loop
+
+loop:
+ %counter = phi i32 [ 0, %entry ], [ %counter.plus.1, %loop ]
+ %bool.counter = phi i1 [ %pre.cond, %entry ], [ %neg.bool.counter, %loop ]
+ %neg.bool.counter = xor i1 %bool.counter, true
+ %f.counter = uitofp i32 %counter to float
+ %cond = fcmp ogt float %f.counter, %val
+ %counter.plus.1 = add i32 %counter, 1
+ br i1 %cond, label %exit, label %loop
+
+exit:
+ %select = select i1 %neg.bool.counter, float 1.000000e+00, float 0.000000e+00
+ store float %select, ptr %addr
+ %select2 = select i1 %neg.bool.counter, float 2.000000e+00, float -1.000000e+00
+ store float %select2, ptr %addr2
+ ret void
+}
+
;void xor(int num_elts, int* a, int* addr) {
;for(int i=0; i<num_elts; ++i) {
; if(a[i]==0)
@@ -195,15 +244,15 @@ define void @divergent_i1_xor_used_outside_loop_larger_loop_body(i32 %num.elts,
; GFX10-NEXT: s_mov_b32 s5, 0
; GFX10-NEXT: s_mov_b32 s6, -1
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
-; GFX10-NEXT: s_cbranch_execz .LBB3_6
+; GFX10-NEXT: s_cbranch_execz .LBB4_6
; GFX10-NEXT: ; %bb.1: ; %loop.start.preheader
; GFX10-NEXT: v_mov_b32_e32 v5, s5
; GFX10-NEXT: ; implicit-def: $sgpr6
; GFX10-NEXT: ; implicit-def: $sgpr7
; GFX10-NEXT: ; implicit-def: $sgpr8
-; GFX10-NEXT: s_branch .LBB3_3
-; GFX10-NEXT: .LBB3_2: ; %Flow
-; GFX10-NEXT: ; in Loop: Header=BB3_3 Depth=1
+; GFX10-NEXT: s_branch .LBB4_3
+; GFX10-NEXT: .LBB4_2: ; %Flow
+; GFX10-NEXT: ; in Loop: Header=BB4_3 Depth=1
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s9
; GFX10-NEXT: s_xor_b32 s9, s8, -1
; GFX10-NEXT: s_and_b32 s10, exec_lo, s7
@@ -212,8 +261,8 @@ define void @divergent_i1_xor_used_outside_loop_larger_loop_body(i32 %num.elts,
; GFX10-NEXT: s_and_b32 s9, exec_lo, s9
; GFX10-NEXT: s_or_b32 s6, s6, s9
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5
-; GFX10-NEXT: s_cbranch_execz .LBB3_5
-; GFX10-NEXT: .LBB3_3: ; %loop.start
+; GFX10-NEXT: s_cbranch_execz .LBB4_5
+; GFX10-NEXT: .LBB4_3: ; %loop.start
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_ashrrev_i32_e32 v6, 31, v5
; GFX10-NEXT: s_andn2_b32 s8, s8, exec_lo
@@ -228,9 +277,9 @@ define void @divergent_i1_xor_used_outside_loop_larger_loop_body(i32 %num.elts,
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6
; GFX10-NEXT: s_and_saveexec_b32 s9, vcc_lo
-; GFX10-NEXT: s_cbranch_execz .LBB3_2
+; GFX10-NEXT: s_cbranch_execz .LBB4_2
; GFX10-NEXT: ; %bb.4: ; %loop.cond
-; GFX10-NEXT: ; in Loop: Header=BB3_3 Depth=1
+; GFX10-NEXT: ; in Loop: Header=BB4_3 Depth=1
; GFX10-NEXT: v_add_nc_u32_e32 v6, 1, v5
; GFX10-NEXT: v_cmp_lt_i32_e32 vcc_lo, v5, v0
; GFX10-NEXT: s_andn2_b32 s8, s8, exec_lo
@@ -240,20 +289,20 @@ define void @divergent_i1_xor_used_outside_loop_larger_loop_body(i32 %num.elts,
; GFX10-NEXT: s_and_b32 s11, exec_lo, vcc_lo
; GFX10-NEXT: s_or_b32 s8, s8, s10
; GFX10-NEXT: s_or_b32 s7, s7, s11
-; GFX10-NEXT: s_branch .LBB3_2
-; GFX10-NEXT: .LBB3_5: ; %loop.exit.guard
+; GFX10-NEXT: s_branch .LBB4_2
+; GFX10-NEXT: .LBB4_5: ; %loop.exit.guard
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5
; GFX10-NEXT: s_andn2_b32 s5, -1, exec_lo
; GFX10-NEXT: s_and_b32 s6, exec_lo, s6
; GFX10-NEXT: s_or_b32 s6, s5, s6
-; GFX10-NEXT: .LBB3_6: ; %Flow1
+; GFX10-NEXT: .LBB4_6: ; %Flow1
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: s_and_saveexec_b32 s4, s6
-; GFX10-NEXT: s_cbranch_execz .LBB3_8
+; GFX10-NEXT: s_cbranch_execz .LBB4_8
; GFX10-NEXT: ; %bb.7: ; %block.after.loop
; GFX10-NEXT: v_mov_b32_e32 v0, 5
; GFX10-NEXT: flat_store_dword v[3:4], v0
-; GFX10-NEXT: .LBB3_8: ; %exit
+; GFX10-NEXT: .LBB4_8: ; %exit
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
@@ -299,9 +348,9 @@ define void @divergent_i1_icmp_used_outside_loop(i32 %v0, i32 %v1, ptr addrspace
; GFX10-NEXT: s_mov_b32 s5, 0
; GFX10-NEXT: ; implicit-def: $sgpr6
; GFX10-NEXT: v_mov_b32_e32 v5, s5
-; GFX10-NEXT: s_branch .LBB4_2
-; GFX10-NEXT: .LBB4_1: ; %Flow
-; GFX10-NEXT: ; in Loop: Header=BB4_2 Depth=1
+; GFX10-NEXT: s_branch .LBB5_2
+; GFX10-NEXT: .LBB5_1: ; %Flow
+; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8
; GFX10-NEXT: s_and_b32 s4, exec_lo, s7
; GFX10-NEXT: s_or_b32 s5, s4, s5
@@ -309,43 +358,43 @@ define void @divergent_i1_icmp_used_outside_loop(i32 %v0, i32 %v1, ptr addrspace
; GFX10-NEXT: s_and_b32 s6, exec_lo, vcc_lo
; GFX10-NEXT: s_or_b32 s6, s4, s6
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5
-; GFX10-NEXT: s_cbranch_execz .LBB4_6
-; GFX10-NEXT: .LBB4_2: ; %cond.block.0
+; GFX10-NEXT: s_cbranch_execz .LBB5_6
+; GFX10-NEXT: .LBB5_2: ; %cond.block.0
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_mov_b32_e32 v4, v5
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v4
; GFX10-NEXT: s_and_saveexec_b32 s7, vcc_lo
-; GFX10-NEXT: s_cbranch_execz .LBB4_4
+; GFX10-NEXT: s_cbranch_execz .LBB5_4
; GFX10-NEXT: ; %bb.3: ; %if.block.0
-; GFX10-NEXT: ; in Loop: Header=BB4_2 Depth=1
+; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1
; GFX10-NEXT: v_ashrrev_i32_e32 v5, 31, v4
; GFX10-NEXT: v_lshlrev_b64 v[8:9], 2, v[4:5]
; GFX10-NEXT: v_add_co_u32 v8, s4, v2, v8
; GFX10-NEXT: v_add_co_ci_u32_e64 v9, s4, v3, v9, s4
; GFX10-NEXT: global_store_dword v[8:9], v4, off
-; GFX10-NEXT: .LBB4_4: ; %loop.break.block
-; GFX10-NEXT: ; in Loop: Header=BB4_2 Depth=1
+; GFX10-NEXT: .LBB5_4: ; %loop.break.block
+; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s7
; GFX10-NEXT: v_cmp_ne_u32_e64 s4, v1, v4
; GFX10-NEXT: s_mov_b32 s7, -1
; GFX10-NEXT: ; implicit-def: $vgpr5
; GFX10-NEXT: s_and_saveexec_b32 s8, s4
-; GFX10-NEXT: s_cbranch_execz .LBB4_1
+; GFX10-NEXT: s_cbranch_execz .LBB5_1
; GFX10-NEXT: ; %bb.5: ; %loop.cond
-; GFX10-NEXT: ; in Loop: Header=BB4_2 Depth=1
+; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1
; GFX10-NEXT: v_add_nc_u32_e32 v5, 1, v4
; GFX10-NEXT: s_andn2_b32 s4, -1, exec_lo
; GFX10-NEXT: s_and_b32 s7, exec_lo, 0
; GFX10-NEXT: s_or_b32 s7, s4, s7
-; GFX10-NEXT: s_branch .LBB4_1
-; GFX10-NEXT: .LBB4_6: ; %cond.block.1
+; GFX10-NEXT: s_branch .LBB5_1
+; GFX10-NEXT: .LBB5_6: ; %cond.block.1
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5
; GFX10-NEXT: s_and_saveexec_b32 s4, s6
-; GFX10-NEXT: s_cbranch_execz .LBB4_8
+; GFX10-NEXT: s_cbranch_execz .LBB5_8
; GFX10-NEXT: ; %bb.7: ; %if.block.1
; GFX10-NEXT: global_store_dword v[6:7], v4, off
-; GFX10-NEXT: .LBB4_8: ; %exit
+; GFX10-NEXT: .LBB5_8: ; %exit
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -410,9 +459,9 @@ define amdgpu_ps void @divergent_i1_freeze_used_outside_loop(i32 %n, ptr addrspa
; GFX10-NEXT: v_mov_b32_e32 v5, s0
; GFX10-NEXT: ; implicit-def: $sgpr1
; GFX10-NEXT: ; implicit-def: $sgpr2
-; GFX10-NEXT: s_branch .LBB5_2
-; GFX10-NEXT: .LBB5_1: ; %loop.cond
-; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1
+; GFX10-NEXT: s_branch .LBB6_2
+; GFX10-NEXT: .LBB6_1: ; %loop.cond
+; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: v_cmp_lt_i32_e32 vcc_lo, v5, v0
; GFX10-NEXT: v_add_nc_u32_e32 v5, 1, v5
@@ -423,16 +472,16 @@ define amdgpu_ps void @divergent_i1_freeze_used_outside_loop(i32 %n, ptr addrspa
; GFX10-NEXT: s_or_b32 s3, s3, s4
; GFX10-NEXT: s_or_b32 s1, s1, s4
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s0
-; GFX10-NEXT: s_cbranch_execz .LBB5_4
-; GFX10-NEXT: .LBB5_2: ; %loop.start
+; GFX10-NEXT: s_cbranch_execz .LBB6_4
+; GFX10-NEXT: .LBB6_2: ; %loop.start
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: s_andn2_b32 s2, s2, exec_lo
; GFX10-NEXT: s_and_b32 s4, exec_lo, s3
; GFX10-NEXT: s_or_b32 s2, s2, s4
; GFX10-NEXT: s_and_saveexec_b32 s4, s3
-; GFX10-NEXT: s_cbranch_execz .LBB5_1
+; GFX10-NEXT: s_cbranch_execz .LBB6_1
; GFX10-NEXT: ; %bb.3: ; %is.eq.zero
-; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1
+; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
; GFX10-NEXT: v_ashrrev_i32_e32 v6, 31, v5
; GFX10-NEXT: s_andn2_b32 s2, s2, exec_lo
; GFX10-NEXT: v_lshlrev_b64 v[6:7], 2, v[5:6]
@@ -444,8 +493,8 @@ define amdgpu_ps void @divergent_i1_freeze_used_outside_loop(i32 %n, ptr addrspa
; GFX10-NEXT: s_and_b32 s3, exec_lo, vcc_lo
; GFX10-NEXT: s_or_b32 s2, s2, s3
; GFX10-NEXT: ; implicit-def: $sgpr3
-; GFX10-NEXT: s_branch .LBB5_1
-; GFX10-NEXT: .LBB5_4: ; %exit
+; GFX10-NEXT: s_branch .LBB6_1
+; GFX10-NEXT: .LBB6_4: ; %exit
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s1
; GFX10-NEXT: flat_store_dword v[3:4], v0
@@ -486,9 +535,9 @@ define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, ptr addrspace(1) %a
; GFX10-NEXT: ; implicit-def: $sgpr2
; GFX10-NEXT: ; implicit-def: $sgpr3
; GFX10-NEXT: v_mov_b32_e32 v6, s0
-; GFX10-NEXT: s_branch .LBB6_2
-; GFX10-NEXT: .LBB6_1: ; %Flow
-; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
+; GFX10-NEXT: s_branch .LBB7_2
+; GFX10-NEXT: .LBB7_1: ; %Flow
+; GFX10-NEXT: ; in Loop: Header=BB7_2 Depth=1
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: s_and_b32 s4, exec_lo, s2
@@ -497,8 +546,8 @@ define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, ptr addrspace(1) %a
; GFX10-NEXT: s_and_b32 s4, exec_lo, s3
; GFX10-NEXT: s_or_b32 s1, s1, s4
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s0
-; GFX10-NEXT: s_cbranch_execz .LBB6_4
-; GFX10-NEXT: .LBB6_2: ; %A
+; GFX10-NEXT: s_cbranch_execz .LBB7_4
+; GFX10-NEXT: .LBB7_2: ; %A
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_ashrrev_i32_e32 v7, 31, v6
; GFX10-NEXT: s_andn2_b32 s3, s3, exec_lo
@@ -513,9 +562,9 @@ define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, ptr addrspace(1) %a
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v9
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
-; GFX10-NEXT: s_cbranch_execz .LBB6_1
+; GFX10-NEXT: s_cbranch_execz .LBB7_1
; GFX10-NEXT: ; %bb.3: ; %loop.body
-; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
+; GFX10-NEXT: ; in Loop: Header=BB7_2 Depth=1
; GFX10-NEXT: v_add_co_u32 v7, vcc_lo, v0, v7
; GFX10-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, v1, v8, vcc_lo
; GFX10-NEXT: v_add_nc_u32_e32 v10, 1, v6
@@ -531,16 +580,16 @@ define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, ptr addrspace(1) %a
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_add_nc_u32_e32 v9, 1, v9
; GFX10-NEXT: global_store_dword v[7:8], v9, off
-; GFX10-NEXT: s_branch .LBB6_1
-; GFX10-NEXT: .LBB6_4: ; %loop.exit.guard
+; GFX10-NEXT: s_branch .LBB7_1
+; GFX10-NEXT: .LBB7_4: ; %loop.exit.guard
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX10-NEXT: s_and_saveexec_b32 s0, s1
; GFX10-NEXT: s_xor_b32 s0, exec_lo, s0
-; GFX10-NEXT: s_cbranch_execz .LBB6_6
+; GFX10-NEXT: s_cbranch_execz .LBB7_6
; GFX10-NEXT: ; %bb.5: ; %break.body
; GFX10-NEXT: v_mov_b32_e32 v0, 10
; GFX10-NEXT: global_store_dword v[4:5], v0, off
-; GFX10-NEXT: .LBB6_6: ; %exit
+; GFX10-NEXT: .LBB7_6: ; %exit
; GFX10-NEXT: s_endpgm
entry:
br label %A
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir
index 5bbe3e4886899..397a952d94203 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir
@@ -16,52 +16,43 @@ body: |
; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C1]]
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C]]
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[FCMP]](s1)
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[DEF]](s1)
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
- ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %36(s1), %bb.1
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.0, %24(s1), %bb.1
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.1, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %11(s32), %bb.1
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.0, %22(s1), %bb.1
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.1, [[C1]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %11(s32), %bb.1
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
- ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1)
- ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[COPY7]](s1)
- ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1)
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY7]], [[C2]]
- ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
- ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI3]](s32)
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY6]], [[C2]]
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
+ ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
; GFX10-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C3]]
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI2]](s32)
- ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]]
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI1]](s32)
+ ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.2
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.2:
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.1
- ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_2]](s1)
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32)
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY11]](s1), [[C5]], [[C4]]
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY6]](s1), [[C5]], [[C4]]
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
; GFX10-NEXT: SI_RETURN
bb.0:
@@ -73,16 +64,16 @@ body: |
%2:_(s32) = COPY $vgpr2
%3:_(s32) = COPY $vgpr3
%4:_(p0) = G_MERGE_VALUES %2(s32), %3(s32)
- %5:_(s32) = G_CONSTANT i32 0
- %6:_(s32) = G_FCONSTANT float 1.000000e+00
- %7:_(s1) = G_FCMP floatpred(ogt), %1(s32), %6
+ %5:_(s32) = G_FCONSTANT float 1.000000e+00
+ %6:_(s1) = G_FCMP floatpred(ogt), %1(s32), %5
+ %7:_(s32) = G_CONSTANT i32 0
bb.1:
successors: %bb.2(0x04000000), %bb.1(0x7c000000)
- %8:_(s32) = G_PHI %9(s32), %bb.1, %5(s32), %bb.0
- %10:_(s32) = G_PHI %5(s32), %bb.0, %11(s32), %bb.1
- %12:_(s1) = G_PHI %7(s1), %bb.0, %13(s1), %bb.1
+ %8:_(s32) = G_PHI %9(s32), %bb.1, %7(s32), %bb.0
+ %10:_(s32) = G_PHI %7(s32), %bb.0, %11(s32), %bb.1
+ %12:_(s1) = G_PHI %6(s1), %bb.0, %13(s1), %bb.1
%14:_(s1) = G_CONSTANT i1 true
%13:_(s1) = G_XOR %12, %14
%15:_(s32) = G_UITOFP %10(s32)
@@ -94,13 +85,11 @@ body: |
G_BR %bb.2
bb.2:
- %18:_(s1) = G_PHI %12(s1), %bb.1
- %19:_(s32) = G_PHI %9(s32), %bb.1
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %19(s32)
- %20:_(s32) = G_FCONSTANT float 0.000000e+00
- %21:_(s32) = G_FCONSTANT float 1.000000e+00
- %22:_(s32) = G_SELECT %18(s1), %21, %20
- G_STORE %22(s32), %4(p0) :: (store (s32))
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %9(s32)
+ %18:_(s32) = G_FCONSTANT float 0.000000e+00
+ %19:_(s32) = G_FCONSTANT float 1.000000e+00
+ %20:_(s32) = G_SELECT %12(s1), %19, %18
+ G_STORE %20(s32), %4(p0) :: (store (s32))
SI_RETURN
...
@@ -120,9 +109,9 @@ body: |
; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C1]](s1)
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C]](s1)
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[DEF]](s1)
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc
@@ -133,9 +122,9 @@ body: |
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %41(s1), %bb.3
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[S_OR_B32_]](s1), %bb.0, %27(s1), %bb.3
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.3
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %40(s1), %bb.3
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[S_OR_B32_]](s1), %bb.0, %26(s1), %bb.3
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.3
; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(p1) = G_PHI [[MV]](p1), %bb.0, %11(p1), %bb.3
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI1]](s1)
@@ -175,15 +164,13 @@ body: |
; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc
; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc
- ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[COPY7]](s1)
; GFX10-NEXT: G_BRCOND [[ICMP1]](s1), %bb.1
; GFX10-NEXT: G_BR %bb.4
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.4:
- ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[COPY14]](s1)
; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY15]](s1), [[C7]], [[C6]]
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY7]](s1), [[C7]], [[C6]]
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV1]](p0) :: (store (s32))
; GFX10-NEXT: SI_RETURN
bb.0:
@@ -196,15 +183,15 @@ body: |
%3:_(s32) = COPY $vgpr3
%4:_(s32) = COPY $vgpr4
%5:_(p0) = G_MERGE_VALUES %3(s32), %4(s32)
- %6:_(s32) = G_CONSTANT i32 -1
- %7:_(s1) = G_CONSTANT i1 true
+ %6:_(s1) = G_CONSTANT i1 true
+ %7:_(s32) = G_CONSTANT i32 -1
bb.1:
successors: %bb.2(0x40000000), %bb.3(0x40000000)
- %8:_(s32) = G_PHI %6(s32), %bb.0, %9(s32), %bb.3
+ %8:_(s32) = G_PHI %7(s32), %bb.0, %9(s32), %bb.3
%10:_(p1) = G_PHI %2(p1), %bb.0, %11(p1), %bb.3
- %12:sreg_32_xm0_xexec(s1) = G_PHI %7(s1), %bb.0, %13(s1), %bb.3
+ %12:sreg_32_xm0_xexec(s1) = G_PHI %6(s1), %bb.0, %13(s1), %bb.3
%14:sreg_32_xm0_xexec(s32) = SI_IF %12(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.2
@@ -230,11 +217,10 @@ body: |
G_BR %bb.4
bb.4:
- %22:_(s1) = G_PHI %12(s1), %bb.3
- %23:_(s32) = G_FCONSTANT float 0.000000e+00
- %24:_(s32) = G_FCONSTANT float 1.000000e+00
- %25:_(s32) = G_SELECT %22(s1), %24, %23
- G_STORE %25(s32), %5(p0) :: (store (s32))
+ %22:_(s32) = G_FCONSTANT float 0.000000e+00
+ %23:_(s32) = G_FCONSTANT float 1.000000e+00
+ %24:_(s32) = G_SELECT %12(s1), %23, %22
+ G_STORE %24(s32), %5(p0) :: (store (s32))
SI_RETURN
...
@@ -253,43 +239,34 @@ body: |
; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C1]]
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C]]
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[FCMP]](s1)
- ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %27(s1), %bb.1
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[COPY4]](s1), %bb.0, %24(s1), %bb.1
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.1, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %11(s32), %bb.1
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY4]](s1), %bb.0, %22(s1), %bb.1
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.1, [[C1]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %11(s32), %bb.1
; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
- ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1)
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY6]], [[C2]]
- ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
- ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI3]](s32)
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY5]], [[C2]]
+ ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
; GFX10-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C3]]
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI2]](s32)
- ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY7]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]]
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI1]](s32)
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.2
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.2:
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.1
- ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32)
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY9]](s1), [[C5]], [[C4]]
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[XOR]](s1), [[C5]], [[C4]]
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
; GFX10-NEXT: SI_RETURN
bb.0:
@@ -301,16 +278,16 @@ body: |
%2:_(s32) = COPY $vgpr2
%3:_(s32) = COPY $vgpr3
%4:_(p0) = G_MERGE_VALUES %2(s32), %3(s32)
- %5:_(s32) = G_CONSTANT i32 0
- %6:_(s32) = G_FCONSTANT float 1.000000e+00
- %7:_(s1) = G_FCMP floatpred(ogt), %1(s32), %6
+ %5:_(s32) = G_FCONSTANT float 1.000000e+00
+ %6:_(s1) = G_FCMP floatpred(ogt), %1(s32), %5
+ %7:_(s32) = G_CONSTANT i32 0
bb.1:
successors: %bb.2(0x04000000), %bb.1(0x7c000000)
- %8:_(s32) = G_PHI %9(s32), %bb.1, %5(s32), %bb.0
- %10:_(s32) = G_PHI %5(s32), %bb.0, %11(s32), %bb.1
- %12:_(s1) = G_PHI %7(s1), %bb.0, %13(s1), %bb.1
+ %8:_(s32) = G_PHI %9(s32), %bb.1, %7(s32), %bb.0
+ %10:_(s32) = G_PHI %7(s32), %bb.0, %11(s32), %bb.1
+ %12:_(s1) = G_PHI %6(s1), %bb.0, %13(s1), %bb.1
%14:_(s1) = G_CONSTANT i1 true
%13:_(s1) = G_XOR %12, %14
%15:_(s32) = G_UITOFP %10(s32)
@@ -322,13 +299,109 @@ body: |
G_BR %bb.2
bb.2:
- %18:_(s1) = G_PHI %13(s1), %bb.1
- %19:_(s32) = G_PHI %9(s32), %bb.1
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %19(s32)
- %20:_(s32) = G_FCONSTANT float 0.000000e+00
- %21:_(s32) = G_FCONSTANT float 1.000000e+00
- %22:_(s32) = G_SELECT %18(s1), %21, %20
- G_STORE %22(s32), %4(p0) :: (store (s32))
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %9(s32)
+ %18:_(s32) = G_FCONSTANT float 0.000000e+00
+ %19:_(s32) = G_FCONSTANT float 1.000000e+00
+ %20:_(s32) = G_SELECT %13(s1), %19, %18
+ G_STORE %20(s32), %4(p0) :: (store (s32))
+ SI_RETURN
+...
+
+
+---
+name: divergent_i1_xor_used_outside_loop_twice
+legalized: true
+tracksRegLiveness: true
+body: |
+ ; GFX10-LABEL: name: divergent_i1_xor_used_outside_loop_twice
+ ; GFX10: bb.0:
+ ; GFX10-NEXT: successors: %bb.1(0x80000000)
+ ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C]]
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[FCMP]](s1)
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.1:
+ ; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY6]](s1), %bb.0, %28(s1), %bb.1
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.1, [[C1]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %14(s32), %bb.1
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
+ ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY7]], [[C2]]
+ ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
+ ; GFX10-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
+ ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]]
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI1]](s32)
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
+ ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: G_BR %bb.2
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.2:
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
+ ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[XOR]](s1), [[C5]], [[C4]]
+ ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
+ ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+00
+ ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
+ ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[XOR]](s1), [[C7]], [[C6]]
+ ; GFX10-NEXT: G_STORE [[SELECT1]](s32), [[MV1]](p0) :: (store (s32))
+ ; GFX10-NEXT: SI_RETURN
+ bb.0:
+ successors: %bb.1(0x80000000)
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %3:_(s32) = COPY $vgpr3
+ %4:_(p0) = G_MERGE_VALUES %2(s32), %3(s32)
+ %5:_(s32) = COPY $vgpr4
+ %6:_(s32) = COPY $vgpr5
+ %7:_(p0) = G_MERGE_VALUES %5(s32), %6(s32)
+ %8:_(s32) = G_FCONSTANT float 1.000000e+00
+ %9:_(s1) = G_FCMP floatpred(ogt), %1(s32), %8
+ %10:_(s32) = G_CONSTANT i32 0
+
+ bb.1:
+ successors: %bb.2(0x04000000), %bb.1(0x7c000000)
+
+ %11:_(s32) = G_PHI %12(s32), %bb.1, %10(s32), %bb.0
+ %13:_(s32) = G_PHI %10(s32), %bb.0, %14(s32), %bb.1
+ %15:_(s1) = G_PHI %9(s1), %bb.0, %16(s1), %bb.1
+ %17:_(s1) = G_CONSTANT i1 true
+ %16:_(s1) = G_XOR %15, %17
+ %18:_(s32) = G_UITOFP %13(s32)
+ %19:_(s1) = G_FCMP floatpred(ogt), %18(s32), %0
+ %20:_(s32) = G_CONSTANT i32 1
+ %14:_(s32) = G_ADD %13, %20
+ %12:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %19(s1), %11(s32)
+ SI_LOOP %12(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+ G_BR %bb.2
+
+ bb.2:
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %12(s32)
+ %21:_(s32) = G_FCONSTANT float 0.000000e+00
+ %22:_(s32) = G_FCONSTANT float 1.000000e+00
+ %23:_(s32) = G_SELECT %16(s1), %22, %21
+ G_STORE %23(s32), %4(p0) :: (store (s32))
+ %24:_(s32) = G_FCONSTANT float -1.000000e+00
+ %25:_(s32) = G_FCONSTANT float 2.000000e+00
+ %26:_(s32) = G_SELECT %16(s1), %25, %24
+ G_STORE %26(s32), %7(p0) :: (store (s32))
SI_RETURN
...
@@ -364,13 +437,12 @@ body: |
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
- ; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: G_BR %bb.3
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.2:
; GFX10-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY5]](s1), %bb.0, %40(s1), %bb.8
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY5]](s1), %bb.0, %38(s1), %bb.8
; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1)
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY7]](s1), %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -379,49 +451,47 @@ body: |
; GFX10-NEXT: bb.3:
; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF3]](s1), %bb.1, %73(s1), %bb.7
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.1, %62(s1), %bb.7
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.1, %49(s1), %bb.7
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[C2]](s32), %bb.1, %17(s32), %bb.7
- ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI %19(s32), %bb.7, [[C2]](s32), %bb.1
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.1, %60(s1), %bb.7
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.1, %47(s1), %bb.7
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C2]](s32), %bb.1, %17(s32), %bb.7
+ ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI %19(s32), %bb.7, [[C2]](s32), %bb.1
; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1)
; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]](s1)
- ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1)
- ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
- ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
- ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI5]](s32)
- ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C4]](s32)
+ ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI4]](s32)
+ ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C3]](s32)
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL]](s64)
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1)
- ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C5]]
- ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C4]]
+ ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1)
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
- ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
- ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1)
+ ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1)
; GFX10-NEXT: [[SI_IF2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.4
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.4:
; GFX10-NEXT: successors: %bb.7(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
- ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[C6]](s1)
- ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI5]], [[C7]]
- ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[PHI5]](s32), [[COPY]]
- ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY13]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI4]], [[C6]]
+ ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[PHI4]](s32), [[COPY]]
+ ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1)
+ ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
+ ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[C7]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY12]](s1), $exec_lo, implicit-def $scc
; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY15]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY14]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY16]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY13]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc
; GFX10-NEXT: G_BR %bb.7
; GFX10-NEXT: {{ $}}
@@ -438,32 +508,26 @@ body: |
; GFX10-NEXT: bb.7:
; GFX10-NEXT: successors: %bb.8(0x04000000), %bb.3(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_1]](s1), %bb.3, [[S_OR_B32_3]](s1), %bb.4
- ; GFX10-NEXT: [[PHI7:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.3, [[S_OR_B32_2]](s1), %bb.4
- ; GFX10-NEXT: [[PHI8:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.4, [[DEF]](s32), %bb.3
+ ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_1]](s1), %bb.3, [[S_OR_B32_3]](s1), %bb.4
+ ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.3, [[S_OR_B32_2]](s1), %bb.4
+ ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.4, [[DEF]](s32), %bb.3
+ ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1)
; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32(s1) = COPY [[PHI6]](s1)
- ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32(s1) = COPY [[PHI7]](s1)
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF2]](s32)
; GFX10-NEXT: [[C9:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY18]], [[C9]]
- ; GFX10-NEXT: [[COPY19:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY17]](s1), [[PHI4]](s32)
- ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY19]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY17]], [[C9]]
+ ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[XOR]](s1)
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY16]](s1), [[PHI3]](s32)
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.8
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.8:
; GFX10-NEXT: successors: %bb.2(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI9:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.7
- ; GFX10-NEXT: [[COPY20:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_4]](s1)
- ; GFX10-NEXT: [[COPY21:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY20]](s1)
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI9]](s32)
- ; GFX10-NEXT: [[S_ANDN2_B32_5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY21]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_5]](s1), [[S_AND_B32_5]](s1), implicit-def $scc
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
+ ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY18]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc
; GFX10-NEXT: G_BR %bb.2
bb.0:
successors: %bb.1(0x40000000), %bb.2(0x40000000)
@@ -502,24 +566,24 @@ body: |
%16:_(s32) = G_PHI %12(s32), %bb.1, %17(s32), %bb.7
%18:_(s32) = G_PHI %19(s32), %bb.7, %12(s32), %bb.1
- %20:_(s1) = G_CONSTANT i1 true
- %21:_(s64) = G_SEXT %18(s32)
- %22:_(s32) = G_CONSTANT i32 2
- %23:_(s64) = G_SHL %21, %22(s32)
- %24:_(p1) = G_PTR_ADD %3, %23(s64)
- %25:_(s32) = G_LOAD %24(p1) :: (load (s32), addrspace 1)
- %26:_(s32) = G_CONSTANT i32 0
- %27:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %25(s32), %26
- %28:sreg_32_xm0_xexec(s32) = SI_IF %27(s1), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec
+ %20:_(s64) = G_SEXT %18(s32)
+ %21:_(s32) = G_CONSTANT i32 2
+ %22:_(s64) = G_SHL %20, %21(s32)
+ %23:_(p1) = G_PTR_ADD %3, %22(s64)
+ %24:_(s32) = G_LOAD %23(p1) :: (load (s32), addrspace 1)
+ %25:_(s32) = G_CONSTANT i32 0
+ %26:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %24(s32), %25
+ %27:_(s1) = G_CONSTANT i1 true
+ %28:sreg_32_xm0_xexec(s32) = SI_IF %26(s1), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.4
bb.4:
successors: %bb.7(0x80000000)
- %29:_(s1) = G_CONSTANT i1 false
- %30:_(s32) = G_CONSTANT i32 1
- %31:_(s32) = G_ADD %18, %30
- %32:_(s1) = G_ICMP intpred(slt), %18(s32), %0
+ %29:_(s32) = G_CONSTANT i32 1
+ %30:_(s32) = G_ADD %18, %29
+ %31:_(s1) = G_ICMP intpred(slt), %18(s32), %0
+ %32:_(s1) = G_CONSTANT i1 false
G_BR %bb.7
bb.5:
@@ -535,12 +599,12 @@ body: |
bb.7:
successors: %bb.8(0x04000000), %bb.3(0x7c000000)
- %19:_(s32) = G_PHI %31(s32), %bb.4, %7(s32), %bb.3
- %34:_(s1) = G_PHI %29(s1), %bb.4, %20(s1), %bb.3
- %35:_(s1) = G_PHI %32(s1), %bb.4, %20(s1), %bb.3
+ %19:_(s32) = G_PHI %30(s32), %bb.4, %7(s32), %bb.3
+ %34:_(s1) = G_PHI %32(s1), %bb.4, %27(s1), %bb.3
+ %35:_(s1) = G_PHI %31(s1), %bb.4, %27(s1), %bb.3
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %28(s32)
%36:_(s1) = G_CONSTANT i1 true
- %37:_(s1) = G_XOR %34, %36
+ %14:_(s1) = G_XOR %34, %36
%17:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %35(s1), %16(s32)
SI_LOOP %17(s32), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.8
@@ -548,9 +612,7 @@ body: |
bb.8:
successors: %bb.2(0x80000000)
- %14:_(s1) = G_PHI %37(s1), %bb.7
- %38:_(s32) = G_PHI %17(s32), %bb.7
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %38(s32)
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %17(s32)
G_BR %bb.2
...
@@ -572,85 +634,75 @@ body: |
; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr6
; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr7
; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[DEF1]](s1), %bb.0, %39(s1), %bb.6
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %11(s32), %bb.6, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %13(s32), %bb.6
- ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1)
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %11(s32), %bb.6, [[C]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %13(s32), %bb.6
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.2:
; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PHI2]]
- ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[ICMP]](s1)
+ ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PHI1]]
; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.3
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.3:
; GFX10-NEXT: successors: %bb.4(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32)
+ ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI1]](s32)
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C1]](s32)
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL]](s64)
- ; GFX10-NEXT: G_STORE [[PHI2]](s32), [[PTR_ADD]](p1) :: (store (s32), addrspace 1)
+ ; GFX10-NEXT: G_STORE [[PHI1]](s32), [[PTR_ADD]](p1) :: (store (s32), addrspace 1)
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.4:
; GFX10-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
- ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[PHI2]]
- ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[C2]](s1)
- ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[COPY8]](s1)
+ ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[PHI1]]
+ ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[C2]](s1)
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[COPY6]](s1)
; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.5
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.5:
; GFX10-NEXT: successors: %bb.6(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
- ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
- ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C4]]
- ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C3]]
+ ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.6:
; GFX10-NEXT: successors: %bb.7(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[COPY8]](s1), %bb.4, [[S_OR_B32_]](s1), %bb.5
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.5, [[DEF]](s32), %bb.4
- ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1)
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(s1) = PHI [[COPY6]](s1), %bb.4, [[S_OR_B32_]](s1), %bb.5
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.5, [[DEF]](s32), %bb.4
+ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]](s1)
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF1]](s32)
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY11]](s1), [[PHI1]](s32)
- ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY7]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY9]](s1), [[PHI]](s32)
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.7
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.7:
; GFX10-NEXT: successors: %bb.8(0x40000000), %bb.9(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.6
- ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[PHI2]](s32), %bb.6
- ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_1]](s1)
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](s32)
- ; GFX10-NEXT: [[SI_IF2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY12]](s1), %bb.9, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
+ ; GFX10-NEXT: [[SI_IF2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.9, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.8
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.8:
; GFX10-NEXT: successors: %bb.9(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: G_STORE [[PHI6]](s32), [[MV1]](p1) :: (store (s32), addrspace 1)
+ ; GFX10-NEXT: G_STORE [[PHI1]](s32), [[MV1]](p1) :: (store (s32), addrspace 1)
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.9:
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF2]](s32)
@@ -667,14 +719,14 @@ body: |
%5:_(s32) = COPY $vgpr6
%6:_(s32) = COPY $vgpr7
%7:_(p1) = G_MERGE_VALUES %5(s32), %6(s32)
- %8:_(s32) = G_CONSTANT i32 0
- %9:_(s32) = G_IMPLICIT_DEF
+ %8:_(s32) = G_IMPLICIT_DEF
+ %9:_(s32) = G_CONSTANT i32 0
bb.1:
successors: %bb.2(0x80000000)
- %10:_(s32) = G_PHI %11(s32), %bb.6, %8(s32), %bb.0
- %12:_(s32) = G_PHI %8(s32), %bb.0, %13(s32), %bb.6
+ %10:_(s32) = G_PHI %11(s32), %bb.6, %9(s32), %bb.0
+ %12:_(s32) = G_PHI %9(s32), %bb.0, %13(s32), %bb.6
bb.2:
successors: %bb.3(0x40000000), %bb.4(0x40000000)
@@ -695,24 +747,24 @@ body: |
bb.4:
successors: %bb.5(0x40000000), %bb.6(0x40000000)
- %20:_(s1) = G_CONSTANT i1 true
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %15(s32)
- %21:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %1(s32), %12
- %22:sreg_32_xm0_xexec(s32) = SI_IF %21(s1), %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec
+ %20:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %1(s32), %12
+ %21:_(s1) = G_CONSTANT i1 true
+ %22:sreg_32_xm0_xexec(s32) = SI_IF %20(s1), %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.5
bb.5:
successors: %bb.6(0x80000000)
- %23:_(s1) = G_CONSTANT i1 false
- %24:_(s32) = G_CONSTANT i32 1
- %25:_(s32) = G_ADD %12, %24
+ %23:_(s32) = G_CONSTANT i32 1
+ %24:_(s32) = G_ADD %12, %23
+ %25:_(s1) = G_CONSTANT i1 false
bb.6:
successors: %bb.7(0x04000000), %bb.1(0x7c000000)
- %13:_(s32) = G_PHI %25(s32), %bb.5, %9(s32), %bb.4
- %26:_(s1) = G_PHI %23(s1), %bb.5, %20(s1), %bb.4
+ %13:_(s32) = G_PHI %24(s32), %bb.5, %8(s32), %bb.4
+ %26:_(s1) = G_PHI %25(s1), %bb.5, %21(s1), %bb.4
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %22(s32)
%11:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %26(s1), %10(s32)
SI_LOOP %11(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -721,20 +773,17 @@ body: |
bb.7:
successors: %bb.8(0x40000000), %bb.9(0x40000000)
- %27:_(s32) = G_PHI %11(s32), %bb.6
- %28:sreg_32_xm0_xexec(s1) = G_PHI %14(s1), %bb.6
- %29:_(s32) = G_PHI %12(s32), %bb.6
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %27(s32)
- %30:sreg_32_xm0_xexec(s32) = SI_IF %28(s1), %bb.9, implicit-def $exec, implicit-def $scc, implicit $exec
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %11(s32)
+ %27:sreg_32_xm0_xexec(s32) = SI_IF %14(s1), %bb.9, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.8
bb.8:
successors: %bb.9(0x80000000)
- G_STORE %29(s32), %7(p1) :: (store (s32), addrspace 1)
+ G_STORE %12(s32), %7(p1) :: (store (s32), addrspace 1)
bb.9:
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %30(s32)
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %27(s32)
SI_RETURN
...
@@ -755,78 +804,69 @@ body: |
; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1)
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[C]](s1)
; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
- ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %54(s1), %bb.3
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %43(s1), %bb.3
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY5]](s1), %bb.0, %33(s1), %bb.3
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI %10(s32), %bb.3, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %12(s32), %bb.3
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %41(s1), %bb.3
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY5]](s1), %bb.0, %31(s1), %bb.3
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %10(s32), %bb.3, [[C1]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %12(s32), %bb.3
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
- ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1)
- ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI2]](s1)
- ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[COPY8]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI1]](s1)
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[COPY7]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
- ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
- ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY8]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
+ ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY7]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.2
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.2:
; GFX10-NEXT: successors: %bb.3(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI4]](s32)
+ ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI3]](s32)
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32)
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL]](s64)
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1)
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LOAD]](s32), [[C3]]
- ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1)
- ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
- ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1)
+ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.3:
; GFX10-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_1]](s1), %bb.2
- ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[PHI2]](s1), %bb.1, [[DEF2]](s1), %bb.2
- ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1)
- ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI6]](s1)
+ ; GFX10-NEXT: [[PHI4:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_1]](s1), %bb.2
+ ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[PHI1]](s1), %bb.1, [[DEF1]](s1), %bb.2
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[PHI4]](s1)
+ ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI5]](s1)
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
- ; GFX10-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE [[COPY12]]
- ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[FREEZE]](s1)
- ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[FREEZE]](s1)
+ ; GFX10-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE [[COPY11]]
+ ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[FREEZE]](s1)
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI4]], [[C4]]
- ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[PHI4]](s32), [[COPY]]
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ICMP1]](s1), [[PHI3]](s32)
- ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY13]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY15]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C4]]
+ ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[PHI3]](s32), [[COPY]]
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ICMP1]](s1), [[PHI2]](s32)
+ ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY12]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.4
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.4:
- ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.3
- ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_3]](s1)
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32)
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY16]](s1), [[C6]], [[C5]]
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FREEZE]](s1), [[C6]], [[C5]]
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV1]](p0) :: (store (s32))
; GFX10-NEXT: S_ENDPGM 0
bb.0:
@@ -840,15 +880,15 @@ body: |
%4:_(s32) = COPY $vgpr3
%5:_(s32) = COPY $vgpr4
%6:_(p0) = G_MERGE_VALUES %4(s32), %5(s32)
- %7:_(s32) = G_CONSTANT i32 0
- %8:_(s1) = G_CONSTANT i1 true
+ %7:_(s1) = G_CONSTANT i1 true
+ %8:_(s32) = G_CONSTANT i32 0
bb.1:
successors: %bb.2(0x40000000), %bb.3(0x40000000)
- %9:_(s32) = G_PHI %10(s32), %bb.3, %7(s32), %bb.0
- %11:_(s32) = G_PHI %7(s32), %bb.0, %12(s32), %bb.3
- %13:sreg_32_xm0_xexec(s1) = G_PHI %8(s1), %bb.0, %14(s1), %bb.3
+ %9:_(s32) = G_PHI %10(s32), %bb.3, %8(s32), %bb.0
+ %11:_(s32) = G_PHI %8(s32), %bb.0, %12(s32), %bb.3
+ %13:sreg_32_xm0_xexec(s1) = G_PHI %7(s1), %bb.0, %14(s1), %bb.3
%15:sreg_32_xm0_xexec(s32) = SI_IF %13(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.2
@@ -877,13 +917,11 @@ body: |
G_BR %bb.4
bb.4:
- %26:_(s1) = G_PHI %14(s1), %bb.3
- %27:_(s32) = G_PHI %10(s32), %bb.3
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %27(s32)
- %28:_(s32) = G_FCONSTANT float 0.000000e+00
- %29:_(s32) = G_FCONSTANT float 1.000000e+00
- %30:_(s32) = G_SELECT %26(s1), %29, %28
- G_STORE %30(s32), %6(p0) :: (store (s32))
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %10(s32)
+ %26:_(s32) = G_FCONSTANT float 0.000000e+00
+ %27:_(s32) = G_FCONSTANT float 1.000000e+00
+ %28:_(s32) = G_SELECT %14(s1), %27, %26
+ G_STORE %28(s32), %6(p0) :: (store (s32))
S_ENDPGM 0
...
@@ -906,41 +944,38 @@ body: |
; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
- ; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[DEF3]](s1), %bb.0, %67(s1), %bb.5
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.0, %56(s1), %bb.5
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %43(s1), %bb.5
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.5, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %14(s32), %bb.5
- ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1)
- ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1)
- ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]](s1)
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1)
- ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1)
- ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI4]](s32)
- ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32)
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.0, %54(s1), %bb.5
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[DEF1]](s1), %bb.0, %41(s1), %bb.5
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.5, [[C]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %14(s32), %bb.5
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI1]](s1)
+ ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI3]](s32)
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C1]](s32)
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64)
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1)
- ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]]
- ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
- ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]]
+ ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
+ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C3]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
- ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1)
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1)
; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.3
; GFX10-NEXT: {{ $}}
@@ -954,24 +989,24 @@ body: |
; GFX10-NEXT: bb.3:
; GFX10-NEXT: successors: %bb.5(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
- ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1)
- ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C6]](s32)
+ ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C5]](s32)
; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL1]](s64)
; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1)
- ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[C7]]
+ ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[C6]]
; GFX10-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD1]](p1) :: (store (s32), addrspace 1)
- ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI4]], [[C7]]
- ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 100
- ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI4]](s32), [[C8]]
- ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY12]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C6]]
+ ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 100
+ ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI3]](s32), [[C7]]
+ ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1)
+ ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
+ ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C8]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc
; GFX10-NEXT: G_BR %bb.5
; GFX10-NEXT: {{ $}}
@@ -982,27 +1017,21 @@ body: |
; GFX10-NEXT: bb.5:
; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_1]](s1), %bb.1, [[S_OR_B32_3]](s1), %bb.3
- ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_2]](s1), %bb.3
- ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1
- ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1)
- ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI6]](s1)
- ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY16]](s1)
+ ; GFX10-NEXT: [[PHI4:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_1]](s1), %bb.1, [[S_OR_B32_3]](s1), %bb.3
+ ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_2]](s1), %bb.3
+ ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1
+ ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[PHI4]](s1)
+ ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI5]](s1)
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY15]](s1), [[PHI3]](s32)
- ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY17]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY14]](s1), [[PHI2]](s32)
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.6
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.6:
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI8:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.5
- ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_4]](s1)
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI8]](s32)
- ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY18]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
+ ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY15]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.2
bb.0:
successors: %bb.1(0x80000000)
@@ -1017,23 +1046,23 @@ body: |
%6:_(s32) = COPY $vgpr4
%7:_(s32) = COPY $vgpr5
%8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32)
- %9:_(s32) = G_CONSTANT i32 0
- %10:_(s32) = G_IMPLICIT_DEF
+ %9:_(s32) = G_IMPLICIT_DEF
+ %10:_(s32) = G_CONSTANT i32 0
bb.1:
successors: %bb.3(0x40000000), %bb.5(0x40000000)
- %11:_(s32) = G_PHI %12(s32), %bb.5, %9(s32), %bb.0
- %13:_(s32) = G_PHI %9(s32), %bb.0, %14(s32), %bb.5
- %15:_(s1) = G_CONSTANT i1 true
- %16:_(s64) = G_SEXT %13(s32)
- %17:_(s32) = G_CONSTANT i32 2
- %18:_(s64) = G_SHL %16, %17(s32)
- %19:_(p1) = G_PTR_ADD %5, %18(s64)
- %20:_(s32) = G_LOAD %19(p1) :: (load (s32), addrspace 1)
- %21:_(s32) = G_CONSTANT i32 0
- %22:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %20(s32), %21
- %23:sreg_32_xm0_xexec(s32) = SI_IF %22(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
+ %11:_(s32) = G_PHI %12(s32), %bb.5, %10(s32), %bb.0
+ %13:_(s32) = G_PHI %10(s32), %bb.0, %14(s32), %bb.5
+ %15:_(s64) = G_SEXT %13(s32)
+ %16:_(s32) = G_CONSTANT i32 2
+ %17:_(s64) = G_SHL %15, %16(s32)
+ %18:_(p1) = G_PTR_ADD %5, %17(s64)
+ %19:_(s32) = G_LOAD %18(p1) :: (load (s32), addrspace 1)
+ %20:_(s32) = G_CONSTANT i32 0
+ %21:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %19(s32), %20
+ %22:_(s1) = G_CONSTANT i1 true
+ %23:sreg_32_xm0_xexec(s32) = SI_IF %21(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.3
bb.2:
@@ -1046,17 +1075,17 @@ body: |
bb.3:
successors: %bb.5(0x80000000)
- %25:_(s1) = G_CONSTANT i1 false
- %26:_(s32) = G_CONSTANT i32 2
- %27:_(s64) = G_SHL %16, %26(s32)
- %28:_(p1) = G_PTR_ADD %2, %27(s64)
- %29:_(s32) = G_LOAD %28(p1) :: (load (s32), addrspace 1)
- %30:_(s32) = G_CONSTANT i32 1
- %31:_(s32) = G_ADD %29, %30
- G_STORE %31(s32), %28(p1) :: (store (s32), addrspace 1)
- %32:_(s32) = G_ADD %13, %30
- %33:_(s32) = G_CONSTANT i32 100
- %34:_(s1) = G_ICMP intpred(ult), %13(s32), %33
+ %25:_(s32) = G_CONSTANT i32 2
+ %26:_(s64) = G_SHL %15, %25(s32)
+ %27:_(p1) = G_PTR_ADD %2, %26(s64)
+ %28:_(s32) = G_LOAD %27(p1) :: (load (s32), addrspace 1)
+ %29:_(s32) = G_CONSTANT i32 1
+ %30:_(s32) = G_ADD %28, %29
+ G_STORE %30(s32), %27(p1) :: (store (s32), addrspace 1)
+ %31:_(s32) = G_ADD %13, %29
+ %32:_(s32) = G_CONSTANT i32 100
+ %33:_(s1) = G_ICMP intpred(ult), %13(s32), %32
+ %34:_(s1) = G_CONSTANT i1 false
G_BR %bb.5
bb.4:
@@ -1066,9 +1095,9 @@ body: |
bb.5:
successors: %bb.6(0x04000000), %bb.1(0x7c000000)
- %14:_(s32) = G_PHI %32(s32), %bb.3, %10(s32), %bb.1
- %36:_(s1) = G_PHI %25(s1), %bb.3, %15(s1), %bb.1
- %37:_(s1) = G_PHI %34(s1), %bb.3, %15(s1), %bb.1
+ %14:_(s32) = G_PHI %31(s32), %bb.3, %9(s32), %bb.1
+ %36:sreg_32_xm0_xexec(s1) = G_PHI %34(s1), %bb.3, %22(s1), %bb.1
+ %37:_(s1) = G_PHI %33(s1), %bb.3, %22(s1), %bb.1
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %23(s32)
%12:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %37(s1), %11(s32)
SI_LOOP %12(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -1077,9 +1106,7 @@ body: |
bb.6:
successors: %bb.2(0x40000000), %bb.4(0x40000000)
- %38:sreg_32_xm0_xexec(s1) = G_PHI %36(s1), %bb.5
- %39:_(s32) = G_PHI %12(s32), %bb.5
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %39(s32)
- %35:sreg_32_xm0_xexec(s32) = SI_IF %38(s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %12(s32)
+ %35:sreg_32_xm0_xexec(s32) = SI_IF %36(s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.2
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll
index 1698f84eea518..ba30a4bd97684 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll
@@ -472,8 +472,7 @@ exit:
}
; Snippet from test generated by the GraphicsFuzz tool, frontend generates ir
-; with irreducible control flow graph. FixIrreducible converts it into natural
-; loop and in the process creates i1 phi with three incoming values.
+; with irreducible control flow graph.
; int loop(int x, int y, int a0, int a1, int a2, int a3, int a4) {
; do {
@@ -488,27 +487,99 @@ exit:
; return a0;
; }
-; This test is also interesting because it has phi with three incomings
-;define amdgpu_ps i32 @irreducible_cfg(i32 %x, i32 %y, i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
-;.entry:
-; %.y_lt_a2 = icmp sgt i32 %a2, %y
-; %.x_lt_a2 = icmp sgt i32 %a2, %x
-; %.x_lt_a3 = icmp sgt i32 %a3, %x
-; br i1 %.y_lt_a2, label %.preheader, label %.loopexit ; first iteration, jump to inner loop if 'y < a2' or start with 'if (x < a3)'
-;
-;.preheader: ; if (y < a2),
-; br label %.inner_loop
-;
-;.inner_loop: ; do while x < a2
-; br i1 %.x_lt_a2, label %.inner_loop, label %.loopexit
-;
-;.loopexit: ; if x < a3
-; %not.inner_loop = xor i1 %.y_lt_a2, true
-; %brmerge = select i1 %.x_lt_a3, i1 true, i1 %not.inner_loop ; exit loop if 'x < a3' or 'loop ends since !(y < a2)'
-; %.ret = select i1 %.x_lt_a3, i32 %a1, i32 %a0 ; select retrun value a1 'x < a3' or a0 'loop ends'
-; br i1 %brmerge, label %.exit, label %.preheader
-;
-;.exit:
-; ret i32 %.ret
-;}
+; This test is also interesting because it had a phi with three incoming values.
+; After fa4cc9ddd58eb9fef2497e678873ff3b495340a3, FixIrreducible does not
+; generate a phi with three incoming values. There is a MIR test with such a phi.
+define amdgpu_ps i32 @irreducible_cfg(i32 %x, i32 %y, i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
+; GFX10-LABEL: irreducible_cfg:
+; GFX10: ; %bb.0: ; %.entry
+; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, v4, v1
+; GFX10-NEXT: s_and_b32 s0, 1, s0
+; GFX10-NEXT: ; implicit-def: $sgpr2
+; GFX10-NEXT: v_cmp_ne_u32_e64 s3, 0, s0
+; GFX10-NEXT: s_mov_b32 s0, 0
+; GFX10-NEXT: s_xor_b32 s1, vcc_lo, -1
+; GFX10-NEXT: s_mov_b32 s4, s1
+; GFX10-NEXT: s_branch .LBB6_2
+; GFX10-NEXT: .LBB6_1: ; %Flow2
+; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
+; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5
+; GFX10-NEXT: s_and_b32 s5, exec_lo, s7
+; GFX10-NEXT: s_or_b32 s0, s5, s0
+; GFX10-NEXT: s_andn2_b32 s2, s2, exec_lo
+; GFX10-NEXT: s_and_b32 s5, exec_lo, s3
+; GFX10-NEXT: s_or_b32 s2, s2, s5
+; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s0
+; GFX10-NEXT: s_cbranch_execz .LBB6_8
+; GFX10-NEXT: .LBB6_2: ; %irr.guard
+; GFX10-NEXT: ; =>This Loop Header: Depth=1
+; GFX10-NEXT: ; Child Loop BB6_6 Depth 2
+; GFX10-NEXT: s_andn2_b32 s5, s3, exec_lo
+; GFX10-NEXT: s_and_b32 s3, exec_lo, s3
+; GFX10-NEXT: s_mov_b32 s6, -1
+; GFX10-NEXT: s_or_b32 s3, s5, s3
+; GFX10-NEXT: s_and_saveexec_b32 s5, s4
+; GFX10-NEXT: s_xor_b32 s4, exec_lo, s5
+; GFX10-NEXT: ; %bb.3: ; %.loopexit
+; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
+; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, v5, v0
+; GFX10-NEXT: s_andn2_b32 s3, s3, exec_lo
+; GFX10-NEXT: s_andn2_b32 s7, -1, exec_lo
+; GFX10-NEXT: s_or_b32 s5, vcc_lo, s1
+; GFX10-NEXT: s_and_b32 s6, exec_lo, vcc_lo
+; GFX10-NEXT: s_xor_b32 s5, s5, -1
+; GFX10-NEXT: s_or_b32 s3, s3, s6
+; GFX10-NEXT: s_and_b32 s5, exec_lo, s5
+; GFX10-NEXT: s_or_b32 s6, s7, s5
+; GFX10-NEXT: ; %bb.4: ; %Flow1
+; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
+; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
+; GFX10-NEXT: s_mov_b32 s4, -1
+; GFX10-NEXT: s_mov_b32 s7, -1
+; GFX10-NEXT: s_and_saveexec_b32 s5, s6
+; GFX10-NEXT: s_cbranch_execz .LBB6_1
+; GFX10-NEXT: ; %bb.5: ; %.preheader
+; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
+; GFX10-NEXT: s_mov_b32 s6, 0
+; GFX10-NEXT: v_cmp_le_i32_e32 vcc_lo, v4, v0
+; GFX10-NEXT: .LBB6_6: ; %.inner_loop
+; GFX10-NEXT: ; Parent Loop BB6_2 Depth=1
+; GFX10-NEXT: ; => This Inner Loop Header: Depth=2
+; GFX10-NEXT: s_and_b32 s7, exec_lo, vcc_lo
+; GFX10-NEXT: s_or_b32 s6, s7, s6
+; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s6
+; GFX10-NEXT: s_cbranch_execnz .LBB6_6
+; GFX10-NEXT: ; %bb.7: ; %Flow
+; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
+; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s6
+; GFX10-NEXT: s_andn2_b32 s6, -1, exec_lo
+; GFX10-NEXT: s_and_b32 s7, exec_lo, 0
+; GFX10-NEXT: s_or_b32 s7, s6, s7
+; GFX10-NEXT: s_branch .LBB6_1
+; GFX10-NEXT: .LBB6_8: ; %.exit
+; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, v3, s2
+; GFX10-NEXT: v_readfirstlane_b32 s0, v0
+; GFX10-NEXT: ; return to shader part epilog
+.entry:
+ %.y_lt_a2 = icmp sgt i32 %a2, %y
+ %.x_lt_a2 = icmp sgt i32 %a2, %x
+ %.x_lt_a3 = icmp sgt i32 %a3, %x
+ br i1 %.y_lt_a2, label %.preheader, label %.loopexit ; first iteration, jump to inner loop if 'y < a2' or start with 'if (x < a3)'
+
+.preheader: ; if (y < a2),
+ br label %.inner_loop
+
+.inner_loop: ; do while x < a2
+ br i1 %.x_lt_a2, label %.inner_loop, label %.loopexit
+
+.loopexit: ; if x < a3
+ %not.inner_loop = xor i1 %.y_lt_a2, true
+ %brmerge = select i1 %.x_lt_a3, i1 true, i1 %not.inner_loop ; exit loop if 'x < a3' or 'loop ends since !(y < a2)'
+ %.ret = select i1 %.x_lt_a3, i32 %a1, i32 %a0 ; select return value: a1 if 'x < a3', else a0 when the loop ends
+ br i1 %brmerge, label %.exit, label %.preheader
+
+.exit:
+ ret i32 %.ret
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir
index 39ebf66411cc6..bd7140de5d617 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir
@@ -39,10 +39,10 @@ body: |
; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY4]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.1
; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
+ ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY7]](s1)
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY7]](s1), [[C4]], [[C3]]
- ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C3]]
+ ; GFX10-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; GFX10-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1(0x40000000), %bb.2(0x40000000)
@@ -69,9 +69,9 @@ body: |
bb.2:
%12:_(s1) = G_PHI %6(s1), %bb.0, %11(s1), %bb.1
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %9(s32)
- %13:_(s32) = G_CONSTANT i32 2
- %14:_(s32) = G_CONSTANT i32 1
- %15:_(s32) = G_SELECT %12(s1), %14, %13
+ %13:_(s32) = G_SEXT %12(s1)
+ %14:_(s32) = G_CONSTANT i32 2
+ %15:_(s32) = G_ADD %13, %14
G_STORE %15(s32), %2(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
@@ -135,10 +135,10 @@ body: |
; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[COPY7]](s1), %bb.1, [[S_OR_B32_]](s1), %bb.2
; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1)
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_ELSE]](s32)
- ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY11]](s1), [[C3]], [[C4]]
- ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
+ ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY11]](s1)
+ ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C3]]
+ ; GFX10-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; GFX10-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.3(0x40000000), %bb.1(0x40000000)
@@ -179,9 +179,9 @@ body: |
bb.4:
%15:_(s1) = G_PHI %9(s1), %bb.1, %13(s1), %bb.2
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %11(s32)
- %16:_(s32) = G_CONSTANT i32 1
+ %16:_(s32) = G_SEXT %15(s1)
%17:_(s32) = G_CONSTANT i32 2
- %18:_(s32) = G_SELECT %15(s1), %16, %17
+ %18:_(s32) = G_ADD %16, %17
G_STORE %18(s32), %2(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
@@ -202,26 +202,26 @@ body: |
; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %35(s1), %bb.3
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %34(s1), %bb.3
; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.3, [[C]](s32), %bb.0
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %11(s32), %bb.3
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1)
; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32)
- ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32)
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C1]](s32)
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64)
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1)
- ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]]
+ ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]]
+ ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY4]](s1), $exec_lo, implicit-def $scc
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY5]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
@@ -259,8 +259,7 @@ body: |
; GFX10-NEXT: G_BR %bb.4
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.4:
- ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.3
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](s32)
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; GFX10-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1(0x80000000)
@@ -272,30 +271,30 @@ body: |
%3:_(s32) = COPY $vgpr2
%4:_(s32) = COPY $vgpr3
%5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32)
- %6:_(s32) = G_CONSTANT i32 0
- %7:_(s32) = G_IMPLICIT_DEF
+ %6:_(s32) = G_IMPLICIT_DEF
+ %7:_(s32) = G_CONSTANT i32 0
bb.1:
successors: %bb.2(0x40000000), %bb.3(0x40000000)
- %8:_(s32) = G_PHI %9(s32), %bb.3, %6(s32), %bb.0
- %10:_(s32) = G_PHI %6(s32), %bb.0, %11(s32), %bb.3
- %12:_(s1) = G_CONSTANT i1 true
- %13:_(s64) = G_SEXT %10(s32)
- %14:_(s32) = G_CONSTANT i32 2
- %15:_(s64) = G_SHL %13, %14(s32)
- %16:_(p1) = G_PTR_ADD %5, %15(s64)
- %17:_(s32) = G_LOAD %16(p1) :: (load (s32), addrspace 1)
- %18:_(s32) = G_CONSTANT i32 0
- %19:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %17(s32), %18
- %20:sreg_32_xm0_xexec(s32) = SI_IF %19(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
+ %8:_(s32) = G_PHI %9(s32), %bb.3, %7(s32), %bb.0
+ %10:_(s32) = G_PHI %7(s32), %bb.0, %11(s32), %bb.3
+ %12:_(s64) = G_SEXT %10(s32)
+ %13:_(s32) = G_CONSTANT i32 2
+ %14:_(s64) = G_SHL %12, %13(s32)
+ %15:_(p1) = G_PTR_ADD %5, %14(s64)
+ %16:_(s32) = G_LOAD %15(p1) :: (load (s32), addrspace 1)
+ %17:_(s32) = G_CONSTANT i32 0
+ %18:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %16(s32), %17
+ %19:_(s1) = G_CONSTANT i1 true
+ %20:sreg_32_xm0_xexec(s32) = SI_IF %18(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.2
bb.2:
successors: %bb.3(0x80000000)
%21:_(s32) = G_CONSTANT i32 2
- %22:_(s64) = G_SHL %13, %21(s32)
+ %22:_(s64) = G_SHL %12, %21(s32)
%23:_(p1) = G_PTR_ADD %2, %22(s64)
%24:_(s32) = G_LOAD %23(p1) :: (load (s32), addrspace 1)
%25:_(s32) = G_CONSTANT i32 1
@@ -308,16 +307,15 @@ body: |
bb.3:
successors: %bb.4(0x04000000), %bb.1(0x7c000000)
- %11:_(s32) = G_PHI %27(s32), %bb.2, %7(s32), %bb.1
- %30:_(s1) = G_PHI %29(s1), %bb.2, %12(s1), %bb.1
+ %11:_(s32) = G_PHI %27(s32), %bb.2, %6(s32), %bb.1
+ %30:_(s1) = G_PHI %29(s1), %bb.2, %19(s1), %bb.1
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %20(s32)
%9:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %30(s1), %8(s32)
SI_LOOP %9(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.4
bb.4:
- %31:_(s32) = G_PHI %9(s32), %bb.3
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %31(s32)
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %9(s32)
S_ENDPGM 0
...
@@ -340,26 +338,26 @@ body: |
; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %48(s1), %bb.3
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %47(s1), %bb.3
; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.3, [[C]](s32), %bb.0
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %14(s32), %bb.3
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1)
; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32)
- ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32)
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C1]](s32)
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64)
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1)
- ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]]
+ ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]]
+ ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY7]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
@@ -370,14 +368,14 @@ body: |
; GFX10-NEXT: bb.2:
; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C5]](s32)
+ ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C4]](s32)
; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV2]], [[SHL1]](s64)
; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1)
- ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD1]](s32), [[C6]]
- ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1)
+ ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD1]](s32), [[C5]]
+ ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C6]](s1)
; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[COPY9]](s1)
; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.4
@@ -385,7 +383,7 @@ body: |
; GFX10-NEXT: bb.3:
; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, %47(s1), %bb.5
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, %46(s1), %bb.5
; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI %32(s32), %bb.5, [[DEF]](s32), %bb.1
; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1)
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
@@ -425,8 +423,7 @@ body: |
; GFX10-NEXT: G_BR %bb.3
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.6:
- ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.3
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32)
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; GFX10-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1(0x80000000)
@@ -441,43 +438,43 @@ body: |
%6:_(s32) = COPY $vgpr4
%7:_(s32) = COPY $vgpr5
%8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32)
- %9:_(s32) = G_CONSTANT i32 0
- %10:_(s32) = G_IMPLICIT_DEF
+ %9:_(s32) = G_IMPLICIT_DEF
+ %10:_(s32) = G_CONSTANT i32 0
bb.1:
successors: %bb.2(0x40000000), %bb.3(0x40000000)
- %11:_(s32) = G_PHI %12(s32), %bb.3, %9(s32), %bb.0
- %13:_(s32) = G_PHI %9(s32), %bb.0, %14(s32), %bb.3
- %15:_(s1) = G_CONSTANT i1 true
- %16:_(s64) = G_SEXT %13(s32)
- %17:_(s32) = G_CONSTANT i32 2
- %18:_(s64) = G_SHL %16, %17(s32)
- %19:_(p1) = G_PTR_ADD %5, %18(s64)
- %20:_(s32) = G_LOAD %19(p1) :: (load (s32), addrspace 1)
- %21:_(s32) = G_CONSTANT i32 0
- %22:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %20(s32), %21
- %23:sreg_32_xm0_xexec(s32) = SI_IF %22(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
+ %11:_(s32) = G_PHI %12(s32), %bb.3, %10(s32), %bb.0
+ %13:_(s32) = G_PHI %10(s32), %bb.0, %14(s32), %bb.3
+ %15:_(s64) = G_SEXT %13(s32)
+ %16:_(s32) = G_CONSTANT i32 2
+ %17:_(s64) = G_SHL %15, %16(s32)
+ %18:_(p1) = G_PTR_ADD %5, %17(s64)
+ %19:_(s32) = G_LOAD %18(p1) :: (load (s32), addrspace 1)
+ %20:_(s32) = G_CONSTANT i32 0
+ %21:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %19(s32), %20
+ %22:_(s1) = G_CONSTANT i1 true
+ %23:sreg_32_xm0_xexec(s32) = SI_IF %21(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.2
bb.2:
successors: %bb.4(0x40000000), %bb.5(0x40000000)
- %24:_(s1) = G_CONSTANT i1 true
- %25:_(s32) = G_CONSTANT i32 2
- %26:_(s64) = G_SHL %16, %25(s32)
- %27:_(p1) = G_PTR_ADD %8, %26(s64)
- %28:_(s32) = G_LOAD %27(p1) :: (load (s32), addrspace 1)
- %29:_(s32) = G_CONSTANT i32 0
- %30:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %28(s32), %29
- %31:sreg_32_xm0_xexec(s32) = SI_IF %30(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
+ %24:_(s32) = G_CONSTANT i32 2
+ %25:_(s64) = G_SHL %15, %24(s32)
+ %26:_(p1) = G_PTR_ADD %8, %25(s64)
+ %27:_(s32) = G_LOAD %26(p1) :: (load (s32), addrspace 1)
+ %28:_(s32) = G_CONSTANT i32 0
+ %29:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %27(s32), %28
+ %30:_(s1) = G_CONSTANT i1 true
+ %31:sreg_32_xm0_xexec(s32) = SI_IF %29(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.4
bb.3:
successors: %bb.6(0x04000000), %bb.1(0x7c000000)
- %14:_(s32) = G_PHI %32(s32), %bb.5, %10(s32), %bb.1
- %33:_(s1) = G_PHI %34(s1), %bb.5, %15(s1), %bb.1
+ %14:_(s32) = G_PHI %32(s32), %bb.5, %9(s32), %bb.1
+ %33:_(s1) = G_PHI %34(s1), %bb.5, %22(s1), %bb.1
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %23(s32)
%12:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %33(s1), %11(s32)
SI_LOOP %12(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -487,7 +484,7 @@ body: |
successors: %bb.5(0x80000000)
%35:_(s32) = G_CONSTANT i32 2
- %36:_(s64) = G_SHL %16, %35(s32)
+ %36:_(s64) = G_SHL %15, %35(s32)
%37:_(p1) = G_PTR_ADD %2, %36(s64)
%38:_(s32) = G_LOAD %37(p1) :: (load (s32), addrspace 1)
%39:_(s32) = G_CONSTANT i32 1
@@ -500,14 +497,13 @@ body: |
bb.5:
successors: %bb.3(0x80000000)
- %32:_(s32) = G_PHI %41(s32), %bb.4, %10(s32), %bb.2
- %34:_(s1) = G_PHI %43(s1), %bb.4, %24(s1), %bb.2
+ %32:_(s32) = G_PHI %41(s32), %bb.4, %9(s32), %bb.2
+ %34:_(s1) = G_PHI %43(s1), %bb.4, %30(s1), %bb.2
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %31(s32)
G_BR %bb.3
bb.6:
- %44:_(s32) = G_PHI %12(s32), %bb.3
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %44(s32)
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %12(s32)
S_ENDPGM 0
...
@@ -533,26 +529,26 @@ body: |
; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
; GFX10-NEXT: [[MV3:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %61(s1), %bb.3
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %60(s1), %bb.3
; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.3, [[C]](s32), %bb.0
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.3
; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1)
; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32)
- ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32)
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C1]](s32)
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64)
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1)
- ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]]
+ ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]]
+ ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
@@ -563,14 +559,14 @@ body: |
; GFX10-NEXT: bb.2:
; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C5]](s32)
+ ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C4]](s32)
; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV2]], [[SHL1]](s64)
; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1)
- ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD1]](s32), [[C6]]
- ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1)
+ ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD1]](s32), [[C5]]
+ ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[C6]](s1)
; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[COPY11]](s1)
; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.4
@@ -578,7 +574,7 @@ body: |
; GFX10-NEXT: bb.3:
; GFX10-NEXT: successors: %bb.8(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, %60(s1), %bb.5
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, %59(s1), %bb.5
; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI %35(s32), %bb.5, [[DEF]](s32), %bb.1
; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1)
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
@@ -589,14 +585,14 @@ body: |
; GFX10-NEXT: bb.4:
; GFX10-NEXT: successors: %bb.6(0x40000000), %bb.7(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C8]](s32)
+ ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C7]](s32)
; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV3]], [[SHL2]](s64)
; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s32), addrspace 1)
- ; GFX10-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD2]](s32), [[C9]]
- ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[C7]](s1)
+ ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD2]](s32), [[C8]]
+ ; GFX10-NEXT: [[C9:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[C9]](s1)
; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[COPY14]](s1)
; GFX10-NEXT: [[SI_IF2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP2]](s1), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.6
@@ -604,7 +600,7 @@ body: |
; GFX10-NEXT: bb.5:
; GFX10-NEXT: successors: %bb.3(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[COPY11]](s1), %bb.2, %72(s1), %bb.7
+ ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[COPY11]](s1), %bb.2, %71(s1), %bb.7
; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI %46(s32), %bb.7, [[DEF]](s32), %bb.2
; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1)
; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32(s1) = COPY [[COPY16]](s1)
@@ -646,8 +642,7 @@ body: |
; GFX10-NEXT: G_BR %bb.5
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.8:
- ; GFX10-NEXT: [[PHI9:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.3
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI9]](s32)
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; GFX10-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1(0x80000000)
@@ -665,43 +660,43 @@ body: |
%9:_(s32) = COPY $vgpr6
%10:_(s32) = COPY $vgpr7
%11:_(p1) = G_MERGE_VALUES %9(s32), %10(s32)
- %12:_(s32) = G_CONSTANT i32 0
- %13:_(s32) = G_IMPLICIT_DEF
+ %12:_(s32) = G_IMPLICIT_DEF
+ %13:_(s32) = G_CONSTANT i32 0
bb.1:
successors: %bb.2(0x40000000), %bb.3(0x40000000)
- %14:_(s32) = G_PHI %15(s32), %bb.3, %12(s32), %bb.0
- %16:_(s32) = G_PHI %12(s32), %bb.0, %17(s32), %bb.3
- %18:_(s1) = G_CONSTANT i1 true
- %19:_(s64) = G_SEXT %16(s32)
- %20:_(s32) = G_CONSTANT i32 2
- %21:_(s64) = G_SHL %19, %20(s32)
- %22:_(p1) = G_PTR_ADD %5, %21(s64)
- %23:_(s32) = G_LOAD %22(p1) :: (load (s32), addrspace 1)
- %24:_(s32) = G_CONSTANT i32 0
- %25:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %23(s32), %24
- %26:sreg_32_xm0_xexec(s32) = SI_IF %25(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
+ %14:_(s32) = G_PHI %15(s32), %bb.3, %13(s32), %bb.0
+ %16:_(s32) = G_PHI %13(s32), %bb.0, %17(s32), %bb.3
+ %18:_(s64) = G_SEXT %16(s32)
+ %19:_(s32) = G_CONSTANT i32 2
+ %20:_(s64) = G_SHL %18, %19(s32)
+ %21:_(p1) = G_PTR_ADD %5, %20(s64)
+ %22:_(s32) = G_LOAD %21(p1) :: (load (s32), addrspace 1)
+ %23:_(s32) = G_CONSTANT i32 0
+ %24:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %22(s32), %23
+ %25:_(s1) = G_CONSTANT i1 true
+ %26:sreg_32_xm0_xexec(s32) = SI_IF %24(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.2
bb.2:
successors: %bb.4(0x40000000), %bb.5(0x40000000)
- %27:_(s1) = G_CONSTANT i1 true
- %28:_(s32) = G_CONSTANT i32 2
- %29:_(s64) = G_SHL %19, %28(s32)
- %30:_(p1) = G_PTR_ADD %8, %29(s64)
- %31:_(s32) = G_LOAD %30(p1) :: (load (s32), addrspace 1)
- %32:_(s32) = G_CONSTANT i32 0
- %33:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %31(s32), %32
- %34:sreg_32_xm0_xexec(s32) = SI_IF %33(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
+ %27:_(s32) = G_CONSTANT i32 2
+ %28:_(s64) = G_SHL %18, %27(s32)
+ %29:_(p1) = G_PTR_ADD %8, %28(s64)
+ %30:_(s32) = G_LOAD %29(p1) :: (load (s32), addrspace 1)
+ %31:_(s32) = G_CONSTANT i32 0
+ %32:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %30(s32), %31
+ %33:_(s1) = G_CONSTANT i1 true
+ %34:sreg_32_xm0_xexec(s32) = SI_IF %32(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.4
bb.3:
successors: %bb.8(0x04000000), %bb.1(0x7c000000)
- %17:_(s32) = G_PHI %35(s32), %bb.5, %13(s32), %bb.1
- %36:_(s1) = G_PHI %37(s1), %bb.5, %18(s1), %bb.1
+ %17:_(s32) = G_PHI %35(s32), %bb.5, %12(s32), %bb.1
+ %36:_(s1) = G_PHI %37(s1), %bb.5, %25(s1), %bb.1
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %26(s32)
%15:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %36(s1), %14(s32)
SI_LOOP %15(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -710,21 +705,21 @@ body: |
bb.4:
successors: %bb.6(0x40000000), %bb.7(0x40000000)
- %38:_(s1) = G_CONSTANT i1 true
- %39:_(s32) = G_CONSTANT i32 2
- %40:_(s64) = G_SHL %19, %39(s32)
- %41:_(p1) = G_PTR_ADD %11, %40(s64)
- %42:_(s32) = G_LOAD %41(p1) :: (load (s32), addrspace 1)
- %43:_(s32) = G_CONSTANT i32 0
- %44:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %42(s32), %43
- %45:sreg_32_xm0_xexec(s32) = SI_IF %44(s1), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec
+ %38:_(s32) = G_CONSTANT i32 2
+ %39:_(s64) = G_SHL %18, %38(s32)
+ %40:_(p1) = G_PTR_ADD %11, %39(s64)
+ %41:_(s32) = G_LOAD %40(p1) :: (load (s32), addrspace 1)
+ %42:_(s32) = G_CONSTANT i32 0
+ %43:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %41(s32), %42
+ %44:_(s1) = G_CONSTANT i1 true
+ %45:sreg_32_xm0_xexec(s32) = SI_IF %43(s1), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.6
bb.5:
successors: %bb.3(0x80000000)
- %35:_(s32) = G_PHI %46(s32), %bb.7, %13(s32), %bb.2
- %37:_(s1) = G_PHI %47(s1), %bb.7, %27(s1), %bb.2
+ %35:_(s32) = G_PHI %46(s32), %bb.7, %12(s32), %bb.2
+ %37:_(s1) = G_PHI %47(s1), %bb.7, %33(s1), %bb.2
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %34(s32)
G_BR %bb.3
@@ -732,7 +727,7 @@ body: |
successors: %bb.7(0x80000000)
%48:_(s32) = G_CONSTANT i32 2
- %49:_(s64) = G_SHL %19, %48(s32)
+ %49:_(s64) = G_SHL %18, %48(s32)
%50:_(p1) = G_PTR_ADD %2, %49(s64)
%51:_(s32) = G_LOAD %50(p1) :: (load (s32), addrspace 1)
%52:_(s32) = G_CONSTANT i32 1
@@ -745,14 +740,13 @@ body: |
bb.7:
successors: %bb.5(0x80000000)
- %46:_(s32) = G_PHI %54(s32), %bb.6, %13(s32), %bb.4
- %47:_(s1) = G_PHI %56(s1), %bb.6, %38(s1), %bb.4
+ %46:_(s32) = G_PHI %54(s32), %bb.6, %12(s32), %bb.4
+ %47:_(s1) = G_PHI %56(s1), %bb.6, %44(s1), %bb.4
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %45(s32)
G_BR %bb.5
bb.8:
- %57:_(s32) = G_PHI %15(s32), %bb.3
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %57(s32)
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %15(s32)
S_ENDPGM 0
...
@@ -775,41 +769,38 @@ body: |
; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
- ; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[DEF3]](s1), %bb.0, %67(s1), %bb.5
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.0, %56(s1), %bb.5
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %43(s1), %bb.5
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.5, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %14(s32), %bb.5
- ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1)
- ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1)
- ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]](s1)
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1)
- ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1)
- ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI4]](s32)
- ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32)
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.0, %54(s1), %bb.5
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[DEF1]](s1), %bb.0, %41(s1), %bb.5
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.5, [[C]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %14(s32), %bb.5
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI1]](s1)
+ ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI3]](s32)
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C1]](s32)
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64)
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1)
- ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]]
- ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
- ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]]
+ ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
+ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C3]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
- ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1)
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1)
; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.3
; GFX10-NEXT: {{ $}}
@@ -823,24 +814,24 @@ body: |
; GFX10-NEXT: bb.3:
; GFX10-NEXT: successors: %bb.5(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
- ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1)
- ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C6]](s32)
+ ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C5]](s32)
; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL1]](s64)
; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1)
- ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[C7]]
+ ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[C6]]
; GFX10-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD1]](p1) :: (store (s32), addrspace 1)
- ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI4]], [[C7]]
- ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 100
- ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI4]](s32), [[C8]]
- ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY12]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C6]]
+ ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 100
+ ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI3]](s32), [[C7]]
+ ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1)
+ ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
+ ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C8]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc
; GFX10-NEXT: G_BR %bb.5
; GFX10-NEXT: {{ $}}
@@ -851,27 +842,21 @@ body: |
; GFX10-NEXT: bb.5:
; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_1]](s1), %bb.1, [[S_OR_B32_3]](s1), %bb.3
- ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_2]](s1), %bb.3
- ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1
- ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1)
- ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI6]](s1)
- ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY16]](s1)
+ ; GFX10-NEXT: [[PHI4:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_1]](s1), %bb.1, [[S_OR_B32_3]](s1), %bb.3
+ ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_2]](s1), %bb.3
+ ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1
+ ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[PHI4]](s1)
+ ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI5]](s1)
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY15]](s1), [[PHI3]](s32)
- ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY17]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY14]](s1), [[PHI2]](s32)
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.6
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.6:
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI8:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.5
- ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_4]](s1)
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI8]](s32)
- ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY18]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
+ ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY15]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.2
bb.0:
successors: %bb.1(0x80000000)
@@ -886,23 +871,23 @@ body: |
%6:_(s32) = COPY $vgpr4
%7:_(s32) = COPY $vgpr5
%8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32)
- %9:_(s32) = G_CONSTANT i32 0
- %10:_(s32) = G_IMPLICIT_DEF
+ %9:_(s32) = G_IMPLICIT_DEF
+ %10:_(s32) = G_CONSTANT i32 0
bb.1:
successors: %bb.3(0x40000000), %bb.5(0x40000000)
- %11:_(s32) = G_PHI %12(s32), %bb.5, %9(s32), %bb.0
- %13:_(s32) = G_PHI %9(s32), %bb.0, %14(s32), %bb.5
- %15:_(s1) = G_CONSTANT i1 true
- %16:_(s64) = G_SEXT %13(s32)
- %17:_(s32) = G_CONSTANT i32 2
- %18:_(s64) = G_SHL %16, %17(s32)
- %19:_(p1) = G_PTR_ADD %5, %18(s64)
- %20:_(s32) = G_LOAD %19(p1) :: (load (s32), addrspace 1)
- %21:_(s32) = G_CONSTANT i32 0
- %22:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %20(s32), %21
- %23:sreg_32_xm0_xexec(s32) = SI_IF %22(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
+ %11:_(s32) = G_PHI %12(s32), %bb.5, %10(s32), %bb.0
+ %13:_(s32) = G_PHI %10(s32), %bb.0, %14(s32), %bb.5
+ %15:_(s64) = G_SEXT %13(s32)
+ %16:_(s32) = G_CONSTANT i32 2
+ %17:_(s64) = G_SHL %15, %16(s32)
+ %18:_(p1) = G_PTR_ADD %5, %17(s64)
+ %19:_(s32) = G_LOAD %18(p1) :: (load (s32), addrspace 1)
+ %20:_(s32) = G_CONSTANT i32 0
+ %21:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %19(s32), %20
+ %22:_(s1) = G_CONSTANT i1 true
+ %23:sreg_32_xm0_xexec(s32) = SI_IF %21(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.3
bb.2:
@@ -915,17 +900,17 @@ body: |
bb.3:
successors: %bb.5(0x80000000)
- %25:_(s1) = G_CONSTANT i1 false
- %26:_(s32) = G_CONSTANT i32 2
- %27:_(s64) = G_SHL %16, %26(s32)
- %28:_(p1) = G_PTR_ADD %2, %27(s64)
- %29:_(s32) = G_LOAD %28(p1) :: (load (s32), addrspace 1)
- %30:_(s32) = G_CONSTANT i32 1
- %31:_(s32) = G_ADD %29, %30
- G_STORE %31(s32), %28(p1) :: (store (s32), addrspace 1)
- %32:_(s32) = G_ADD %13, %30
- %33:_(s32) = G_CONSTANT i32 100
- %34:_(s1) = G_ICMP intpred(ult), %13(s32), %33
+ %25:_(s32) = G_CONSTANT i32 2
+ %26:_(s64) = G_SHL %15, %25(s32)
+ %27:_(p1) = G_PTR_ADD %2, %26(s64)
+ %28:_(s32) = G_LOAD %27(p1) :: (load (s32), addrspace 1)
+ %29:_(s32) = G_CONSTANT i32 1
+ %30:_(s32) = G_ADD %28, %29
+ G_STORE %30(s32), %27(p1) :: (store (s32), addrspace 1)
+ %31:_(s32) = G_ADD %13, %29
+ %32:_(s32) = G_CONSTANT i32 100
+ %33:_(s1) = G_ICMP intpred(ult), %13(s32), %32
+ %34:_(s1) = G_CONSTANT i1 false
G_BR %bb.5
bb.4:
@@ -935,9 +920,9 @@ body: |
bb.5:
successors: %bb.6(0x04000000), %bb.1(0x7c000000)
- %14:_(s32) = G_PHI %32(s32), %bb.3, %10(s32), %bb.1
- %36:_(s1) = G_PHI %25(s1), %bb.3, %15(s1), %bb.1
- %37:_(s1) = G_PHI %34(s1), %bb.3, %15(s1), %bb.1
+ %14:_(s32) = G_PHI %31(s32), %bb.3, %9(s32), %bb.1
+ %36:sreg_32_xm0_xexec(s1) = G_PHI %34(s1), %bb.3, %22(s1), %bb.1
+ %37:_(s1) = G_PHI %33(s1), %bb.3, %22(s1), %bb.1
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %23(s32)
%12:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %37(s1), %11(s32)
SI_LOOP %12(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -946,10 +931,8 @@ body: |
bb.6:
successors: %bb.2(0x40000000), %bb.4(0x40000000)
- %38:sreg_32_xm0_xexec(s1) = G_PHI %36(s1), %bb.5
- %39:_(s32) = G_PHI %12(s32), %bb.5
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %39(s32)
- %35:sreg_32_xm0_xexec(s32) = SI_IF %38(s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %12(s32)
+ %35:sreg_32_xm0_xexec(s32) = SI_IF %36(s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.2
...
@@ -960,6 +943,223 @@ tracksRegLiveness: true
body: |
; GFX10-LABEL: name: irreducible_cfg
; GFX10: bb.0:
+ ; GFX10-NEXT: successors: %bb.8(0x80000000)
+ ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF
+ ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY4]](s32), [[COPY1]]
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[C]]
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[DEF]](s1)
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
+ ; GFX10-NEXT: G_BR %bb.8
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.1:
+ ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI %51(s1), %bb.5, %50(s1), %bb.8
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI %39(s1), %bb.5, %38(s1), %bb.8
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1)
+ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1)
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %17(s32)
+ ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[C2]](s1)
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[COPY10]](s1)
+ ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY8]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: G_BR %bb.2
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.2:
+ ; GFX10-NEXT: successors: %bb.4(0x80000000)
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[COPY4]](s32), [[COPY]]
+ ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: G_BR %bb.4
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.3:
+ ; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.8(0x7c000000)
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(s1) = PHI [[COPY10]](s1), %bb.1, %58(s1), %bb.7
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI %23(s1), %bb.7, [[DEF]](s1), %bb.1
+ ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]](s1)
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY12]](s1), %27(s32)
+ ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[COPY9]](s1)
+ ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1)
+ ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.8, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: G_BR %bb.6
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.4:
+ ; GFX10-NEXT: successors: %bb.7(0x04000000), %bb.4(0x7c000000)
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[C3]](s32), %bb.2, %29(s32), %bb.4
+ ; GFX10-NEXT: [[INT1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ICMP1]](s1), [[PHI4]](s32)
+ ; GFX10-NEXT: SI_LOOP [[INT1]](s32), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: G_BR %bb.7
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.5:
+ ; GFX10-NEXT: successors: %bb.1(0x80000000)
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY5]](s32), [[COPY]]
+ ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1)
+ ; GFX10-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE [[ICMP]]
+ ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[FREEZE]], [[C4]]
+ ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP2]], [[XOR1]]
+ ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[OR]], [[C4]]
+ ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[XOR2]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %46(s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY15]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 %52(s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY16]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
+ ; GFX10-NEXT: G_BR %bb.1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.6:
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY9]](s1), [[COPY3]], [[COPY2]]
+ ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[SELECT]](s32)
+ ; GFX10-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.7:
+ ; GFX10-NEXT: successors: %bb.3(0x80000000)
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT1]](s32)
+ ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
+ ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1)
+ ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY17]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
+ ; GFX10-NEXT: G_BR %bb.3
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.8:
+ ; GFX10-NEXT: successors: %bb.5(0x40000000), %bb.1(0x40000000)
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY7]](s1), %bb.0, [[COPY14]](s1), %bb.3
+ ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32(s1) = PHI [[COPY6]](s1), %bb.0, [[COPY13]](s1), %bb.3
+ ; GFX10-NEXT: [[PHI7:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, [[PHI1]](s1), %bb.3
+ ; GFX10-NEXT: [[PHI8:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.3, [[C1]](s32), %bb.0
+ ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI5]](s1)
+ ; GFX10-NEXT: [[COPY19:%[0-9]+]]:sreg_32(s1) = COPY [[PHI6]](s1)
+ ; GFX10-NEXT: [[COPY20:%[0-9]+]]:sreg_32(s1) = COPY [[COPY19]](s1)
+ ; GFX10-NEXT: [[COPY21:%[0-9]+]]:sreg_32(s1) = COPY [[PHI7]](s1)
+ ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY21]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY20]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[COPY22:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_3]](s1)
+ ; GFX10-NEXT: [[COPY23:%[0-9]+]]:sreg_32(s1) = COPY [[C7]](s1)
+ ; GFX10-NEXT: [[COPY24:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY23]](s1)
+ ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY18]](s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: G_BR %bb.5
+ bb.0:
+ successors: %bb.8(0x80000000)
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %3:_(s32) = COPY $vgpr3
+ %4:_(s32) = COPY $vgpr4
+ %5:_(s32) = COPY $vgpr5
+ %6:_(s1) = G_IMPLICIT_DEF
+ %7:_(s1) = G_ICMP intpred(sgt), %4(s32), %1
+ %8:_(s1) = G_CONSTANT i1 true
+ %9:_(s1) = G_XOR %7, %8
+ %10:_(s32) = G_CONSTANT i32 0
+ G_BR %bb.8
+
+ bb.1:
+ successors: %bb.2(0x40000000), %bb.3(0x40000000)
+
+ %11:_(s1) = G_PHI %12(s1), %bb.5, %13(s1), %bb.8
+ %14:sreg_32_xm0_xexec(s1) = G_PHI %15(s1), %bb.5, %16(s1), %bb.8
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %17(s32)
+ %18:_(s1) = G_CONSTANT i1 true
+ %19:sreg_32_xm0_xexec(s32) = SI_IF %14(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
+ G_BR %bb.2
+
+ bb.2:
+ successors: %bb.4(0x80000000)
+
+ %20:_(s1) = G_ICMP intpred(sle), %4(s32), %0
+ %21:_(s32) = G_CONSTANT i32 0
+ G_BR %bb.4
+
+ bb.3:
+ successors: %bb.6(0x04000000), %bb.8(0x7c000000)
+
+ %22:_(s1) = G_PHI %23(s1), %bb.7, %6(s1), %bb.1
+ %24:_(s1) = G_PHI %25(s1), %bb.7, %18(s1), %bb.1
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %19(s32)
+ %26:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %24(s1), %27(s32)
+ SI_LOOP %26(s32), %bb.8, implicit-def $exec, implicit-def $scc, implicit $exec
+ G_BR %bb.6
+
+ bb.4:
+ successors: %bb.7(0x04000000), %bb.4(0x7c000000)
+
+ %28:_(s32) = G_PHI %21(s32), %bb.2, %29(s32), %bb.4
+ %29:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %20(s1), %28(s32)
+ SI_LOOP %29(s32), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
+ G_BR %bb.7
+
+ bb.5:
+ successors: %bb.1(0x80000000)
+
+ %12:_(s1) = G_ICMP intpred(sgt), %5(s32), %0
+ %30:_(s1) = G_FREEZE %7
+ %31:_(s1) = G_CONSTANT i1 true
+ %32:_(s1) = G_XOR %30, %31
+ %33:_(s1) = G_OR %12, %32
+ %15:_(s1) = G_XOR %33, %31
+ G_BR %bb.1
+
+ bb.6:
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %26(s32)
+ %34:_(s32) = G_SELECT %11(s1), %3, %2
+ %35:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %34(s32)
+ $sgpr0 = COPY %35(s32)
+ SI_RETURN_TO_EPILOG implicit $sgpr0
+
+ bb.7:
+ successors: %bb.3(0x80000000)
+
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %29(s32)
+ %25:_(s1) = G_CONSTANT i1 false
+ %23:_(s1) = G_CONSTANT i1 true
+ G_BR %bb.3
+
+ bb.8:
+ successors: %bb.5(0x40000000), %bb.1(0x40000000)
+
+ %27:_(s32) = G_PHI %26(s32), %bb.3, %10(s32), %bb.0
+ %13:_(s1) = G_PHI %6(s1), %bb.0, %11(s1), %bb.3
+ %36:sreg_32_xm0_xexec(s1) = G_PHI %9(s1), %bb.0, %22(s1), %bb.3
+ %16:_(s1) = G_CONSTANT i1 true
+ %17:sreg_32_xm0_xexec(s32) = SI_IF %36(s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+ G_BR %bb.5
+...
+
+# Keep original test that had phi with three incomings.
+# FixIrreducible no longer generates such phi.
+---
+name: irreducible_cfg_phi_with_three_incomings
+legalized: true
+tracksRegLiveness: true
+body: |
+ ; GFX10-LABEL: name: irreducible_cfg_phi_with_three_incomings
+ ; GFX10: bb.0:
; GFX10-NEXT: successors: %bb.7(0x80000000)
; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX10-NEXT: {{ $}}
@@ -969,26 +1169,25 @@ body: |
; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF
; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY4]](s32), [[COPY1]]
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
- ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1)
; GFX10-NEXT: G_BR %bb.7
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.3(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[COPY4]](s32), [[COPY]]
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: G_BR %bb.3
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.2:
; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI %53(s1), %bb.6, %57(s1), %bb.7
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI %35(s1), %bb.6, %34(s1), %bb.7
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI %45(s1), %bb.6, %49(s1), %bb.7
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI %34(s1), %bb.6, %33(s1), %bb.7
; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI %12(s1), %bb.6, [[DEF]](s1), %bb.7
; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI2]](s1)
; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1)
@@ -1015,27 +1214,23 @@ body: |
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY5]](s32), [[COPY]]
- ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1)
+ ; GFX10-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE [[ICMP]]
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C2]](s1)
- ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[C2]]
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C2]](s1)
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[FREEZE]], [[C2]]
; GFX10-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP2]], [[XOR]]
- ; GFX10-NEXT: [[INT2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[OR]](s1), %25(s32)
- ; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
- ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %49(s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[INT2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[OR]](s1), %26(s32)
+ ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
; GFX10-NEXT: SI_LOOP [[INT2]](s32), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.5
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.5:
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INT2]](s32), %bb.4
- ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1)
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32)
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY13]](s1), [[COPY3]], [[COPY2]]
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT2]](s32)
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[COPY3]], [[COPY2]]
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[SELECT]](s32)
; GFX10-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
@@ -1043,34 +1238,31 @@ body: |
; GFX10-NEXT: bb.6:
; GFX10-NEXT: successors: %bb.2(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[INT1]](s32), %bb.3
- ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
- ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](s32)
- ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %42(s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc
- ; GFX10-NEXT: [[DEF4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT1]](s32)
+ ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
+ ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %41(s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
; GFX10-NEXT: G_BR %bb.2
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.7:
; GFX10-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY6]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.2, [[S_OR_B32_2]](s1), %bb.4
- ; GFX10-NEXT: [[PHI7:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.0, [[PHI7]](s1), %bb.2, [[S_OR_B32_1]](s1), %bb.4
- ; GFX10-NEXT: [[PHI8:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, [[PHI1]](s1), %bb.2, [[DEF3]](s1), %bb.4
- ; GFX10-NEXT: [[PHI9:%[0-9]+]]:_(s32) = G_PHI [[INT2]](s32), %bb.4, [[PHI9]](s32), %bb.2, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[PHI10:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.4, [[INT]](s32), %bb.2, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI6]](s1)
- ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI7]](s1)
- ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32(s1) = COPY [[PHI8]](s1)
- ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY17]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY18]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc
- ; GFX10-NEXT: [[COPY19:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_4]](s1)
- ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY15]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: [[PHI4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY6]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.2, [[S_OR_B32_1]](s1), %bb.4
+ ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, [[PHI1]](s1), %bb.2, [[DEF2]](s1), %bb.4
+ ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[INT2]](s32), %bb.4, [[PHI6]](s32), %bb.2, [[C]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[C3]](s32), %bb.4, [[INT]](s32), %bb.2, [[C]](s32), %bb.0
+ ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI4]](s1)
+ ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1)
+ ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY14]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY15]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_3]](s1)
+ ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY13]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.1
bb.0:
successors: %bb.7(0x80000000)
@@ -1082,22 +1274,22 @@ body: |
%3:_(s32) = COPY $vgpr3
%4:_(s32) = COPY $vgpr4
%5:_(s32) = COPY $vgpr5
- %6:_(s32) = G_CONSTANT i32 0
- %7:_(s1) = G_IMPLICIT_DEF
- %8:_(s1) = G_ICMP intpred(sgt), %4(s32), %1
+ %6:_(s1) = G_IMPLICIT_DEF
+ %7:_(s1) = G_ICMP intpred(sgt), %4(s32), %1
+ %8:_(s32) = G_CONSTANT i32 0
G_BR %bb.7
bb.1:
successors: %bb.3(0x80000000)
- %9:_(s32) = G_CONSTANT i32 0
- %10:_(s1) = G_ICMP intpred(sle), %4(s32), %0
+ %9:_(s1) = G_ICMP intpred(sle), %4(s32), %0
+ %10:_(s32) = G_CONSTANT i32 0
G_BR %bb.3
bb.2:
successors: %bb.4(0x40000000), %bb.7(0x40000000)
- %11:_(s1) = G_PHI %12(s1), %bb.6, %7(s1), %bb.7
+ %11:_(s1) = G_PHI %12(s1), %bb.6, %6(s1), %bb.7
%13:_(s1) = G_PHI %12(s1), %bb.6, %14(s1), %bb.7
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %15(s32)
%16:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %13(s1), %17(s32)
@@ -1107,8 +1299,8 @@ body: |
bb.3:
successors: %bb.6(0x04000000), %bb.3(0x7c000000)
- %18:_(s32) = G_PHI %9(s32), %bb.1, %19(s32), %bb.3
- %19:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %10(s1), %18(s32)
+ %18:_(s32) = G_PHI %10(s32), %bb.1, %19(s32), %bb.3
+ %19:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %9(s1), %18(s32)
SI_LOOP %19(s32), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.6
@@ -1117,18 +1309,18 @@ body: |
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %16(s32)
%20:_(s1) = G_ICMP intpred(sgt), %5(s32), %0
- %21:_(s1) = G_CONSTANT i1 true
- %22:_(s1) = G_XOR %8, %21
- %23:_(s1) = G_OR %20, %22
- %24:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %23(s1), %25(s32)
- SI_LOOP %24(s32), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec
+ %21:_(s1) = G_FREEZE %7
+ %22:_(s1) = G_CONSTANT i1 true
+ %23:_(s1) = G_XOR %21, %22
+ %24:_(s1) = G_OR %20, %23
+ %25:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %24(s1), %26(s32)
+ %27:_(s32) = G_CONSTANT i32 0
+ SI_LOOP %25(s32), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.5
bb.5:
- %26:_(s1) = G_PHI %20(s1), %bb.4
- %27:_(s32) = G_PHI %24(s32), %bb.4
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %27(s32)
- %28:_(s32) = G_SELECT %26(s1), %3, %2
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %25(s32)
+ %28:_(s32) = G_SELECT %20(s1), %3, %2
%29:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %28(s32)
$sgpr0 = COPY %29(s32)
SI_RETURN_TO_EPILOG implicit $sgpr0
@@ -1136,18 +1328,17 @@ body: |
bb.6:
successors: %bb.2(0x80000000)
- %30:_(s32) = G_PHI %19(s32), %bb.3
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %19(s32)
%12:_(s1) = G_CONSTANT i1 false
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %30(s32)
G_BR %bb.2
bb.7:
successors: %bb.1(0x40000000), %bb.2(0x40000000)
- %25:_(s32) = G_PHI %24(s32), %bb.4, %25(s32), %bb.2, %6(s32), %bb.0
- %17:_(s32) = G_PHI %6(s32), %bb.4, %16(s32), %bb.2, %6(s32), %bb.0
- %31:sreg_32_xm0_xexec(s1) = G_PHI %8(s1), %bb.0, %11(s1), %bb.2, %21(s1), %bb.4
+ %26:_(s32) = G_PHI %25(s32), %bb.4, %26(s32), %bb.2, %8(s32), %bb.0
+ %17:_(s32) = G_PHI %27(s32), %bb.4, %16(s32), %bb.2, %8(s32), %bb.0
+ %30:sreg_32_xm0_xexec(s1) = G_PHI %7(s1), %bb.0, %11(s1), %bb.2, %22(s1), %bb.4
%14:_(s1) = G_CONSTANT i1 true
- %15:sreg_32_xm0_xexec(s32) = SI_IF %31(s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
+ %15:sreg_32_xm0_xexec(s32) = SI_IF %30(s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.1
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.mir
index fb436623bed2d..e9a415c3da7ee 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.mir
@@ -15,39 +15,30 @@ body: |
; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %22(s1), %bb.1
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.1
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[C]](s1), %bb.0, %11(s1), %bb.1
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1)
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI [[C1]](s1), %bb.0, %11(s1), %bb.1
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C2]]
- ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI2]], [[C2]]
+ ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI1]](s32)
; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]]
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI1]](s32)
- ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY4]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY3]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C3]]
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32)
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.2
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.2:
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.1
- ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32)
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY5]](s1), [[C5]], [[C4]]
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[PHI2]](s1), [[C5]], [[C4]]
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
; GFX10-NEXT: SI_RETURN
bb.0:
@@ -58,15 +49,15 @@ body: |
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
%3:_(p0) = G_MERGE_VALUES %1(s32), %2(s32)
- %4:_(s1) = G_CONSTANT i1 true
- %5:_(s32) = G_CONSTANT i32 0
+ %4:_(s32) = G_CONSTANT i32 0
+ %5:_(s1) = G_CONSTANT i1 true
bb.1:
successors: %bb.2(0x04000000), %bb.1(0x7c000000)
- %6:_(s32) = G_PHI %7(s32), %bb.1, %5(s32), %bb.0
- %8:_(s32) = G_PHI %5(s32), %bb.0, %9(s32), %bb.1
- %10:_(s1) = G_PHI %4(s1), %bb.0, %11(s1), %bb.1
+ %6:_(s32) = G_PHI %7(s32), %bb.1, %4(s32), %bb.0
+ %8:_(s32) = G_PHI %4(s32), %bb.0, %9(s32), %bb.1
+ %10:_(s1) = G_PHI %5(s1), %bb.0, %11(s1), %bb.1
%12:_(s1) = G_CONSTANT i1 true
%11:_(s1) = G_XOR %10, %12
%13:_(s32) = G_UITOFP %8(s32)
@@ -78,13 +69,11 @@ body: |
G_BR %bb.2
bb.2:
- %16:_(s1) = G_PHI %10(s1), %bb.1
- %17:_(s32) = G_PHI %7(s32), %bb.1
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %17(s32)
- %18:_(s32) = G_FCONSTANT float 0.000000e+00
- %19:_(s32) = G_FCONSTANT float 1.000000e+00
- %20:_(s32) = G_SELECT %16(s1), %19, %18
- G_STORE %20(s32), %3(p0) :: (store (s32))
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %7(s32)
+ %16:_(s32) = G_FCONSTANT float 0.000000e+00
+ %17:_(s32) = G_FCONSTANT float 1.000000e+00
+ %18:_(s32) = G_SELECT %10(s1), %17, %16
+ G_STORE %18(s32), %3(p0) :: (store (s32))
SI_RETURN
...
@@ -102,39 +91,30 @@ body: |
; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %22(s1), %bb.1
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.1
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[C]](s1), %bb.0, %11(s1), %bb.1
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI [[C1]](s1), %bb.0, %11(s1), %bb.1
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C2]]
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
- ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
+ ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI2]], [[C2]]
+ ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI1]](s32)
; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]]
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI1]](s32)
- ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY3]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C3]]
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32)
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.2
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.2:
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.1
- ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32)
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
- ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY5]](s1), [[C5]], [[C4]]
+ ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[XOR]](s1), [[C5]], [[C4]]
; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
; GFX10-NEXT: SI_RETURN
bb.0:
@@ -145,15 +125,15 @@ body: |
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
%3:_(p0) = G_MERGE_VALUES %1(s32), %2(s32)
- %4:_(s1) = G_CONSTANT i1 true
- %5:_(s32) = G_CONSTANT i32 0
+ %4:_(s32) = G_CONSTANT i32 0
+ %5:_(s1) = G_CONSTANT i1 true
bb.1:
successors: %bb.2(0x04000000), %bb.1(0x7c000000)
- %6:_(s32) = G_PHI %7(s32), %bb.1, %5(s32), %bb.0
- %8:_(s32) = G_PHI %5(s32), %bb.0, %9(s32), %bb.1
- %10:_(s1) = G_PHI %4(s1), %bb.0, %11(s1), %bb.1
+ %6:_(s32) = G_PHI %7(s32), %bb.1, %4(s32), %bb.0
+ %8:_(s32) = G_PHI %4(s32), %bb.0, %9(s32), %bb.1
+ %10:_(s1) = G_PHI %5(s1), %bb.0, %11(s1), %bb.1
%12:_(s1) = G_CONSTANT i1 true
%11:_(s1) = G_XOR %10, %12
%13:_(s32) = G_UITOFP %8(s32)
@@ -165,13 +145,11 @@ body: |
G_BR %bb.2
bb.2:
- %16:_(s1) = G_PHI %11(s1), %bb.1
- %17:_(s32) = G_PHI %7(s32), %bb.1
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %17(s32)
- %18:_(s32) = G_FCONSTANT float 0.000000e+00
- %19:_(s32) = G_FCONSTANT float 1.000000e+00
- %20:_(s32) = G_SELECT %16(s1), %19, %18
- G_STORE %20(s32), %3(p0) :: (store (s32))
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %7(s32)
+ %16:_(s32) = G_FCONSTANT float 0.000000e+00
+ %17:_(s32) = G_FCONSTANT float 1.000000e+00
+ %18:_(s32) = G_SELECT %11(s1), %17, %16
+ G_STORE %18(s32), %3(p0) :: (store (s32))
SI_RETURN
...
@@ -195,33 +173,30 @@ body: |
; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr2
; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr3
; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
- ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.3(0x50000000), %bb.5(0x30000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[DEF2]](s1), %bb.0, %53(s1), %bb.5
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %42(s1), %bb.5
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %13(s32), %bb.5, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %15(s32), %bb.5
- ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1)
- ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1)
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1)
- ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI3]](s32)
- ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32)
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %40(s1), %bb.5
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %13(s32), %bb.5, [[C]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %15(s32), %bb.5
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
+ ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32)
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C1]](s32)
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64)
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1)
- ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]]
- ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]]
+ ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
+ ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
- ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
+ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
; GFX10-NEXT: G_BRCOND [[ICMP]](s1), %bb.3
; GFX10-NEXT: G_BR %bb.5
; GFX10-NEXT: {{ $}}
@@ -235,19 +210,19 @@ body: |
; GFX10-NEXT: bb.3:
; GFX10-NEXT: successors: %bb.5(0x80000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
- ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C6]](s32)
+ ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C5]](s32)
; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL1]](s64)
; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1)
- ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[C7]]
+ ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[C6]]
; GFX10-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD1]](p1) :: (store (s32), addrspace 1)
- ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C7]]
- ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI3]](s32), [[COPY2]]
- ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1)
- ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C6]]
+ ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI2]](s32), [[COPY2]]
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1)
+ ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
+ ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc
; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
; GFX10-NEXT: G_BR %bb.5
; GFX10-NEXT: {{ $}}
@@ -258,25 +233,19 @@ body: |
; GFX10-NEXT: bb.5:
; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI4:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_1]](s1), %bb.3
- ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1
- ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s1) = G_PHI [[C5]](s1), %bb.3, [[C1]](s1), %bb.1
- ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI6]](s1)
- ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[PHI4]](s1)
- ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY13]](s1), [[PHI2]](s32)
- ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc
- ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc
- ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_1]](s1), %bb.3
+ ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1
+ ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_PHI [[C7]](s1), %bb.3, [[C3]](s1), %bb.1
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1)
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY11]](s1), [[PHI1]](s32)
; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.6
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.6:
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.5
- ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_2]](s1)
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32)
- ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY14]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
+ ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[PHI5]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX10-NEXT: G_BR %bb.2
bb.0:
successors: %bb.1(0x80000000)
@@ -292,23 +261,23 @@ body: |
%7:_(s32) = COPY $sgpr2
%8:_(s32) = COPY $sgpr3
%9:_(p1) = G_MERGE_VALUES %7(s32), %8(s32)
- %10:_(s32) = G_CONSTANT i32 0
- %11:_(s32) = G_IMPLICIT_DEF
+ %10:_(s32) = G_IMPLICIT_DEF
+ %11:_(s32) = G_CONSTANT i32 0
bb.1:
successors: %bb.3(0x50000000), %bb.5(0x30000000)
- %12:_(s32) = G_PHI %13(s32), %bb.5, %10(s32), %bb.0
- %14:_(s32) = G_PHI %10(s32), %bb.0, %15(s32), %bb.5
- %16:_(s1) = G_CONSTANT i1 true
- %17:_(s64) = G_SEXT %14(s32)
- %18:_(s32) = G_CONSTANT i32 2
- %19:_(s64) = G_SHL %17, %18(s32)
- %20:_(p1) = G_PTR_ADD %6, %19(s64)
- %21:_(s32) = G_LOAD %20(p1) :: (load (s32), addrspace 1)
- %22:_(s32) = G_CONSTANT i32 0
- %23:_(s1) = G_ICMP intpred(ne), %21(s32), %22
- G_BRCOND %23(s1), %bb.3
+ %12:_(s32) = G_PHI %13(s32), %bb.5, %11(s32), %bb.0
+ %14:_(s32) = G_PHI %11(s32), %bb.0, %15(s32), %bb.5
+ %16:_(s64) = G_SEXT %14(s32)
+ %17:_(s32) = G_CONSTANT i32 2
+ %18:_(s64) = G_SHL %16, %17(s32)
+ %19:_(p1) = G_PTR_ADD %6, %18(s64)
+ %20:_(s32) = G_LOAD %19(p1) :: (load (s32), addrspace 1)
+ %21:_(s32) = G_CONSTANT i32 0
+ %22:_(s1) = G_ICMP intpred(ne), %20(s32), %21
+ %23:_(s1) = G_CONSTANT i1 true
+ G_BRCOND %22(s1), %bb.3
G_BR %bb.5
bb.2:
@@ -321,16 +290,16 @@ body: |
bb.3:
successors: %bb.5(0x80000000)
- %25:_(s1) = G_CONSTANT i1 false
- %26:_(s32) = G_CONSTANT i32 2
- %27:_(s64) = G_SHL %17, %26(s32)
- %28:_(p1) = G_PTR_ADD %2, %27(s64)
- %29:_(s32) = G_LOAD %28(p1) :: (load (s32), addrspace 1)
- %30:_(s32) = G_CONSTANT i32 1
- %31:_(s32) = G_ADD %29, %30
- G_STORE %31(s32), %28(p1) :: (store (s32), addrspace 1)
- %32:_(s32) = G_ADD %14, %30
- %33:_(s1) = G_ICMP intpred(ult), %14(s32), %3
+ %25:_(s32) = G_CONSTANT i32 2
+ %26:_(s64) = G_SHL %16, %25(s32)
+ %27:_(p1) = G_PTR_ADD %2, %26(s64)
+ %28:_(s32) = G_LOAD %27(p1) :: (load (s32), addrspace 1)
+ %29:_(s32) = G_CONSTANT i32 1
+ %30:_(s32) = G_ADD %28, %29
+ G_STORE %30(s32), %27(p1) :: (store (s32), addrspace 1)
+ %31:_(s32) = G_ADD %14, %29
+ %32:_(s1) = G_ICMP intpred(ult), %14(s32), %3
+ %33:_(s1) = G_CONSTANT i1 false
G_BR %bb.5
bb.4:
@@ -340,9 +309,9 @@ body: |
bb.5:
successors: %bb.6(0x04000000), %bb.1(0x7c000000)
- %15:_(s32) = G_PHI %32(s32), %bb.3, %11(s32), %bb.1
- %35:_(s1) = G_PHI %25(s1), %bb.3, %16(s1), %bb.1
- %36:_(s1) = G_PHI %33(s1), %bb.3, %16(s1), %bb.1
+ %15:_(s32) = G_PHI %31(s32), %bb.3, %10(s32), %bb.1
+ %35:sreg_32_xm0_xexec(s1) = G_PHI %33(s1), %bb.3, %23(s1), %bb.1
+ %36:_(s1) = G_PHI %32(s1), %bb.3, %23(s1), %bb.1
%13:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %36(s1), %12(s32)
SI_LOOP %13(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.6
@@ -350,9 +319,7 @@ body: |
bb.6:
successors: %bb.2(0x40000000), %bb.4(0x40000000)
- %37:sreg_32_xm0_xexec(s1) = G_PHI %35(s1), %bb.5
- %38:_(s32) = G_PHI %13(s32), %bb.5
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %38(s32)
- %34:sreg_32_xm0_xexec(s32) = SI_IF %37(s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %13(s32)
+ %34:sreg_32_xm0_xexec(s32) = SI_IF %35(s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.2
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.mir
index d1b473f2f41d8..996815e2d38fc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.mir
@@ -15,14 +15,14 @@ body: |
; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C]](s32), %bb.0
- ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.1
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1
; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C2]]
; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[ADD]](s32)
@@ -32,10 +32,8 @@ body: |
; GFX10-NEXT: G_BR %bb.2
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.2:
- ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.1
- ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.1
- ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s32)
- ; GFX10-NEXT: G_STORE [[PHI2]](s32), [[MV]](p0) :: (store (s32))
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
+ ; GFX10-NEXT: G_STORE [[ADD]](s32), [[MV]](p0) :: (store (s32))
; GFX10-NEXT: SI_RETURN
bb.0:
successors: %bb.1(0x80000000)
@@ -45,14 +43,14 @@ body: |
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
%3:_(p0) = G_MERGE_VALUES %1(s32), %2(s32)
- %4:_(s32) = G_CONSTANT i32 0
- %5:_(s32) = G_CONSTANT i32 -1
+ %4:_(s32) = G_CONSTANT i32 -1
+ %5:_(s32) = G_CONSTANT i32 0
bb.1:
successors: %bb.2(0x04000000), %bb.1(0x7c000000)
- %6:_(s32) = G_PHI %7(s32), %bb.1, %4(s32), %bb.0
- %8:_(s32) = G_PHI %5(s32), %bb.0, %9(s32), %bb.1
+ %6:_(s32) = G_PHI %7(s32), %bb.1, %5(s32), %bb.0
+ %8:_(s32) = G_PHI %4(s32), %bb.0, %9(s32), %bb.1
%10:_(s32) = G_CONSTANT i32 1
%9:_(s32) = G_ADD %8, %10
%11:_(s32) = G_UITOFP %9(s32)
@@ -62,9 +60,7 @@ body: |
G_BR %bb.2
bb.2:
- %13:_(s32) = G_PHI %9(s32), %bb.1
- %14:_(s32) = G_PHI %7(s32), %bb.1
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %14(s32)
- G_STORE %13(s32), %3(p0) :: (store (s32))
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %7(s32)
+ G_STORE %9(s32), %3(p0) :: (store (s32))
SI_RETURN
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbankselect.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbankselect.mir
index 8f3495ea87eec..97dcd5084cacc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbankselect.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbankselect.mir
@@ -597,7 +597,7 @@ body: |
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI %17(s32), %bb.1, [[C1]](s32), %bb.0
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI %15(s32), %bb.1, [[C1]](s32), %bb.0
; CHECK-NEXT: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1
; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[PHI1]], [[C2]]
@@ -609,11 +609,9 @@ body: |
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: [[PHI2:%[0-9]+]]:vgpr(s32) = G_PHI [[ADD]](s32), %bb.1
- ; CHECK-NEXT: [[PHI3:%[0-9]+]]:sgpr(s32) = G_PHI [[INT]](s32), %bb.1
- ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s32)
+ ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[PHI2]], [[C3]]
+ ; CHECK-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[ADD]], [[C3]]
; CHECK-NEXT: G_STORE [[MUL]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -641,12 +639,10 @@ body: |
G_BR %bb.2
bb.2:
- %13:_(s32) = G_PHI %9(s32), %bb.1
- %14:_(s32) = G_PHI %7(s32), %bb.1
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %14(s32)
- %15:_(s32) = G_CONSTANT i32 10
- %16:_(s32) = G_MUL %13, %15
- G_STORE %16(s32), %3(p1) :: (store (s32), addrspace 1)
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %7(s32)
+ %13:_(s32) = G_CONSTANT i32 10
+ %14:_(s32) = G_MUL %9, %13
+ G_STORE %14(s32), %3(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
@@ -677,7 +673,7 @@ body: |
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %13(s1), %bb.3
- ; CHECK-NEXT: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI %68(s32), %bb.3, [[C]](s32), %bb.0
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI %67(s32), %bb.3, [[C]](s32), %bb.0
; CHECK-NEXT: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.3
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(s64) = G_SEXT [[PHI2]](s32)
@@ -760,8 +756,7 @@ body: |
; CHECK-NEXT: G_BR %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.6:
- ; CHECK-NEXT: [[PHI7:%[0-9]+]]:sgpr(s32) = G_PHI [[INT]](s32), %bb.3
- ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32)
+ ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; CHECK-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1(0x80000000)
@@ -861,7 +856,6 @@ body: |
G_BR %bb.3
bb.6:
- %64:_(s32) = G_PHI %15(s32), %bb.3
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %64(s32)
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %15(s32)
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll
index 951182f703a4e..44ff2b45f1598 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll
@@ -462,8 +462,7 @@ define amdgpu_ps void @divergent_because_of_temporal_divergent_use(float %val, p
; NEW_RBS-NEXT: s_cbranch_execnz .LBB15_1
; NEW_RBS-NEXT: ; %bb.2: ; %exit
; NEW_RBS-NEXT: s_or_b32 exec_lo, exec_lo, s1
-; NEW_RBS-NEXT: v_mov_b32_e32 v0, s0
-; NEW_RBS-NEXT: v_mul_lo_u32 v0, v0, 10
+; NEW_RBS-NEXT: v_mul_lo_u32 v0, s0, 10
; NEW_RBS-NEXT: global_store_dword v[1:2], v0, off
; NEW_RBS-NEXT: s_endpgm
entry:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.mir
index 1b22ee4b3fffc..06b0b7269b224 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.mir
@@ -971,12 +971,10 @@ body: |
; OLD_RBS-NEXT: G_BR %bb.2
; OLD_RBS-NEXT: {{ $}}
; OLD_RBS-NEXT: bb.2:
- ; OLD_RBS-NEXT: [[PHI2:%[0-9]+]]:vgpr(s32) = G_PHI [[ADD]](s32), %bb.1
- ; OLD_RBS-NEXT: [[PHI3:%[0-9]+]]:sgpr(s32) = G_PHI [[INT]](s32), %bb.1
- ; OLD_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s32)
+ ; OLD_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; OLD_RBS-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10
; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C3]](s32)
- ; OLD_RBS-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[PHI2]], [[COPY4]]
+ ; OLD_RBS-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[ADD]], [[COPY4]]
; OLD_RBS-NEXT: G_STORE [[MUL]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; OLD_RBS-NEXT: S_ENDPGM 0
;
@@ -995,7 +993,7 @@ body: |
; NEW_RBS-NEXT: bb.1:
; NEW_RBS-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
; NEW_RBS-NEXT: {{ $}}
- ; NEW_RBS-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI %17(s32), %bb.1, [[C1]](s32), %bb.0
+ ; NEW_RBS-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI %15(s32), %bb.1, [[C1]](s32), %bb.0
; NEW_RBS-NEXT: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1
; NEW_RBS-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
; NEW_RBS-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[PHI1]], [[C2]]
@@ -1008,12 +1006,11 @@ body: |
; NEW_RBS-NEXT: G_BR %bb.2
; NEW_RBS-NEXT: {{ $}}
; NEW_RBS-NEXT: bb.2:
- ; NEW_RBS-NEXT: [[PHI2:%[0-9]+]]:vgpr(s32) = G_PHI [[ADD]](s32), %bb.1
- ; NEW_RBS-NEXT: [[PHI3:%[0-9]+]]:sgpr(s32) = G_PHI [[INT]](s32), %bb.1
- ; NEW_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s32)
+ ; NEW_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; NEW_RBS-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10
- ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C3]](s32)
- ; NEW_RBS-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[PHI2]], [[COPY5]]
+ ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32)
+ ; NEW_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C3]](s32)
+ ; NEW_RBS-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[COPY5]], [[COPY6]]
; NEW_RBS-NEXT: G_STORE [[MUL]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; NEW_RBS-NEXT: S_ENDPGM 0
bb.0:
@@ -1041,12 +1038,10 @@ body: |
G_BR %bb.2
bb.2:
- %13:_(s32) = G_PHI %9(s32), %bb.1
- %14:_(s32) = G_PHI %7(s32), %bb.1
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %14(s32)
- %15:_(s32) = G_CONSTANT i32 10
- %16:_(s32) = G_MUL %13, %15
- G_STORE %16(s32), %3(p1) :: (store (s32), addrspace 1)
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %7(s32)
+ %13:_(s32) = G_CONSTANT i32 10
+ %14:_(s32) = G_MUL %9, %13
+ G_STORE %14(s32), %3(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
@@ -1167,8 +1162,7 @@ body: |
; OLD_RBS-NEXT: G_BR %bb.3
; OLD_RBS-NEXT: {{ $}}
; OLD_RBS-NEXT: bb.6:
- ; OLD_RBS-NEXT: [[PHI7:%[0-9]+]]:sgpr(s32) = G_PHI [[INT]](s32), %bb.3
- ; OLD_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32)
+ ; OLD_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; OLD_RBS-NEXT: S_ENDPGM 0
;
; NEW_RBS-LABEL: name: loop_with_2breaks
@@ -1193,7 +1187,7 @@ body: |
; NEW_RBS-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; NEW_RBS-NEXT: {{ $}}
; NEW_RBS-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %13(s1), %bb.3
- ; NEW_RBS-NEXT: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI %68(s32), %bb.3, [[C]](s32), %bb.0
+ ; NEW_RBS-NEXT: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI %67(s32), %bb.3, [[C]](s32), %bb.0
; NEW_RBS-NEXT: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.3
; NEW_RBS-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 31
@@ -1285,8 +1279,7 @@ body: |
; NEW_RBS-NEXT: G_BR %bb.3
; NEW_RBS-NEXT: {{ $}}
; NEW_RBS-NEXT: bb.6:
- ; NEW_RBS-NEXT: [[PHI7:%[0-9]+]]:sgpr(s32) = G_PHI [[INT]](s32), %bb.3
- ; NEW_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32)
+ ; NEW_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT]](s32)
; NEW_RBS-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1(0x80000000)
@@ -1386,7 +1379,6 @@ body: |
G_BR %bb.3
bb.6:
- %64:_(s32) = G_PHI %15(s32), %bb.3
- G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %64(s32)
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %15(s32)
S_ENDPGM 0
...
More information about the llvm-commits
mailing list