[llvm-branch-commits] [llvm] AMDGPU: Allow folding multiple uses of some immediates into copies (PR #154757)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Aug 21 06:10:42 PDT 2025
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/154757
In some cases this will require an avoidable re-defining of
a register, but it works out better most of the time. Also allow
folding 64-bit immediates into subregister extracts, unless it would
break an inline constant.
We could be more aggressive here, but this set of conditions seems
to do a reasonable job without introducing too many regressions.
From 5b5f0042f7da212fd395ecef6252054f611a2036 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 21 Aug 2025 12:24:40 +0900
Subject: [PATCH] AMDGPU: Allow folding multiple uses of some immediates into
copies
In some cases this will require an avoidable re-defining of
a register, but it works out better most of the time. Also allow
folding 64-bit immediates into subregister extracts, unless it would
break an inline constant.
We could be more aggressive here, but this set of conditions seems
to do a reasonable job without introducing too many regressions.
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 27 +-
.../GlobalISel/llvm.amdgcn.interp.inreg.ll | 12 +-
.../CodeGen/AMDGPU/GlobalISel/mubuf-global.ll | 20 +-
.../test/CodeGen/AMDGPU/GlobalISel/saddsat.ll | 26 +-
.../CodeGen/AMDGPU/GlobalISel/srem.i64.ll | 52 +-
.../test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll | 17 +-
llvm/test/CodeGen/AMDGPU/addrspacecast-gas.ll | 5 +-
.../AMDGPU/agpr-copy-no-free-registers.ll | 160 +--
.../AMDGPU/amdgpu-codegenprepare-idiv.ll | 22 +-
.../atomic_optimizations_local_pointer.ll | 14 +-
.../branch-folding-implicit-def-subreg.ll | 356 +++---
...dagcomb-extract-vec-elt-different-sizes.ll | 36 +-
.../CodeGen/AMDGPU/dagcombine-fmul-sel.ll | 112 +-
llvm/test/CodeGen/AMDGPU/div_i128.ll | 112 +-
llvm/test/CodeGen/AMDGPU/div_v2i128.ll | 1110 ++++++++---------
.../divergent-branch-uniform-condition.ll | 28 +-
.../CodeGen/AMDGPU/extract_vector_elt-f16.ll | 2 +-
.../CodeGen/AMDGPU/extract_vector_elt-i16.ll | 8 +-
llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll | 49 +-
llvm/test/CodeGen/AMDGPU/fptoi.i128.ll | 390 +++---
llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll | 80 +-
.../identical-subrange-spill-infloop.ll | 11 +-
llvm/test/CodeGen/AMDGPU/iglp-no-clobber.ll | 2 +-
...llvm.amdgcn.iglp.AFLCustomIRMutator.opt.ll | 2 +-
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll | 8 +-
llvm/test/CodeGen/AMDGPU/llvm.frexp.ll | 95 +-
llvm/test/CodeGen/AMDGPU/mad-combine.ll | 18 +-
.../CodeGen/AMDGPU/masked-load-vectortypes.ll | 2 +-
llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll | 2 +-
.../AMDGPU/peephole-fold-imm-multi-use.mir | 94 ++
llvm/test/CodeGen/AMDGPU/rem_i128.ll | 232 ++--
llvm/test/CodeGen/AMDGPU/roundeven.ll | 12 +-
llvm/test/CodeGen/AMDGPU/rsq.f64.ll | 352 +++---
llvm/test/CodeGen/AMDGPU/sdiv64.ll | 2 +-
.../test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll | 6 +-
llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll | 42 +-
llvm/test/CodeGen/AMDGPU/spill-agpr.ll | 232 ++--
llvm/test/CodeGen/AMDGPU/srem64.ll | 2 +-
llvm/test/CodeGen/AMDGPU/srl.ll | 2 +-
.../CodeGen/AMDGPU/subreg-coalescer-crash.ll | 2 +-
llvm/test/CodeGen/AMDGPU/udiv64.ll | 2 +-
llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll | 42 +-
.../AMDGPU/undef-handling-crash-in-ra.ll | 40 +-
llvm/test/CodeGen/AMDGPU/urem64.ll | 96 +-
llvm/test/CodeGen/AMDGPU/v_cndmask.ll | 2 +-
llvm/test/CodeGen/AMDGPU/valu-i1.ll | 2 +-
46 files changed, 2051 insertions(+), 1889 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/peephole-fold-imm-multi-use.mir
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 75b303086163b..1be8d99834f93 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3559,13 +3559,12 @@ static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc) {
bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
Register Reg, MachineRegisterInfo *MRI) const {
- if (!MRI->hasOneNonDBGUse(Reg))
- return false;
-
int64_t Imm;
if (!getConstValDefinedInReg(DefMI, Reg, Imm))
return false;
+ const bool HasMultipleUses = !MRI->hasOneNonDBGUse(Reg);
+
assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form");
unsigned Opc = UseMI.getOpcode();
@@ -3577,6 +3576,25 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
const TargetRegisterClass *DstRC = RI.getRegClassForReg(*MRI, DstReg);
+ if (HasMultipleUses) {
+ // TODO: This should fold in more cases with multiple use, but we need to
+ // more carefully consider what those uses are.
+ unsigned ImmDefSize = RI.getRegSizeInBits(*MRI->getRegClass(Reg));
+
+ // Avoid breaking up a 64-bit inline immediate into a subregister extract.
+ if (UseSubReg != AMDGPU::NoSubRegister && ImmDefSize == 64)
+ return false;
+
+ // Most of the time folding a 32-bit inline constant is free (though this
+ // might not be true if we can't later fold it into a real user).
+ //
+ // FIXME: This isInlineConstant check is imprecise if
+ // getConstValDefinedInReg handled the tricky non-mov cases.
+ if (ImmDefSize == 32 &&
+ !isInlineConstant(Imm, AMDGPU::OPERAND_REG_IMM_INT32))
+ return false;
+ }
+
bool Is16Bit = UseSubReg != AMDGPU::NoSubRegister &&
RI.getSubRegIdxSize(UseSubReg) == 16;
@@ -3664,6 +3682,9 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
return true;
}
+ if (HasMultipleUses)
+ return false;
+
if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.inreg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.inreg.ll
index a09703285087c..bd6634f250777 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.inreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.inreg.ll
@@ -358,12 +358,12 @@ main_body:
define amdgpu_ps half @v_interp_f16_imm_params(float inreg %i, float inreg %j) #0 {
; GFX11-TRUE16-LABEL: v_interp_f16_imm_params:
; GFX11-TRUE16: ; %bb.0: ; %main_body
-; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, s0 :: v_dual_mov_b32 v2, 0
+; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, s0 :: v_dual_mov_b32 v2, s1
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0
-; GFX11-TRUE16-NEXT: v_mov_b32_e32 v3, s1
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v3, 0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_interp_p10_f16_f32 v1, v0.l, v1, v0.l wait_exp:7
-; GFX11-TRUE16-NEXT: v_interp_p2_f16_f32 v0.l, v0.l, v3, v2 wait_exp:7
+; GFX11-TRUE16-NEXT: v_interp_p2_f16_f32 v0.l, v0.l, v2, v3 wait_exp:7
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v1
; GFX11-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.h, v0.l
@@ -383,12 +383,12 @@ define amdgpu_ps half @v_interp_f16_imm_params(float inreg %i, float inreg %j) #
;
; GFX12-TRUE16-LABEL: v_interp_f16_imm_params:
; GFX12-TRUE16: ; %bb.0: ; %main_body
-; GFX12-TRUE16-NEXT: v_dual_mov_b32 v1, s0 :: v_dual_mov_b32 v2, 0
+; GFX12-TRUE16-NEXT: v_dual_mov_b32 v1, s0 :: v_dual_mov_b32 v2, s1
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0
-; GFX12-TRUE16-NEXT: v_mov_b32_e32 v3, s1
+; GFX12-TRUE16-NEXT: v_mov_b32_e32 v3, 0
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_interp_p10_f16_f32 v1, v0.l, v1, v0.l wait_exp:7
-; GFX12-TRUE16-NEXT: v_interp_p2_f16_f32 v0.l, v0.l, v3, v2 wait_exp:7
+; GFX12-TRUE16-NEXT: v_interp_p2_f16_f32 v0.l, v0.l, v2, v3 wait_exp:7
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v1
; GFX12-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.h, v0.l
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll
index 07d5ff2036d93..b75eb737534e9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll
@@ -1379,45 +1379,43 @@ define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_vgpr_offset(ptr addrspace(1) in
; GFX6-LABEL: mubuf_atomicrmw_sgpr_ptr_vgpr_offset:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0
-; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
+; GFX6-NEXT: v_lshl_b64 v[1:2], v[0:1], 2
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
-; GFX6-NEXT: v_mov_b32_e32 v2, 2
+; GFX6-NEXT: v_mov_b32_e32 v0, 2
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
-; GFX6-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc
+; GFX6-NEXT: buffer_atomic_add v0, v[1:2], s[0:3], 0 addr64 glc
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
-; GFX6-NEXT: v_mov_b32_e32 v0, v2
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: mubuf_atomicrmw_sgpr_ptr_vgpr_offset:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
-; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
+; GFX7-NEXT: v_lshl_b64 v[1:2], v[0:1], 2
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
-; GFX7-NEXT: v_mov_b32_e32 v2, 2
+; GFX7-NEXT: v_mov_b32_e32 v0, 2
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
-; GFX7-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc
+; GFX7-NEXT: buffer_atomic_add v0, v[1:2], s[0:3], 0 addr64 glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1
-; GFX7-NEXT: v_mov_b32_e32 v0, v2
; GFX7-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: mubuf_atomicrmw_sgpr_ptr_vgpr_offset:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
-; GFX12-NEXT: v_mov_b32_e32 v4, 2
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_lshlrev_b64_e32 v[0:1], 2, v[0:1]
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
-; GFX12-NEXT: global_atomic_add_u32 v0, v[0:1], v4, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX12-NEXT: v_mov_b32_e32 v2, 2
+; GFX12-NEXT: global_atomic_add_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_wait_loadcnt 0x0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
index 832f066adaa84..2f956d7a0a534 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
@@ -229,21 +229,23 @@ define i16 @v_saddsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 24, v0
-; GFX6-NEXT: v_min_i32_e32 v5, 0, v0
+; GFX6-NEXT: v_min_i32_e32 v6, 0, v0
+; GFX6-NEXT: v_bfrev_b32_e32 v7, 1
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1
; GFX6-NEXT: v_max_i32_e32 v4, 0, v0
-; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0x80000000, v5
+; GFX6-NEXT: v_sub_i32_e32 v6, vcc, v7, v6
; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0x7fffffff, v4
-; GFX6-NEXT: v_max_i32_e32 v1, v5, v1
+; GFX6-NEXT: v_max_i32_e32 v1, v6, v1
; GFX6-NEXT: v_min_i32_e32 v1, v1, v4
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v2
; GFX6-NEXT: v_min_i32_e32 v4, 0, v1
+; GFX6-NEXT: v_bfrev_b32_e32 v5, -2
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v3
; GFX6-NEXT: v_max_i32_e32 v3, 0, v1
; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0x80000000, v4
-; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0x7fffffff, v3
+; GFX6-NEXT: v_sub_i32_e32 v3, vcc, v5, v3
; GFX6-NEXT: v_max_i32_e32 v2, v4, v2
; GFX6-NEXT: v_min_i32_e32 v2, v2, v3
; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v2
@@ -2951,20 +2953,22 @@ define amdgpu_ps float @saddsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) {
; GFX6-LABEL: saddsat_v2i16_vs:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX6-NEXT: v_min_i32_e32 v3, 0, v0
+; GFX6-NEXT: v_min_i32_e32 v4, 0, v0
+; GFX6-NEXT: v_bfrev_b32_e32 v5, 1
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: v_max_i32_e32 v2, 0, v0
-; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0x80000000, v3
+; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v5, v4
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 0x7fffffff, v2
-; GFX6-NEXT: v_max_i32_e32 v3, s0, v3
+; GFX6-NEXT: v_max_i32_e32 v4, s0, v4
+; GFX6-NEXT: v_min_i32_e32 v2, v4, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT: v_min_i32_e32 v2, v3, v2
-; GFX6-NEXT: v_min_i32_e32 v3, 0, v1
+; GFX6-NEXT: v_bfrev_b32_e32 v3, -2
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
-; GFX6-NEXT: s_lshl_b32 s0, s1, 16
; GFX6-NEXT: v_max_i32_e32 v2, 0, v1
+; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v3, v2
+; GFX6-NEXT: v_min_i32_e32 v3, 0, v1
+; GFX6-NEXT: s_lshl_b32 s0, s1, 16
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0x80000000, v3
-; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 0x7fffffff, v2
; GFX6-NEXT: v_max_i32_e32 v3, s0, v3
; GFX6-NEXT: v_min_i32_e32 v2, v3, v2
; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
index 8d8eca162257a..19dc20c510041 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
@@ -1067,24 +1067,24 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) {
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2
; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v5, v2, v[1:2]
-; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v4, v0
-; CHECK-NEXT: v_subb_u32_e64 v2, vcc, v9, v1, s[4:5]
-; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v9, v1
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
-; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; CHECK-NEXT: v_cndmask_b32_e32 v3, -1, v3, vcc
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v4, v0
+; CHECK-NEXT: v_subb_u32_e64 v2, s[4:5], v9, v1, vcc
+; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v9, v1
+; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v0, v5
-; CHECK-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5]
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v5
-; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc
+; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CHECK-NEXT: v_cndmask_b32_e32 v5, -1, v5, vcc
-; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, 0x1000, v4
+; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5
+; CHECK-NEXT: v_cndmask_b32_e32 v7, -1, v7, vcc
+; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v4, v5
+; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[4:5]
+; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v2
; CHECK-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v1, vcc
-; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
-; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
+; CHECK-NEXT: v_cndmask_b32_e64 v3, -1, v3, s[4:5]
+; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
+; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
@@ -1660,24 +1660,24 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) {
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2
; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v5, v2, v[1:2]
-; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v4, v0
-; CHECK-NEXT: v_subb_u32_e64 v2, vcc, v9, v1, s[4:5]
-; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v9, v1
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
-; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; CHECK-NEXT: v_cndmask_b32_e32 v3, -1, v3, vcc
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v4, v0
+; CHECK-NEXT: v_subb_u32_e64 v2, s[4:5], v9, v1, vcc
+; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v9, v1
+; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v0, v5
-; CHECK-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5]
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v5
-; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc
+; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CHECK-NEXT: v_cndmask_b32_e32 v5, -1, v5, vcc
-; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, 0x12d8fb, v4
+; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5
+; CHECK-NEXT: v_cndmask_b32_e32 v7, -1, v7, vcc
+; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v4, v5
+; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[4:5]
+; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v2
; CHECK-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v1, vcc
-; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
-; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
+; CHECK-NEXT: v_cndmask_b32_e64 v3, -1, v3, s[4:5]
+; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
+; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
index 2673ac4fb5bae..c1b225562b77b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
@@ -233,16 +233,17 @@ define i16 @v_ssubsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1
; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0x80000001, v4
-; GFX6-NEXT: v_min_i32_e32 v5, -1, v0
-; GFX6-NEXT: v_bfrev_b32_e32 v6, 1
-; GFX6-NEXT: v_add_i32_e32 v5, vcc, v5, v6
+; GFX6-NEXT: v_min_i32_e32 v6, -1, v0
+; GFX6-NEXT: v_bfrev_b32_e32 v7, 1
+; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v7
; GFX6-NEXT: v_max_i32_e32 v1, v4, v1
-; GFX6-NEXT: v_min_i32_e32 v1, v1, v5
+; GFX6-NEXT: v_min_i32_e32 v1, v1, v6
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v2
+; GFX6-NEXT: v_mov_b32_e32 v5, 0x80000001
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v3
; GFX6-NEXT: v_max_i32_e32 v3, -1, v1
-; GFX6-NEXT: v_add_i32_e32 v3, vcc, 0x80000001, v3
+; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v5
; GFX6-NEXT: v_min_i32_e32 v4, -1, v1
; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0x80000000, v4
; GFX6-NEXT: v_max_i32_e32 v2, v3, v2
@@ -1260,7 +1261,8 @@ define <2 x i32> @v_ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; GFX6-NEXT: v_max_i32_e32 v4, -1, v0
; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0x80000001, v4
; GFX6-NEXT: v_min_i32_e32 v5, -1, v0
-; GFX6-NEXT: v_add_i32_e32 v5, vcc, 0x80000000, v5
+; GFX6-NEXT: v_bfrev_b32_e32 v6, 1
+; GFX6-NEXT: v_add_i32_e32 v5, vcc, v5, v6
; GFX6-NEXT: v_max_i32_e32 v2, v4, v2
; GFX6-NEXT: v_min_i32_e32 v2, v2, v5
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
@@ -1279,7 +1281,8 @@ define <2 x i32> @v_ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; GFX8-NEXT: v_max_i32_e32 v4, -1, v0
; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0x80000001, v4
; GFX8-NEXT: v_min_i32_e32 v5, -1, v0
-; GFX8-NEXT: v_add_u32_e32 v5, vcc, 0x80000000, v5
+; GFX8-NEXT: v_bfrev_b32_e32 v6, 1
+; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v6
; GFX8-NEXT: v_max_i32_e32 v2, v4, v2
; GFX8-NEXT: v_min_i32_e32 v2, v2, v5
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v2
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-gas.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-gas.ll
index 4b6375cc60800..153898560fc31 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast-gas.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-gas.ll
@@ -74,12 +74,13 @@ define amdgpu_kernel void @use_private_to_flat_addrspacecast_nonnull(ptr addrspa
; GFX1250-GISEL-NEXT: v_mbcnt_lo_u32_b32 v2, -1, 0
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1250-GISEL-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_lshlrev_b32 v2, 20, v2
+; GFX1250-GISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, s2, v0
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v2, v1, vcc_lo
-; GFX1250-GISEL-NEXT: flat_store_b32 v[0:1], v3 scope:SCOPE_SYS
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-GISEL-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: s_endpgm
%stof = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p5(ptr addrspace(5) %ptr)
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
index 3160e38df5e3f..4fd28be3b8425 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
+++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
@@ -513,51 +513,51 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX908-LABEL: introduced_copy_to_sgpr:
; GFX908: ; %bb.0: ; %bb
; GFX908-NEXT: global_load_ushort v16, v[0:1], off glc
-; GFX908-NEXT: s_load_dwordx4 s[4:7], s[8:9], 0x0
-; GFX908-NEXT: s_load_dwordx2 s[10:11], s[8:9], 0x10
-; GFX908-NEXT: s_load_dword s0, s[8:9], 0x18
-; GFX908-NEXT: s_mov_b32 s12, 0
-; GFX908-NEXT: s_mov_b32 s9, s12
+; GFX908-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
+; GFX908-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
+; GFX908-NEXT: s_load_dword s7, s[8:9], 0x18
+; GFX908-NEXT: s_mov_b32 s6, 0
+; GFX908-NEXT: s_mov_b32 s9, s6
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
-; GFX908-NEXT: v_cvt_f32_u32_e32 v0, s7
-; GFX908-NEXT: s_sub_i32 s1, 0, s7
-; GFX908-NEXT: v_cvt_f32_f16_e32 v17, s0
-; GFX908-NEXT: v_mov_b32_e32 v19, 0
+; GFX908-NEXT: v_cvt_f32_u32_e32 v0, s3
+; GFX908-NEXT: s_sub_i32 s8, 0, s3
+; GFX908-NEXT: v_cvt_f32_f16_e32 v18, s7
+; GFX908-NEXT: v_mov_b32_e32 v17, 0
; GFX908-NEXT: v_rcp_iflag_f32_e32 v2, v0
; GFX908-NEXT: v_mov_b32_e32 v0, 0
; GFX908-NEXT: v_mov_b32_e32 v1, 0
; GFX908-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GFX908-NEXT: v_cvt_u32_f32_e32 v2, v2
-; GFX908-NEXT: v_readfirstlane_b32 s2, v2
-; GFX908-NEXT: s_mul_i32 s1, s1, s2
-; GFX908-NEXT: s_mul_hi_u32 s1, s2, s1
-; GFX908-NEXT: s_add_i32 s2, s2, s1
-; GFX908-NEXT: s_mul_hi_u32 s1, s6, s2
-; GFX908-NEXT: s_mul_i32 s2, s1, s7
-; GFX908-NEXT: s_sub_i32 s2, s6, s2
-; GFX908-NEXT: s_add_i32 s3, s1, 1
-; GFX908-NEXT: s_sub_i32 s6, s2, s7
-; GFX908-NEXT: s_cmp_ge_u32 s2, s7
-; GFX908-NEXT: s_cselect_b32 s1, s3, s1
-; GFX908-NEXT: s_cselect_b32 s2, s6, s2
-; GFX908-NEXT: s_add_i32 s3, s1, 1
-; GFX908-NEXT: s_cmp_ge_u32 s2, s7
-; GFX908-NEXT: s_cselect_b32 s8, s3, s1
-; GFX908-NEXT: s_lshr_b32 s2, s0, 16
-; GFX908-NEXT: v_cvt_f32_f16_e32 v18, s2
-; GFX908-NEXT: s_lshl_b64 s[6:7], s[4:5], 5
-; GFX908-NEXT: s_lshl_b64 s[14:15], s[10:11], 5
-; GFX908-NEXT: s_and_b64 s[0:1], exec, s[0:1]
-; GFX908-NEXT: s_or_b32 s14, s14, 28
-; GFX908-NEXT: s_lshl_b64 s[16:17], s[8:9], 5
+; GFX908-NEXT: v_readfirstlane_b32 s10, v2
+; GFX908-NEXT: s_mul_i32 s8, s8, s10
+; GFX908-NEXT: s_mul_hi_u32 s8, s10, s8
+; GFX908-NEXT: s_add_i32 s10, s10, s8
+; GFX908-NEXT: s_mul_hi_u32 s8, s2, s10
+; GFX908-NEXT: s_mul_i32 s10, s8, s3
+; GFX908-NEXT: s_sub_i32 s2, s2, s10
+; GFX908-NEXT: s_add_i32 s11, s8, 1
+; GFX908-NEXT: s_sub_i32 s10, s2, s3
+; GFX908-NEXT: s_cmp_ge_u32 s2, s3
+; GFX908-NEXT: s_cselect_b32 s8, s11, s8
+; GFX908-NEXT: s_cselect_b32 s2, s10, s2
+; GFX908-NEXT: s_add_i32 s10, s8, 1
+; GFX908-NEXT: s_cmp_ge_u32 s2, s3
+; GFX908-NEXT: s_cselect_b32 s8, s10, s8
+; GFX908-NEXT: s_lshr_b32 s2, s7, 16
+; GFX908-NEXT: v_cvt_f32_f16_e32 v19, s2
+; GFX908-NEXT: s_lshl_b64 s[10:11], s[0:1], 5
+; GFX908-NEXT: s_lshl_b64 s[12:13], s[4:5], 5
+; GFX908-NEXT: s_or_b32 s12, s12, 28
+; GFX908-NEXT: s_lshl_b64 s[14:15], s[8:9], 5
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: v_readfirstlane_b32 s2, v16
; GFX908-NEXT: s_and_b32 s2, 0xffff, s2
-; GFX908-NEXT: s_mul_i32 s3, s5, s2
-; GFX908-NEXT: s_mul_hi_u32 s5, s4, s2
-; GFX908-NEXT: s_mul_i32 s2, s4, s2
-; GFX908-NEXT: s_add_i32 s3, s5, s3
-; GFX908-NEXT: s_lshl_b64 s[4:5], s[2:3], 5
+; GFX908-NEXT: s_mul_i32 s1, s1, s2
+; GFX908-NEXT: s_mul_hi_u32 s3, s0, s2
+; GFX908-NEXT: s_mul_i32 s0, s0, s2
+; GFX908-NEXT: s_add_i32 s1, s3, s1
+; GFX908-NEXT: s_lshl_b64 s[16:17], s[0:1], 5
+; GFX908-NEXT: s_and_b64 s[0:1], exec, s[0:1]
; GFX908-NEXT: s_branch .LBB3_2
; GFX908-NEXT: .LBB3_1: ; %Flow20
; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1
@@ -572,56 +572,56 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX908-NEXT: ; %bb.3: ; %bb14
; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1
; GFX908-NEXT: global_load_dwordx2 v[2:3], v[0:1], off
-; GFX908-NEXT: v_cmp_gt_i64_e64 s[2:3], s[10:11], -1
-; GFX908-NEXT: s_mov_b32 s13, s12
+; GFX908-NEXT: v_cmp_gt_i64_e64 s[2:3], s[4:5], -1
+; GFX908-NEXT: s_mov_b32 s7, s6
; GFX908-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[2:3]
-; GFX908-NEXT: v_mov_b32_e32 v4, s12
+; GFX908-NEXT: v_mov_b32_e32 v4, s6
; GFX908-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, v6
-; GFX908-NEXT: v_mov_b32_e32 v6, s12
-; GFX908-NEXT: v_mov_b32_e32 v8, s12
-; GFX908-NEXT: v_mov_b32_e32 v5, s13
-; GFX908-NEXT: v_mov_b32_e32 v7, s13
-; GFX908-NEXT: v_mov_b32_e32 v9, s13
-; GFX908-NEXT: v_cmp_lt_i64_e64 s[18:19], s[10:11], 0
+; GFX908-NEXT: v_mov_b32_e32 v6, s6
+; GFX908-NEXT: v_mov_b32_e32 v9, s7
+; GFX908-NEXT: v_mov_b32_e32 v5, s7
+; GFX908-NEXT: v_mov_b32_e32 v7, s7
+; GFX908-NEXT: v_mov_b32_e32 v8, s6
+; GFX908-NEXT: v_cmp_lt_i64_e64 s[18:19], s[4:5], 0
; GFX908-NEXT: v_mov_b32_e32 v11, v5
-; GFX908-NEXT: s_mov_b64 s[20:21], s[14:15]
+; GFX908-NEXT: s_mov_b64 s[20:21], s[12:13]
; GFX908-NEXT: v_mov_b32_e32 v10, v4
; GFX908-NEXT: s_waitcnt vmcnt(0)
-; GFX908-NEXT: v_readfirstlane_b32 s9, v2
-; GFX908-NEXT: v_readfirstlane_b32 s13, v3
-; GFX908-NEXT: s_add_u32 s9, s9, 1
-; GFX908-NEXT: s_addc_u32 s13, s13, 0
-; GFX908-NEXT: s_mul_hi_u32 s22, s6, s9
-; GFX908-NEXT: s_mul_i32 s13, s6, s13
-; GFX908-NEXT: s_mul_i32 s23, s7, s9
-; GFX908-NEXT: s_add_i32 s13, s22, s13
-; GFX908-NEXT: s_mul_i32 s9, s6, s9
-; GFX908-NEXT: s_add_i32 s13, s13, s23
+; GFX908-NEXT: v_readfirstlane_b32 s7, v2
+; GFX908-NEXT: v_readfirstlane_b32 s9, v3
+; GFX908-NEXT: s_add_u32 s7, s7, 1
+; GFX908-NEXT: s_addc_u32 s9, s9, 0
+; GFX908-NEXT: s_mul_hi_u32 s22, s10, s7
+; GFX908-NEXT: s_mul_i32 s9, s10, s9
+; GFX908-NEXT: s_mul_i32 s23, s11, s7
+; GFX908-NEXT: s_add_i32 s9, s22, s9
+; GFX908-NEXT: s_mul_i32 s7, s10, s7
+; GFX908-NEXT: s_add_i32 s9, s9, s23
; GFX908-NEXT: s_branch .LBB3_5
; GFX908-NEXT: .LBB3_4: ; %bb58
; GFX908-NEXT: ; in Loop: Header=BB3_5 Depth=2
; GFX908-NEXT: v_add_co_u32_sdwa v2, vcc, v2, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX908-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
-; GFX908-NEXT: s_add_u32 s20, s20, s4
+; GFX908-NEXT: s_add_u32 s20, s20, s16
; GFX908-NEXT: v_cmp_lt_i64_e64 s[24:25], -1, v[2:3]
-; GFX908-NEXT: s_addc_u32 s21, s21, s5
+; GFX908-NEXT: s_addc_u32 s21, s21, s17
; GFX908-NEXT: s_mov_b64 s[22:23], 0
; GFX908-NEXT: s_andn2_b64 vcc, exec, s[24:25]
; GFX908-NEXT: s_cbranch_vccz .LBB3_9
; GFX908-NEXT: .LBB3_5: ; %bb16
; GFX908-NEXT: ; Parent Loop BB3_2 Depth=1
; GFX908-NEXT: ; => This Inner Loop Header: Depth=2
-; GFX908-NEXT: s_add_u32 s22, s20, s9
-; GFX908-NEXT: s_addc_u32 s23, s21, s13
-; GFX908-NEXT: global_load_dword v21, v19, s[22:23] offset:-12 glc
+; GFX908-NEXT: s_add_u32 s22, s20, s7
+; GFX908-NEXT: s_addc_u32 s23, s21, s9
+; GFX908-NEXT: global_load_dword v21, v17, s[22:23] offset:-12 glc
; GFX908-NEXT: s_waitcnt vmcnt(0)
-; GFX908-NEXT: global_load_dword v20, v19, s[22:23] offset:-8 glc
+; GFX908-NEXT: global_load_dword v20, v17, s[22:23] offset:-8 glc
; GFX908-NEXT: s_waitcnt vmcnt(0)
-; GFX908-NEXT: global_load_dword v12, v19, s[22:23] offset:-4 glc
+; GFX908-NEXT: global_load_dword v12, v17, s[22:23] offset:-4 glc
; GFX908-NEXT: s_waitcnt vmcnt(0)
-; GFX908-NEXT: global_load_dword v12, v19, s[22:23] glc
+; GFX908-NEXT: global_load_dword v12, v17, s[22:23] glc
; GFX908-NEXT: s_waitcnt vmcnt(0)
-; GFX908-NEXT: ds_read_b64 v[12:13], v19
+; GFX908-NEXT: ds_read_b64 v[12:13], v17
; GFX908-NEXT: ds_read_b64 v[14:15], v0
; GFX908-NEXT: s_and_b64 vcc, exec, s[2:3]
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
@@ -632,8 +632,8 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX908-NEXT: v_cvt_f32_f16_e32 v21, v21
; GFX908-NEXT: v_cvt_f32_f16_sdwa v23, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX908-NEXT: v_cvt_f32_f16_e32 v20, v20
-; GFX908-NEXT: v_add_f32_e32 v24, v17, v12
-; GFX908-NEXT: v_add_f32_e32 v25, v18, v13
+; GFX908-NEXT: v_add_f32_e32 v24, v18, v12
+; GFX908-NEXT: v_add_f32_e32 v25, v19, v13
; GFX908-NEXT: v_add_f32_e32 v26, 0, v12
; GFX908-NEXT: v_add_f32_e32 v27, 0, v13
; GFX908-NEXT: v_add_f32_e32 v15, v22, v15
@@ -667,10 +667,10 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX908-NEXT: s_cbranch_vccz .LBB3_1
; GFX908-NEXT: ; %bb.11: ; %bb12
; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1
-; GFX908-NEXT: s_add_u32 s10, s10, s8
-; GFX908-NEXT: s_addc_u32 s11, s11, 0
-; GFX908-NEXT: s_add_u32 s14, s14, s16
-; GFX908-NEXT: s_addc_u32 s15, s15, s17
+; GFX908-NEXT: s_add_u32 s4, s4, s8
+; GFX908-NEXT: s_addc_u32 s5, s5, 0
+; GFX908-NEXT: s_add_u32 s12, s12, s14
+; GFX908-NEXT: s_addc_u32 s13, s13, s15
; GFX908-NEXT: s_mov_b64 s[2:3], 0
; GFX908-NEXT: s_branch .LBB3_1
; GFX908-NEXT: .LBB3_12: ; %DummyReturnBlock
@@ -688,12 +688,12 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s7
; GFX90A-NEXT: s_sub_i32 s1, 0, s7
; GFX90A-NEXT: v_mov_b32_e32 v19, 0
-; GFX90A-NEXT: v_rcp_iflag_f32_e32 v2, v0
-; GFX90A-NEXT: v_pk_mov_b32 v[0:1], 0, 0
-; GFX90A-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
-; GFX90A-NEXT: v_cvt_u32_f32_e32 v3, v2
-; GFX90A-NEXT: v_cvt_f32_f16_e32 v2, s0
-; GFX90A-NEXT: v_readfirstlane_b32 s2, v3
+; GFX90A-NEXT: v_pk_mov_b32 v[2:3], 0, 0
+; GFX90A-NEXT: v_rcp_iflag_f32_e32 v0, v0
+; GFX90A-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
+; GFX90A-NEXT: v_cvt_u32_f32_e32 v1, v0
+; GFX90A-NEXT: v_cvt_f32_f16_e32 v0, s0
+; GFX90A-NEXT: v_readfirstlane_b32 s2, v1
; GFX90A-NEXT: s_mul_i32 s1, s1, s2
; GFX90A-NEXT: s_mul_hi_u32 s1, s2, s1
; GFX90A-NEXT: s_add_i32 s2, s2, s1
@@ -709,7 +709,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX90A-NEXT: s_cmp_ge_u32 s2, s7
; GFX90A-NEXT: s_cselect_b32 s8, s3, s1
; GFX90A-NEXT: s_lshr_b32 s2, s0, 16
-; GFX90A-NEXT: v_cvt_f32_f16_e32 v3, s2
+; GFX90A-NEXT: v_cvt_f32_f16_e32 v1, s2
; GFX90A-NEXT: s_lshl_b64 s[6:7], s[4:5], 5
; GFX90A-NEXT: s_lshl_b64 s[14:15], s[10:11], 5
; GFX90A-NEXT: s_and_b64 s[0:1], exec, s[0:1]
@@ -736,7 +736,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX90A-NEXT: s_cbranch_vccz .LBB3_10
; GFX90A-NEXT: ; %bb.3: ; %bb14
; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1
-; GFX90A-NEXT: global_load_dwordx2 v[4:5], v[0:1], off
+; GFX90A-NEXT: global_load_dwordx2 v[4:5], v[2:3], off
; GFX90A-NEXT: v_cmp_gt_i64_e64 s[2:3], s[10:11], -1
; GFX90A-NEXT: s_mov_b32 s13, s12
; GFX90A-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[2:3]
@@ -794,7 +794,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX90A-NEXT: v_cvt_f32_f16_e32 v22, v21
; GFX90A-NEXT: v_cvt_f32_f16_sdwa v21, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX90A-NEXT: v_cvt_f32_f16_e32 v20, v20
-; GFX90A-NEXT: v_pk_add_f32 v[24:25], v[2:3], v[14:15]
+; GFX90A-NEXT: v_pk_add_f32 v[24:25], v[0:1], v[14:15]
; GFX90A-NEXT: v_pk_add_f32 v[26:27], v[14:15], 0 op_sel_hi:[1,0]
; GFX90A-NEXT: v_pk_add_f32 v[16:17], v[22:23], v[16:17]
; GFX90A-NEXT: v_pk_add_f32 v[14:15], v[20:21], v[14:15]
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
index c7385e4324e2c..b2dcd77274989 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
@@ -10032,20 +10032,20 @@ define i64 @udiv_i64_gt_smax(i8 %size) {
; GFX9-LABEL: udiv_i64_gt_smax:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v1, 31
-; GFX9-NEXT: v_not_b32_sdwa v4, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
+; GFX9-NEXT: v_not_b32_sdwa v5, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
; GFX9-NEXT: s_mov_b32 s4, 0xcccccccd
-; GFX9-NEXT: v_ashrrev_i32_sdwa v1, v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
-; GFX9-NEXT: v_mul_hi_u32 v0, v4, s4
-; GFX9-NEXT: v_not_b32_e32 v5, v1
-; GFX9-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-NEXT: v_mul_hi_u32 v1, v5, s4
+; GFX9-NEXT: v_mov_b32_e32 v3, 31
+; GFX9-NEXT: v_ashrrev_i32_sdwa v0, v3, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: v_not_b32_e32 v6, v0
+; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, s4, v[1:2]
; GFX9-NEXT: s_mov_b32 s6, 0xcccccccc
-; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, s4, v[0:1]
-; GFX9-NEXT: v_mov_b32_e32 v0, v2
-; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, s6, v[0:1]
-; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v3, v1
+; GFX9-NEXT: v_mov_b32_e32 v1, v3
+; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, s6, v[1:2]
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v4, v1
; GFX9-NEXT: v_addc_co_u32_e64 v1, s[4:5], 0, 0, vcc
-; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, s6, v[0:1]
+; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, s6, v[0:1]
; GFX9-NEXT: v_alignbit_b32 v0, v1, v0, 3
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 3, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
index 17737cccec7c4..23c5f4f5506f3 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
@@ -14614,8 +14614,8 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) {
; GFX7LESS-NEXT: s_mov_b32 s2, -1
; GFX7LESS-NEXT: v_readfirstlane_b32 s5, v1
; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v0
-; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0
; GFX7LESS-NEXT: v_cndmask_b32_e64 v0, 5, 0, vcc
+; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v2, s4
; GFX7LESS-NEXT: v_cmp_gt_u64_e32 vcc, s[4:5], v[0:1]
; GFX7LESS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
@@ -14645,8 +14645,8 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) {
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8-NEXT: v_readfirstlane_b32 s5, v1
; GFX8-NEXT: v_readfirstlane_b32 s4, v0
-; GFX8-NEXT: v_mov_b32_e32 v1, 0
; GFX8-NEXT: v_cndmask_b32_e64 v0, 5, 0, vcc
+; GFX8-NEXT: v_mov_b32_e32 v1, 0
; GFX8-NEXT: v_cmp_gt_u64_e32 vcc, s[4:5], v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v2, s4
; GFX8-NEXT: v_mov_b32_e32 v1, s5
@@ -14677,8 +14677,8 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) {
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-NEXT: v_readfirstlane_b32 s5, v1
; GFX9-NEXT: v_readfirstlane_b32 s4, v0
-; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: v_cndmask_b32_e64 v0, 5, 0, vcc
+; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: v_cmp_gt_u64_e32 vcc, s[4:5], v[0:1]
; GFX9-NEXT: v_mov_b32_e32 v2, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
@@ -14711,8 +14711,8 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) {
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064-NEXT: v_readfirstlane_b32 s3, v1
; GFX1064-NEXT: v_readfirstlane_b32 s2, v0
-; GFX1064-NEXT: v_mov_b32_e32 v1, 0
; GFX1064-NEXT: v_cndmask_b32_e64 v0, 5, 0, vcc
+; GFX1064-NEXT: v_mov_b32_e32 v1, 0
; GFX1064-NEXT: v_cmp_gt_u64_e32 vcc, s[2:3], v[0:1]
; GFX1064-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc
; GFX1064-NEXT: v_cndmask_b32_e64 v1, 0, s3, vcc
@@ -14742,8 +14742,8 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) {
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1032-NEXT: v_readfirstlane_b32 s3, v1
; GFX1032-NEXT: v_readfirstlane_b32 s2, v0
-; GFX1032-NEXT: v_mov_b32_e32 v1, 0
; GFX1032-NEXT: v_cndmask_b32_e64 v0, 5, 0, vcc_lo
+; GFX1032-NEXT: v_mov_b32_e32 v1, 0
; GFX1032-NEXT: v_cmp_gt_u64_e32 vcc_lo, s[2:3], v[0:1]
; GFX1032-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo
; GFX1032-NEXT: v_cndmask_b32_e64 v1, 0, s3, vcc_lo
@@ -14774,8 +14774,8 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) {
; GFX1164-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1164-NEXT: v_readfirstlane_b32 s3, v1
; GFX1164-NEXT: v_readfirstlane_b32 s2, v0
-; GFX1164-NEXT: v_mov_b32_e32 v1, 0
; GFX1164-NEXT: v_cndmask_b32_e64 v0, 5, 0, vcc
+; GFX1164-NEXT: v_mov_b32_e32 v1, 0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164-NEXT: v_cmp_gt_u64_e32 vcc, s[2:3], v[0:1]
; GFX1164-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc
@@ -14805,8 +14805,8 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) {
; GFX1132-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1132-NEXT: v_readfirstlane_b32 s3, v1
; GFX1132-NEXT: v_readfirstlane_b32 s2, v0
-; GFX1132-NEXT: v_mov_b32_e32 v1, 0
; GFX1132-NEXT: v_cndmask_b32_e64 v0, 5, 0, vcc_lo
+; GFX1132-NEXT: v_mov_b32_e32 v1, 0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_cmp_gt_u64_e32 vcc_lo, s[2:3], v[0:1]
; GFX1132-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo
diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
index 006fe51a32c72..12f8a59f0b84b 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
@@ -25,15 +25,15 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: S_BITCMP1_B32 renamable $sgpr17, 8, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_CSELECT_B64 -1, 0, implicit killed $scc
; GFX90A-NEXT: renamable $sgpr30_sgpr31 = S_XOR_B64 killed renamable $sgpr18_sgpr19, -1, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $vgpr3 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr2 = DS_READ_B32_gfx9 renamable $vgpr3, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(3) null`, align 8, addrspace 3)
+ ; GFX90A-NEXT: renamable $vgpr5 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr4 = DS_READ_B32_gfx9 renamable $vgpr5, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(3) null`, align 8, addrspace 3)
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 0
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr28_sgpr29, implicit-def dead $scc
; GFX90A-NEXT: S_CBRANCH_VCCZ %bb.2, implicit $vcc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.1.bb103:
; GFX90A-NEXT: successors: %bb.58(0x40000000), %bb.2(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr46_sgpr47:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x00000000000000FF, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr46_sgpr47:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x00000000000000FF, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr4_vgpr5:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 0
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr30_sgpr31, implicit-def dead $scc
@@ -41,10 +41,10 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.2:
; GFX90A-NEXT: successors: %bb.3(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr46, $sgpr47, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr2, $vgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr46, $sgpr47, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr4, $vgpr5
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
- ; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF implicit-def $vgpr18
+ ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+ ; GFX90A-NEXT: renamable $vgpr3 = IMPLICIT_DEF implicit-def $vgpr2
; GFX90A-NEXT: renamable $vgpr21 = IMPLICIT_DEF implicit-def $vgpr20
; GFX90A-NEXT: renamable $vgpr23 = IMPLICIT_DEF implicit-def $vgpr22
; GFX90A-NEXT: renamable $vgpr25 = IMPLICIT_DEF implicit-def $vgpr24
@@ -52,22 +52,21 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.3.Flow17:
; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.57(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr30 = V_AND_B32_e32 1023, $vgpr31, implicit $exec
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr34_sgpr35, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $vgpr15 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr17 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
; GFX90A-NEXT: S_CBRANCH_VCCZ %bb.57, implicit $vcc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.4.bb15:
; GFX90A-NEXT: successors: %bb.35(0x40000000), %bb.5(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr46_sgpr47:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr46_sgpr47:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr4_vgpr5:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $vgpr0_vgpr1 = V_LSHLREV_B64_e64 2, $vgpr2_vgpr3, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr4 = COPY renamable $sgpr25, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr0_vgpr1 = V_LSHLREV_B64_e64 2, $vgpr4_vgpr5, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr2 = COPY renamable $sgpr25, implicit $exec
; GFX90A-NEXT: renamable $vgpr46, renamable $vcc = V_ADD_CO_U32_e64 $sgpr24, $vgpr0, 0, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr47, dead renamable $vcc = V_ADDC_U32_e64 killed $vgpr4, killed $vgpr1, killed $vcc, 0, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr47, dead renamable $vcc = V_ADDC_U32_e64 killed $vgpr2, killed $vgpr1, killed $vcc, 0, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
; GFX90A-NEXT: renamable $vgpr0 = nuw nsw V_LSHLREV_B32_e32 2, $vgpr30, implicit $exec
; GFX90A-NEXT: renamable $vgpr40, renamable $vcc = V_ADD_CO_U32_e64 $vgpr46, killed $vgpr0, 0, implicit $exec
; GFX90A-NEXT: renamable $vgpr41, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr47, killed $vcc, 0, implicit $exec
@@ -76,7 +75,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.5:
; GFX90A-NEXT: successors: %bb.6(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 -1
; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_MOV_B64 0
@@ -89,9 +88,9 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $vgpr10_vgpr11 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr4_vgpr5 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr0_vgpr1 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr62_vgpr63 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr60_vgpr61 = IMPLICIT_DEF
@@ -99,31 +98,30 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr56_vgpr57 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr44_vgpr45 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr42_vgpr43 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr18 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF implicit-def $vgpr14
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
- ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.6.Flow20:
; GFX90A-NEXT: successors: %bb.7(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $vgpr19 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr18 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr21 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
; GFX90A-NEXT: renamable $vgpr20 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr23 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
; GFX90A-NEXT: renamable $vgpr22 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr25 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
; GFX90A-NEXT: renamable $vgpr24 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr25 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr3 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr21 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr23 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.7.Flow19:
; GFX90A-NEXT: successors: %bb.62(0x40000000), %bb.8(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_MOV_B64 0
; GFX90A-NEXT: $sgpr24_sgpr25 = S_AND_SAVEEXEC_B64 $sgpr36_sgpr37, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -131,7 +129,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.8.Flow32:
; GFX90A-NEXT: successors: %bb.9(0x40000000), %bb.10(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr24_sgpr25, implicit-def $scc
; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr18_sgpr19, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -140,15 +138,15 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.9.bb89:
; GFX90A-NEXT: successors: %bb.10(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
- ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_OR_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.10.Flow33:
; GFX90A-NEXT: successors: %bb.11(0x40000000), %bb.12(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc
; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr44_sgpr45, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -157,15 +155,15 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.11.bb84:
; GFX90A-NEXT: successors: %bb.12(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
- ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_OR_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.12.Flow34:
; GFX90A-NEXT: successors: %bb.13(0x40000000), %bb.14(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc
; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr42_sgpr43, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -174,10 +172,10 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.13.bb79:
; GFX90A-NEXT: successors: %bb.14(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
- ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_OR_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.14.Flow35:
@@ -359,7 +357,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.35.bb20:
; GFX90A-NEXT: successors: %bb.37(0x40000000), %bb.36(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr46_sgpr47:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr46_sgpr47:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr0 = FLAT_LOAD_SBYTE renamable $vgpr40_vgpr41, 1024, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i23)
; GFX90A-NEXT: renamable $vgpr42 = V_ADD_CO_U32_e32 1024, $vgpr40, implicit-def $vcc, implicit $exec
@@ -376,36 +374,36 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr43, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec
; GFX90A-NEXT: renamable $vcc = V_CMP_LT_I16_e64 0, killed $vgpr0, implicit $exec
; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $vgpr10_vgpr11 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr4_vgpr5 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr0_vgpr1 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr62_vgpr63 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr60_vgpr61 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr58_vgpr59 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr56_vgpr57 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr44_vgpr45 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr18 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF implicit-def $vgpr14
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
- ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: $sgpr24_sgpr25 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.37, implicit $exec
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.36.Flow21:
; GFX90A-NEXT: successors: %bb.6(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr24_sgpr25, implicit-def $scc
; GFX90A-NEXT: S_BRANCH %bb.6
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.37.bb27:
; GFX90A-NEXT: successors: %bb.39(0x40000000), %bb.38(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19, $sgpr44_sgpr45, $sgpr42_sgpr43, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr64_sgpr65, $sgpr50_sgpr51, $sgpr66_sgpr67
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19, $sgpr44_sgpr45, $sgpr42_sgpr43, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr64_sgpr65, $sgpr50_sgpr51, $sgpr66_sgpr67
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr0 = FLAT_LOAD_UBYTE renamable $vgpr40_vgpr41, 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i30)
; GFX90A-NEXT: renamable $vgpr44 = V_ADD_CO_U32_e32 2048, $vgpr40, implicit-def $vcc, implicit $exec
@@ -416,28 +414,28 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_MOV_B64 0
; GFX90A-NEXT: renamable $vgpr45, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr0, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr10_vgpr11 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr4_vgpr5 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr0_vgpr1 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr62_vgpr63 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr60_vgpr61 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr58_vgpr59 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr56_vgpr57 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr18 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF implicit-def $vgpr14
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
- ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: $sgpr38_sgpr39 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.39, implicit $exec
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.38.Flow22:
; GFX90A-NEXT: successors: %bb.36(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr38_sgpr39, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_XOR_B64 $exec, -1, implicit-def dead $scc
@@ -458,7 +456,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.39.bb34:
; GFX90A-NEXT: successors: %bb.41(0x40000000), %bb.40(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr46_sgpr47:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19, $sgpr44_sgpr45, $sgpr50_sgpr51, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr54_sgpr55, $sgpr62_sgpr63, $sgpr52_sgpr53, $sgpr64_sgpr65, $sgpr66_sgpr67
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr46_sgpr47:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19, $sgpr44_sgpr45, $sgpr50_sgpr51, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr54_sgpr55, $sgpr62_sgpr63, $sgpr52_sgpr53, $sgpr64_sgpr65, $sgpr66_sgpr67
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr0 = FLAT_LOAD_UBYTE renamable $vgpr40_vgpr41, 3072, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i37)
; GFX90A-NEXT: renamable $vgpr56 = V_ADD_CO_U32_e32 3072, $vgpr40, implicit-def $vcc, implicit $exec
@@ -467,27 +465,27 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_MOV_B64 0
; GFX90A-NEXT: renamable $vgpr57, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr0, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr10_vgpr11 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr4_vgpr5 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr0_vgpr1 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr62_vgpr63 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr60_vgpr61 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr58_vgpr59 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr18 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF implicit-def $vgpr14
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
- ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: $sgpr40_sgpr41 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.41, implicit $exec
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.40.Flow23:
; GFX90A-NEXT: successors: %bb.38(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr40_sgpr41, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_XOR_B64 $exec, -1, implicit-def dead $scc
@@ -507,7 +505,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.41.bb41:
; GFX90A-NEXT: successors: %bb.46(0x40000000), %bb.42(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr46_sgpr47:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr52_sgpr53, $sgpr50_sgpr51, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr46_sgpr47:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr52_sgpr53, $sgpr50_sgpr51, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr58 = V_ADD_CO_U32_e32 4096, $vgpr40, implicit-def $vcc, implicit $exec
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = COPY $vcc
@@ -516,31 +514,31 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_MOV_B64 -1
; GFX90A-NEXT: renamable $sgpr48_sgpr49 = COPY renamable $sgpr36_sgpr37
- ; GFX90A-NEXT: renamable $vgpr18, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr3, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr0, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr10_vgpr11 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr4_vgpr5 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr0_vgpr1 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr62_vgpr63 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr60_vgpr61 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr18 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF implicit-def $vgpr14
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
- ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: $sgpr42_sgpr43 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.46, implicit $exec
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.42.Flow24:
; GFX90A-NEXT: successors: %bb.40(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr3, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr42_sgpr43, implicit-def $scc
- ; GFX90A-NEXT: renamable $vgpr59 = COPY killed renamable $vgpr18, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr59 = COPY killed renamable $vgpr3, implicit $exec
; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_XOR_B64 $exec, -1, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_AND_B64 killed renamable $sgpr44_sgpr45, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
@@ -557,7 +555,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.43.bb55:
; GFX90A-NEXT: successors: %bb.48(0x40000000), %bb.44(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr44_sgpr45, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr56_sgpr57, $sgpr48_sgpr49
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr3, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr44_sgpr45, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr56_sgpr57, $sgpr48_sgpr49
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: S_BITCMP1_B32 killed renamable $sgpr17, 16, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_CSELECT_B64 -1, 0, implicit killed $scc
@@ -569,26 +567,26 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.44:
; GFX90A-NEXT: successors: %bb.45(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr57, $vgpr56, $vgpr18, $vgpr30, $vgpr31, $vgpr60, $vgpr62, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $vgpr61, $vgpr58, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr47, $vgpr46, $vgpr2, $vgpr3, $vgpr45, $vgpr44, $vgpr43, $vgpr42, $vgpr41, $vgpr40, $vgpr63
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr3, $vgpr57, $vgpr56, $vgpr30, $vgpr31, $vgpr60, $vgpr62, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $vgpr61, $vgpr58, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr47, $vgpr46, $vgpr2, $vgpr4, $vgpr5, $vgpr45, $vgpr44, $vgpr43, $vgpr42, $vgpr41, $vgpr40, $vgpr63
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = COPY renamable $sgpr36_sgpr37
+ ; GFX90A-NEXT: renamable $vgpr10_vgpr11 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr4_vgpr5 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr0_vgpr1 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr18 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF implicit-def $vgpr14
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
- ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 0
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.45.Flow26:
; GFX90A-NEXT: successors: %bb.47(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr3, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_XOR_B64 $exec, -1, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr70_sgpr71 = S_AND_B64 killed renamable $sgpr44_sgpr45, $exec, implicit-def dead $scc
@@ -604,7 +602,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.46.bb48:
; GFX90A-NEXT: successors: %bb.43(0x40000000), %bb.47(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr64_sgpr65, $sgpr50_sgpr51, $sgpr66_sgpr67, $sgpr44_sgpr45, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr56_sgpr57
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr3, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr64_sgpr65, $sgpr50_sgpr51, $sgpr66_sgpr67, $sgpr44_sgpr45, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr56_sgpr57
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr60 = V_ADD_CO_U32_e32 5120, $vgpr40, implicit-def $vcc, implicit $exec
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = COPY $vcc
@@ -618,25 +616,25 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr61, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $sgpr18_sgpr19, 0, implicit $exec
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr0, implicit $exec
; GFX90A-NEXT: renamable $sgpr70_sgpr71 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $vgpr10_vgpr11 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr4_vgpr5 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr0_vgpr1 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr62_vgpr63 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr18 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF implicit-def $vgpr14
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
- ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: $sgpr18_sgpr19 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.43, implicit $exec
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.47.Flow25:
; GFX90A-NEXT: successors: %bb.42(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $sgpr70_sgpr71, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr3, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $sgpr70_sgpr71, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr18_sgpr19, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_XOR_B64 $exec, -1, implicit-def dead $scc
@@ -654,21 +652,21 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.48.bb63:
; GFX90A-NEXT: successors: %bb.50(0x40000000), %bb.49(0x40000000)
- ; GFX90A-NEXT: liveins: $vcc, $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47:0x000000000000000F, $sgpr50_sgpr51, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr56_sgpr57, $sgpr48_sgpr49
+ ; GFX90A-NEXT: liveins: $vcc, $sgpr14, $sgpr15, $sgpr16, $vgpr3, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47:0x000000000000000F, $sgpr50_sgpr51, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr56_sgpr57, $sgpr48_sgpr49
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_MOV_B64 0
; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.50, implicit $vcc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.49:
; GFX90A-NEXT: successors: %bb.44(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr56_sgpr57
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr3, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr56_sgpr57
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 -1
; GFX90A-NEXT: S_BRANCH %bb.44
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.50.bb68:
; GFX90A-NEXT: successors: %bb.54(0x40000000), %bb.51(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47:0x000000000000000F, $sgpr50_sgpr51, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr56_sgpr57
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr3, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47:0x000000000000000F, $sgpr50_sgpr51, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr56_sgpr57
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr0 = nuw nsw V_LSHLREV_B32_e32 3, $vgpr30, implicit $exec
; GFX90A-NEXT: renamable $vgpr1 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
@@ -677,108 +675,115 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.51:
; GFX90A-NEXT: successors: %bb.45(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr56_sgpr57
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr3, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr56_sgpr57
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 -1
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = COPY renamable $sgpr36_sgpr37
+ ; GFX90A-NEXT: renamable $vgpr10_vgpr11 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr4_vgpr5 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr18 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF implicit-def $vgpr14
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
- ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: S_BRANCH %bb.45
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.52.bb80:
; GFX90A-NEXT: successors: %bb.59(0x40000000), %bb.53(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47:0x000000000000000F, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr60_sgpr61, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr3, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47:0x000000000000000F, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr60_sgpr61, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr17 = S_BFE_U32 renamable $sgpr20, 65560, implicit-def dead $scc
; GFX90A-NEXT: S_CMP_EQ_U32 killed renamable $sgpr17, 0, implicit-def $scc
- ; GFX90A-NEXT: renamable $vgpr6 = V_ADD_CO_U32_e32 4096, $vgpr0, implicit-def $vcc, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr7, dead renamable $sgpr52_sgpr53 = V_ADDC_U32_e64 0, 0, killed $vcc, 0, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr8 = V_ADD_CO_U32_e32 4096, $vgpr0, implicit-def $vcc, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr9, dead renamable $sgpr52_sgpr53 = V_ADDC_U32_e64 0, 0, killed $vcc, 0, implicit $exec
; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.59, implicit killed $scc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.53:
; GFX90A-NEXT: successors: %bb.61(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr3, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 -1
; GFX90A-NEXT: renamable $sgpr62_sgpr63 = COPY renamable $sgpr36_sgpr37
- ; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr10_vgpr11 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr18 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF implicit-def $vgpr14
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
- ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: S_BRANCH %bb.61
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.54.bb73:
; GFX90A-NEXT: successors: %bb.52(0x40000000), %bb.55(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47:0x000000000000000F, $sgpr48_sgpr49, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr3, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47:0x000000000000000F, $sgpr48_sgpr49, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $vgpr6 = FLAT_LOAD_UBYTE renamable $vgpr0_vgpr1, 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i76)
- ; GFX90A-NEXT: renamable $vgpr4 = V_ADD_CO_U32_e32 2048, $vgpr0, implicit-def $vcc, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr8 = FLAT_LOAD_UBYTE renamable $vgpr0_vgpr1, 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i76)
+ ; GFX90A-NEXT: renamable $vgpr6 = V_ADD_CO_U32_e32 2048, $vgpr0, implicit-def $vcc, implicit $exec
; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_MOV_B64 -1
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = COPY renamable $sgpr36_sgpr37
- ; GFX90A-NEXT: renamable $vgpr5, dead renamable $sgpr58_sgpr59 = V_ADDC_U32_e64 0, 0, killed $vcc, 0, implicit $exec
- ; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr6, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr7, dead renamable $sgpr58_sgpr59 = V_ADDC_U32_e64 0, 0, killed $vcc, 0, implicit $exec
+ ; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr8, implicit $exec
; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $vgpr10_vgpr11 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr18 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF implicit-def $vgpr14
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
- ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: $sgpr60_sgpr61 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.52, implicit $exec
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.55.Flow29:
; GFX90A-NEXT: successors: %bb.45(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr3, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr60_sgpr61, implicit-def $scc
; GFX90A-NEXT: S_BRANCH %bb.45
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.56.bb90:
; GFX90A-NEXT: successors: %bb.60(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47:0x000000000000000F, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr3, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47:0x000000000000000F, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr53 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $sgpr64_sgpr65, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr10 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr14_vgpr15 = DS_READ_B64_gfx9 killed renamable $vgpr10, 0, 0, implicit $exec :: (load (s64) from `ptr addrspace(3) null`, addrspace 3)
- ; GFX90A-NEXT: renamable $vgpr10 = COPY renamable $sgpr21, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr16_vgpr17 = DS_READ_B64_gfx9 killed renamable $vgpr10, 0, 0, implicit $exec :: (load (s64) from %ir.7, addrspace 3)
- ; GFX90A-NEXT: renamable $vgpr10 = COPY renamable $sgpr22, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr12_vgpr13 = DS_READ_B64_gfx9 killed renamable $vgpr10, 0, 0, implicit $exec :: (load (s64) from %ir.8, addrspace 3)
- ; GFX90A-NEXT: renamable $vgpr10 = COPY renamable $sgpr46, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr11 = V_ALIGNBIT_B32_opsel_e64 0, killed $sgpr47, 0, killed $vgpr10, 0, 1, 0, 0, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr52 = V_ALIGNBIT_B32_opsel_e64 0, $vgpr17, 0, $vgpr16, 0, 1, 0, 0, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr17 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr12_sgpr13, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr15 = V_ALIGNBIT_B32_opsel_e64 0, $vgpr15, 0, $vgpr14, 0, 1, 0, 0, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr12 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr16_vgpr17 = DS_READ_B64_gfx9 killed renamable $vgpr12, 0, 0, implicit $exec :: (load (s64) from `ptr addrspace(3) null`, addrspace 3)
+ ; GFX90A-NEXT: renamable $vgpr12 = COPY renamable $sgpr21, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr18_vgpr19 = DS_READ_B64_gfx9 killed renamable $vgpr12, 0, 0, implicit $exec :: (load (s64) from %ir.7, addrspace 3)
+ ; GFX90A-NEXT: renamable $vgpr12 = COPY renamable $sgpr22, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr14_vgpr15 = DS_READ_B64_gfx9 killed renamable $vgpr12, 0, 0, implicit $exec :: (load (s64) from %ir.8, addrspace 3)
+ ; GFX90A-NEXT: renamable $vgpr12 = COPY renamable $sgpr46, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr13 = V_ALIGNBIT_B32_opsel_e64 0, killed $sgpr47, 0, killed $vgpr12, 0, 1, 0, 0, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr52 = V_ALIGNBIT_B32_opsel_e64 0, $vgpr19, 0, $vgpr18, 0, 1, 0, 0, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr19 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr12_sgpr13, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr17 = V_ALIGNBIT_B32_opsel_e64 0, $vgpr17, 0, $vgpr16, 0, 1, 0, 0, implicit $exec
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_XOR_B64 $exec, -1, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_OR_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $vgpr10 = COPY renamable $vgpr14, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr12 = COPY renamable $vgpr16, implicit $exec
; GFX90A-NEXT: S_BRANCH %bb.60
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.57:
; GFX90A-NEXT: successors: %bb.7(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: renamable $vgpr19 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr52 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr18 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr14 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr16 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr15 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr53 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_MOV_B64 0
@@ -789,9 +794,9 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $vgpr10_vgpr11 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr4_vgpr5 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr0_vgpr1 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr62_vgpr63 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr60_vgpr61 = IMPLICIT_DEF
@@ -801,27 +806,22 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr42_vgpr43 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr40_vgpr41 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr46_vgpr47 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr14 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr52 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr16 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr53 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr13 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr12 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr17 = COPY renamable $vgpr5, implicit $exec
; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 0
; GFX90A-NEXT: S_BRANCH %bb.7
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.58.bb105:
; GFX90A-NEXT: successors: %bb.3(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr46_sgpr47:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x00000000000000FF, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr46_sgpr47:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x00000000000000FF, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr4_vgpr5:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr0 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
; GFX90A-NEXT: renamable $vgpr22_vgpr23 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from `ptr addrspace(3) null`, addrspace 3)
; GFX90A-NEXT: renamable $vgpr0 = COPY renamable $sgpr23, implicit $exec
; GFX90A-NEXT: renamable $vgpr20_vgpr21 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.419, addrspace 3)
; GFX90A-NEXT: renamable $vgpr0 = COPY renamable $sgpr21, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr18_vgpr19 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.7, addrspace 3)
+ ; GFX90A-NEXT: renamable $vgpr2_vgpr3 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.7, addrspace 3)
; GFX90A-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr33, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr10_vgpr11 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.420, addrspace 3)
+ ; GFX90A-NEXT: renamable $vgpr12_vgpr13 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.420, addrspace 3)
; GFX90A-NEXT: renamable $vgpr0 = COPY renamable $sgpr22, implicit $exec
; GFX90A-NEXT: renamable $vgpr24_vgpr25 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.8, addrspace 3)
; GFX90A-NEXT: renamable $sgpr36_sgpr37 = S_MOV_B64 -1
@@ -829,35 +829,35 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.59.bb85:
; GFX90A-NEXT: successors: %bb.56(0x40000000), %bb.60(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47:0x000000000000000F, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr60_sgpr61, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr3, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47:0x000000000000000F, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr60_sgpr61, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $vgpr8 = V_OR_B32_e32 1, $vgpr6, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr9 = COPY renamable $vgpr7, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr10 = FLAT_LOAD_UBYTE renamable $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i86)
+ ; GFX90A-NEXT: renamable $vgpr10 = V_OR_B32_e32 1, $vgpr8, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr11 = COPY renamable $vgpr9, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr12 = FLAT_LOAD_UBYTE renamable $vgpr10_vgpr11, 0, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i86)
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_MOV_B64 -1
- ; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr10, implicit $exec
+ ; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr12, implicit $exec
; GFX90A-NEXT: renamable $sgpr62_sgpr63 = COPY renamable $sgpr36_sgpr37
+ ; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr18 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF implicit-def $vgpr14
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
- ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: $sgpr54_sgpr55 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.56, implicit $exec
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.60.Flow31:
; GFX90A-NEXT: successors: %bb.61(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr3, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr54_sgpr55, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 0
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.61.Flow30:
; GFX90A-NEXT: successors: %bb.55(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr3, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_XOR_B64 $exec, -1, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc
@@ -869,7 +869,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.62.bb140:
; GFX90A-NEXT: successors: %bb.68(0x40000000), %bb.63(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr36_sgpr37 = S_MOV_B64 -1
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr30_sgpr31, implicit-def dead $scc
@@ -877,14 +877,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.63.Flow13:
; GFX90A-NEXT: successors: %bb.64(0x40000000), %bb.66(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000C, $vgpr14_vgpr15:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $vcc = S_ANDN2_B64 $exec, killed renamable $sgpr36_sgpr37, implicit-def dead $scc
; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.66, implicit $vcc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.64.bb159:
; GFX90A-NEXT: successors: %bb.67(0x40000000), %bb.65(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000C, $vgpr14_vgpr15:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vcc = V_CMP_NE_U32_e64 0, killed $vgpr30, implicit $exec
; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -893,48 +893,48 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.65.Flow10:
; GFX90A-NEXT: successors: %bb.66(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $sgpr12_sgpr13 = S_ANDN2_SAVEEXEC_B64 $sgpr12_sgpr13, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.66.Flow14:
; GFX90A-NEXT: successors: %bb.8(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr68_sgpr69 = COPY $exec
; GFX90A-NEXT: S_BRANCH %bb.8
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.67.bb161:
; GFX90A-NEXT: successors: %bb.65(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr17, $vgpr19, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000C, $vgpr14_vgpr15:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr21, killed $vgpr23, implicit $exec
; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr2, killed $vgpr25, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr3 = V_OR_B32_e32 killed $vgpr11, killed $vgpr19, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr3 = V_OR_B32_e32 killed $vgpr13, killed $vgpr3, implicit $exec
; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr3, killed $vgpr2, implicit $exec
; GFX90A-NEXT: renamable $vgpr3 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U32_sdwa 0, killed $vgpr53, 0, $vgpr3, 0, 0, 6, implicit $exec
; GFX90A-NEXT: renamable $vgpr2 = V_CNDMASK_B32_e64 0, 0, 0, killed $vgpr2, killed $vcc, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr10 = V_OR_B32_e32 killed $vgpr52, killed $vgpr13, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr10, killed $vgpr2, implicit $exec
- ; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U32_sdwa 0, killed $vgpr17, 0, $vgpr3, 0, 0, 6, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr4 = V_OR_B32_e32 killed $vgpr52, killed $vgpr15, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr4, killed $vgpr2, implicit $exec
+ ; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U32_sdwa 0, killed $vgpr19, 0, $vgpr3, 0, 0, 6, implicit $exec
; GFX90A-NEXT: renamable $vgpr2 = V_CNDMASK_B32_e64 0, 0, 0, killed $vgpr2, killed $vcc, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr2, killed $vgpr15, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr2, killed $vgpr17, implicit $exec
; GFX90A-NEXT: DS_WRITE2_B32_gfx9 killed renamable $vgpr3, killed renamable $vgpr2, renamable $vgpr3, 0, 1, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, align 4, addrspace 3)
; GFX90A-NEXT: S_BRANCH %bb.65
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.68.bb174:
; GFX90A-NEXT: successors: %bb.72(0x40000000), %bb.69(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr26 = V_OR_B32_e32 1, $vgpr24, implicit $exec
; GFX90A-NEXT: renamable $vgpr48 = V_OR_B32_e32 $vgpr26, $vgpr22, implicit $exec
; GFX90A-NEXT: renamable $vgpr34 = V_OR_B32_e32 $vgpr48, $vgpr20, implicit $exec
; GFX90A-NEXT: renamable $vgpr28 = V_CNDMASK_B32_e64 0, $vgpr34, 0, 0, $sgpr12_sgpr13, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr38 = V_OR_B32_e32 $vgpr28, $vgpr18, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr36 = V_OR_B32_e32 $vgpr38, $vgpr10, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr32 = V_OR_B32_e32 $vgpr36, $vgpr12, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr38 = V_OR_B32_e32 $vgpr28, $vgpr2, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr36 = V_OR_B32_e32 $vgpr38, $vgpr12, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr32 = V_OR_B32_e32 $vgpr36, $vgpr14, implicit $exec
; GFX90A-NEXT: renamable $vgpr50 = V_CNDMASK_B32_e64 0, 0, 0, $vgpr32, killed $sgpr12_sgpr13, implicit $exec
; GFX90A-NEXT: renamable $sgpr12_sgpr13 = S_MOV_B64 -1
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr28_sgpr29, implicit-def dead $scc
@@ -942,19 +942,19 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.69.Flow:
; GFX90A-NEXT: successors: %bb.70(0x40000000), %bb.71(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000C, $vgpr14_vgpr15:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $vcc = S_ANDN2_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def dead $scc
; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.71, implicit $vcc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.70.bb186:
; GFX90A-NEXT: successors: %bb.71(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000C, $vgpr14_vgpr15:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $vgpr2_vgpr3 = V_LSHLREV_B64_e64 3, killed $vgpr2_vgpr3, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr10 = COPY renamable $sgpr27, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr2, renamable $vcc = V_ADD_CO_U32_e64 killed $sgpr26, $vgpr2, 0, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr3, dead renamable $vcc = V_ADDC_U32_e64 killed $vgpr10, killed $vgpr3, killed $vcc, 0, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr4_vgpr5 = V_LSHLREV_B64_e64 3, killed $vgpr4_vgpr5, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr2 = COPY renamable $sgpr27, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr4, renamable $vcc = V_ADD_CO_U32_e64 killed $sgpr26, $vgpr4, 0, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr2, dead renamable $vcc = V_ADDC_U32_e64 killed $vgpr2, killed $vgpr5, killed $vcc, 0, implicit $exec
; GFX90A-NEXT: renamable $vgpr27 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
; GFX90A-NEXT: renamable $vgpr49 = COPY renamable $vgpr27, implicit $exec
; GFX90A-NEXT: renamable $vgpr35 = COPY renamable $vgpr27, implicit $exec
@@ -964,31 +964,31 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr51 = COPY renamable $vgpr27, implicit $exec
; GFX90A-NEXT: renamable $vgpr33 = COPY renamable $vgpr27, implicit $exec
; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr27, renamable $vgpr26_vgpr27, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3)
- ; GFX90A-NEXT: renamable $vgpr10 = COPY renamable $sgpr21, implicit $exec
- ; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr10, killed renamable $vgpr48_vgpr49, 0, 0, implicit $exec :: (store (s64) into %ir.7, addrspace 3)
+ ; GFX90A-NEXT: renamable $vgpr5 = COPY renamable $sgpr21, implicit $exec
+ ; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr5, killed renamable $vgpr48_vgpr49, 0, 0, implicit $exec :: (store (s64) into %ir.7, addrspace 3)
; GFX90A-NEXT: renamable $vgpr12 = COPY killed renamable $sgpr22, implicit $exec
; GFX90A-NEXT: DS_WRITE_B64_gfx9 killed renamable $vgpr12, killed renamable $vgpr34_vgpr35, 0, 0, implicit $exec :: (store (s64) into %ir.8, addrspace 3)
; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr27, killed renamable $vgpr38_vgpr39, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3)
- ; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr10, killed renamable $vgpr36_vgpr37, 0, 0, implicit $exec :: (store (s64) into %ir.7, addrspace 3)
+ ; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr5, killed renamable $vgpr36_vgpr37, 0, 0, implicit $exec :: (store (s64) into %ir.7, addrspace 3)
; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr27, killed renamable $vgpr28_vgpr29, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3)
- ; GFX90A-NEXT: DS_WRITE_B64_gfx9 killed renamable $vgpr10, killed renamable $vgpr50_vgpr51, 0, 0, implicit $exec :: (store (s64) into %ir.7, addrspace 3)
+ ; GFX90A-NEXT: DS_WRITE_B64_gfx9 killed renamable $vgpr5, killed renamable $vgpr50_vgpr51, 0, 0, implicit $exec :: (store (s64) into %ir.7, addrspace 3)
; GFX90A-NEXT: DS_WRITE_B64_gfx9 killed renamable $vgpr27, killed renamable $vgpr32_vgpr33, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3)
- ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
- ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.71.Flow9:
; GFX90A-NEXT: successors: %bb.63(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000C, $vgpr14_vgpr15:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr36_sgpr37 = S_MOV_B64 0
; GFX90A-NEXT: S_BRANCH %bb.63
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.72.bb196:
; GFX90A-NEXT: successors: %bb.69(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr17, $vgpr19, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000C, $vgpr14_vgpr15:0x000000000000000C, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $vgpr10 = V_OR_B32_e32 $vgpr50, killed $vgpr16, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr54 = V_OR_B32_e32 killed $vgpr10, killed $vgpr14, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 $vgpr50, killed $vgpr18, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr54 = V_OR_B32_e32 killed $vgpr2, killed $vgpr16, implicit $exec
; GFX90A-NEXT: renamable $vgpr55 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
; GFX90A-NEXT: DS_WRITE_B64_gfx9 killed renamable $vgpr55, renamable $vgpr54_vgpr55, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3)
; GFX90A-NEXT: renamable $sgpr12_sgpr13 = S_MOV_B64 0
diff --git a/llvm/test/CodeGen/AMDGPU/dagcomb-extract-vec-elt-different-sizes.ll b/llvm/test/CodeGen/AMDGPU/dagcomb-extract-vec-elt-different-sizes.ll
index fc17d9288bf40..9f12977a3efde 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcomb-extract-vec-elt-different-sizes.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcomb-extract-vec-elt-different-sizes.ll
@@ -17,14 +17,14 @@ define amdgpu_kernel void @eggs(i1 %arg, ptr addrspace(1) %arg1, ptr %arg2, ptr
; CHECK-NEXT: ; %bb.1: ; %bb10
; CHECK-NEXT: global_load_dwordx2 v[8:9], v0, s[12:13]
; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: v_and_b32_e32 v7, 0xff, v8
-; CHECK-NEXT: v_bfe_u32 v6, v8, 8, 8
-; CHECK-NEXT: v_bfe_u32 v5, v8, 16, 8
-; CHECK-NEXT: v_lshrrev_b32_e32 v4, 24, v8
-; CHECK-NEXT: v_and_b32_e32 v3, 0xff, v9
-; CHECK-NEXT: v_bfe_u32 v2, v9, 8, 8
-; CHECK-NEXT: v_bfe_u32 v1, v9, 16, 8
-; CHECK-NEXT: v_lshrrev_b32_e32 v0, 24, v9
+; CHECK-NEXT: v_and_b32_e32 v0, 0xff, v8
+; CHECK-NEXT: v_bfe_u32 v1, v8, 8, 8
+; CHECK-NEXT: v_bfe_u32 v2, v8, 16, 8
+; CHECK-NEXT: v_lshrrev_b32_e32 v3, 24, v8
+; CHECK-NEXT: v_and_b32_e32 v4, 0xff, v9
+; CHECK-NEXT: v_bfe_u32 v5, v9, 8, 8
+; CHECK-NEXT: v_bfe_u32 v7, v9, 16, 8
+; CHECK-NEXT: v_lshrrev_b32_e32 v6, 24, v9
; CHECK-NEXT: s_branch .LBB0_3
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: v_mov_b32_e32 v1, 0
@@ -32,8 +32,8 @@ define amdgpu_kernel void @eggs(i1 %arg, ptr addrspace(1) %arg1, ptr %arg2, ptr
; CHECK-NEXT: v_mov_b32_e32 v3, 0
; CHECK-NEXT: v_mov_b32_e32 v4, 0
; CHECK-NEXT: v_mov_b32_e32 v5, 0
-; CHECK-NEXT: v_mov_b32_e32 v6, 0
; CHECK-NEXT: v_mov_b32_e32 v7, 0
+; CHECK-NEXT: v_mov_b32_e32 v6, 0
; CHECK-NEXT: .LBB0_3: ; %bb41
; CHECK-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x48
; CHECK-NEXT: v_mov_b32_e32 v8, s14
@@ -50,16 +50,16 @@ define amdgpu_kernel void @eggs(i1 %arg, ptr addrspace(1) %arg1, ptr %arg2, ptr
; CHECK-NEXT: v_mov_b32_e32 v19, s25
; CHECK-NEXT: v_mov_b32_e32 v20, s26
; CHECK-NEXT: v_mov_b32_e32 v21, s27
-; CHECK-NEXT: flat_store_byte v[8:9], v7
-; CHECK-NEXT: flat_store_byte v[10:11], v6
-; CHECK-NEXT: flat_store_byte v[12:13], v5
-; CHECK-NEXT: flat_store_byte v[14:15], v4
-; CHECK-NEXT: flat_store_byte v[16:17], v3
-; CHECK-NEXT: flat_store_byte v[18:19], v2
-; CHECK-NEXT: flat_store_byte v[20:21], v1
+; CHECK-NEXT: flat_store_byte v[8:9], v0
+; CHECK-NEXT: flat_store_byte v[10:11], v1
+; CHECK-NEXT: flat_store_byte v[12:13], v2
+; CHECK-NEXT: flat_store_byte v[14:15], v3
+; CHECK-NEXT: flat_store_byte v[16:17], v4
+; CHECK-NEXT: flat_store_byte v[18:19], v5
+; CHECK-NEXT: flat_store_byte v[20:21], v7
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: v_pk_mov_b32 v[2:3], s[0:1], s[0:1] op_sel:[0,1]
-; CHECK-NEXT: flat_store_byte v[2:3], v0
+; CHECK-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
+; CHECK-NEXT: flat_store_byte v[0:1], v6
; CHECK-NEXT: s_endpgm
bb:
br i1 %arg, label %bb10, label %bb41
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll
index f26b72027a784..59837bc718b76 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll
@@ -1280,25 +1280,45 @@ define double @fmul_select_f64_test11(double %x, i32 %bool.arg1, i32 %bool.arg2)
}
define double @fmul_select_f64_test12(double %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX7-LABEL: fmul_select_f64_test12:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_bfrev_b32_e32 v5, 1
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX7-NEXT: v_mov_b32_e32 v4, 0
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
-; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
-; GFX7-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_f64_test12:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_bfrev_b32_e32 v4, 1
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v3, v4, 0, vcc
+; GFX7-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX7-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: fmul_select_f64_test12:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_bfrev_b32_e32 v5, 1
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX9-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
-; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX7-GISEL-LABEL: fmul_select_f64_test12:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_bfrev_b32_e32 v5, 1
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
+; GFX7-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmul_select_f64_test12:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v4, 1
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v3, v4, 0, vcc
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmul_select_f64_test12:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v5, 1
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
+; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: fmul_select_f64_test12:
; GFX10: ; %bb.0:
@@ -1325,25 +1345,45 @@ define double @fmul_select_f64_test12(double %x, i32 %bool.arg1, i32 %bool.arg2)
}
define double @fmul_select_f64_test13(double %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX7-LABEL: fmul_select_f64_test13:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mov_b32_e32 v5, 0x40300000
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX7-NEXT: v_mov_b32_e32 v4, 0
-; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
-; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
-; GFX7-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_f64_test13:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mov_b32_e32 v4, 0x40300000
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v3, v4, 0, vcc
+; GFX7-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX7-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: fmul_select_f64_test13:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x40300000
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX9-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
-; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX7-GISEL-LABEL: fmul_select_f64_test13:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v5, 0x40300000
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
+; GFX7-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmul_select_f64_test13:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x40300000
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v3, v4, 0, vcc
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmul_select_f64_test13:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x40300000
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
+; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: fmul_select_f64_test13:
; GFX10: ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/div_i128.ll b/llvm/test/CodeGen/AMDGPU/div_i128.ll
index 747affa928601..b60061589d097 100644
--- a/llvm/test/CodeGen/AMDGPU/div_i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_i128.ll
@@ -147,8 +147,8 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-NEXT: v_mov_b32_e32 v14, 0
; GFX9-NEXT: v_addc_co_u32_e32 v29, vcc, -1, v1, vcc
; GFX9-NEXT: v_mov_b32_e32 v13, 0
-; GFX9-NEXT: v_mov_b32_e32 v15, 0
; GFX9-NEXT: s_mov_b64 s[4:5], 0
+; GFX9-NEXT: v_mov_b32_e32 v15, 0
; GFX9-NEXT: v_mov_b32_e32 v7, 0
; GFX9-NEXT: .LBB0_3: ; %udiv-do-while
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -385,31 +385,31 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[10:11], s[8:9]
; GFX9-O0-NEXT: v_ffbh_u32_e64 v7, v7
-; GFX9-O0-NEXT: s_mov_b32 s13, 32
-; GFX9-O0-NEXT: v_add_u32_e64 v7, v7, s13
+; GFX9-O0-NEXT: s_mov_b32 s12, 32
+; GFX9-O0-NEXT: v_add_u32_e64 v7, v7, s12
; GFX9-O0-NEXT: v_ffbh_u32_e64 v8, v8
; GFX9-O0-NEXT: v_min_u32_e64 v7, v7, v8
-; GFX9-O0-NEXT: s_mov_b32 s12, 0
-; GFX9-O0-NEXT: ; implicit-def: $sgpr14
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, s12
+; GFX9-O0-NEXT: s_mov_b32 s13, 0
+; GFX9-O0-NEXT: ; implicit-def: $sgpr13
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, 0
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v10
; GFX9-O0-NEXT: v_mov_b32_e32 v11, v8
; GFX9-O0-NEXT: v_ffbh_u32_e64 v6, v6
-; GFX9-O0-NEXT: v_add_u32_e64 v6, v6, s13
+; GFX9-O0-NEXT: v_add_u32_e64 v6, v6, s12
; GFX9-O0-NEXT: v_ffbh_u32_e64 v9, v9
; GFX9-O0-NEXT: v_min_u32_e64 v12, v6, v9
-; GFX9-O0-NEXT: ; implicit-def: $sgpr14
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, s12
+; GFX9-O0-NEXT: ; implicit-def: $sgpr13
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, 0
; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v13, v6
; GFX9-O0-NEXT: s_mov_b64 s[14:15], 64
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v12
; GFX9-O0-NEXT: s_mov_b32 s16, s14
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v13
-; GFX9-O0-NEXT: s_mov_b32 s18, s15
+; GFX9-O0-NEXT: s_mov_b32 s13, s15
; GFX9-O0-NEXT: v_add_co_u32_e64 v9, s[16:17], v9, s16
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, s18
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, s13
; GFX9-O0-NEXT: v_addc_co_u32_e64 v6, s[16:17], v6, v10, s[16:17]
; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v6
@@ -425,20 +425,20 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[4:5], s[8:9]
; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v0
-; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s13
+; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s12
; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v1
; GFX9-O0-NEXT: v_min_u32_e64 v5, v4, v5
-; GFX9-O0-NEXT: ; implicit-def: $sgpr16
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, s12
+; GFX9-O0-NEXT: ; implicit-def: $sgpr13
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, 0
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v2
-; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s13
+; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s12
; GFX9-O0-NEXT: v_ffbh_u32_e64 v10, v3
; GFX9-O0-NEXT: v_min_u32_e64 v11, v4, v10
-; GFX9-O0-NEXT: ; implicit-def: $sgpr13
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, s12
+; GFX9-O0-NEXT: ; implicit-def: $sgpr12
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, 0
; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v4
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v11
@@ -543,17 +543,17 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_mov_b64 s[4:5], exec
; GFX9-O0-NEXT: v_writelane_b32 v30, s4, 4
; GFX9-O0-NEXT: v_writelane_b32 v30, s5, 5
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
+; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
; GFX9-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-O0-NEXT: s_cbranch_execz .LBB0_3
; GFX9-O0-NEXT: s_branch .LBB0_8
; GFX9-O0-NEXT: .LBB0_1: ; %Flow
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
+; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 6
; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 7
@@ -585,9 +585,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_branch .LBB0_5
; GFX9-O0-NEXT: .LBB0_3: ; %Flow2
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
+; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 4
; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 5
@@ -645,9 +645,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_branch .LBB0_3
; GFX9-O0-NEXT: .LBB0_5: ; %Flow1
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
+; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 8
; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 9
@@ -676,9 +676,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_branch .LBB0_4
; GFX9-O0-NEXT: .LBB0_6: ; %udiv-do-while
; GFX9-O0-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
+; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: v_readlane_b32 s6, v30, 10
; GFX9-O0-NEXT: v_readlane_b32 s7, v30, 11
@@ -870,9 +870,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_mov_b64 s[6:7], s[4:5]
; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 10
; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 11
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
+; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
; GFX9-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
@@ -901,9 +901,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_cbranch_execnz .LBB0_6
; GFX9-O0-NEXT: s_branch .LBB0_1
; GFX9-O0-NEXT: .LBB0_7: ; %udiv-preheader
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
+; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
@@ -1003,9 +1003,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
; GFX9-O0-NEXT: v_writelane_b32 v30, s4, 10
; GFX9-O0-NEXT: v_writelane_b32 v30, s5, 11
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
+; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
; GFX9-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
@@ -1032,9 +1032,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_branch .LBB0_6
; GFX9-O0-NEXT: .LBB0_8: ; %udiv-bb1
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
+; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
@@ -1161,9 +1161,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7]
; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 8
; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 9
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
+; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-O0-NEXT: s_cbranch_execz .LBB0_5
; GFX9-O0-NEXT: s_branch .LBB0_7
@@ -2427,8 +2427,8 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-NEXT: v_cndmask_b32_e64 v0, v12, v0, s[4:5]
; GFX9-NEXT: v_addc_co_u32_e32 v25, vcc, -1, v7, vcc
; GFX9-NEXT: v_mov_b32_e32 v15, 0
-; GFX9-NEXT: v_mov_b32_e32 v17, 0
; GFX9-NEXT: s_mov_b64 s[4:5], 0
+; GFX9-NEXT: v_mov_b32_e32 v17, 0
; GFX9-NEXT: v_mov_b32_e32 v13, 0
; GFX9-NEXT: .LBB1_3: ; %udiv-do-while
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -2566,31 +2566,31 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7]
; GFX9-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v5
-; GFX9-O0-NEXT: s_mov_b32 s9, 32
-; GFX9-O0-NEXT: v_add_u32_e64 v5, v5, s9
+; GFX9-O0-NEXT: s_mov_b32 s8, 32
+; GFX9-O0-NEXT: v_add_u32_e64 v5, v5, s8
; GFX9-O0-NEXT: v_ffbh_u32_e64 v6, v6
; GFX9-O0-NEXT: v_min_u32_e64 v5, v5, v6
-; GFX9-O0-NEXT: s_mov_b32 s8, 0
-; GFX9-O0-NEXT: ; implicit-def: $sgpr10
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, s8
+; GFX9-O0-NEXT: s_mov_b32 s9, 0
+; GFX9-O0-NEXT: ; implicit-def: $sgpr9
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, 0
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v8
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v6
; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v4
-; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s9
+; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s8
; GFX9-O0-NEXT: v_ffbh_u32_e64 v7, v7
; GFX9-O0-NEXT: v_min_u32_e64 v14, v4, v7
-; GFX9-O0-NEXT: ; implicit-def: $sgpr10
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8
+; GFX9-O0-NEXT: ; implicit-def: $sgpr9
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, 0
; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v15, v4
; GFX9-O0-NEXT: s_mov_b64 s[10:11], 64
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v14
; GFX9-O0-NEXT: s_mov_b32 s12, s10
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v15
-; GFX9-O0-NEXT: s_mov_b32 s14, s11
+; GFX9-O0-NEXT: s_mov_b32 s9, s11
; GFX9-O0-NEXT: v_add_co_u32_e64 v7, s[12:13], v7, s12
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, s14
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, s9
; GFX9-O0-NEXT: v_addc_co_u32_e64 v4, s[12:13], v4, v8, s[12:13]
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v4
@@ -2601,25 +2601,25 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7
; GFX9-O0-NEXT: v_cndmask_b32_e64 v8, v5, v6, s[12:13]
-; GFX9-O0-NEXT: ; implicit-def: $sgpr12
-; GFX9-O0-NEXT: ; implicit-def: $sgpr12
+; GFX9-O0-NEXT: ; implicit-def: $sgpr9
+; GFX9-O0-NEXT: ; implicit-def: $sgpr9
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4
; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v0
-; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s9
+; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s8
; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v1
; GFX9-O0-NEXT: v_min_u32_e64 v5, v4, v5
-; GFX9-O0-NEXT: ; implicit-def: $sgpr12
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8
+; GFX9-O0-NEXT: ; implicit-def: $sgpr9
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, 0
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v2
-; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s9
+; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s8
; GFX9-O0-NEXT: v_ffbh_u32_e64 v10, v3
; GFX9-O0-NEXT: v_min_u32_e64 v14, v4, v10
-; GFX9-O0-NEXT: ; implicit-def: $sgpr9
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8
+; GFX9-O0-NEXT: ; implicit-def: $sgpr8
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, 0
; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v15, v4
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v14
diff --git a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
index 7ea98a16e3b84..5134159e3e406 100644
--- a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
@@ -60,25 +60,25 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_add_i32_e64 v3, s[6:7], 64, v8
; SDAG-NEXT: v_addc_u32_e64 v8, s[6:7], 0, 0, s[6:7]
; SDAG-NEXT: v_cmp_ne_u64_e64 s[6:7], 0, v[0:1]
-; SDAG-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[6:7]
+; SDAG-NEXT: v_cndmask_b32_e64 v11, v8, 0, s[6:7]
; SDAG-NEXT: v_cndmask_b32_e64 v2, v3, v2, s[6:7]
; SDAG-NEXT: s_or_b64 s[6:7], vcc, s[4:5]
-; SDAG-NEXT: v_sub_i32_e32 v2, vcc, v2, v10
-; SDAG-NEXT: v_subb_u32_e32 v3, vcc, v8, v9, vcc
-; SDAG-NEXT: v_xor_b32_e32 v8, 0x7f, v2
+; SDAG-NEXT: v_sub_i32_e32 v8, vcc, v2, v10
+; SDAG-NEXT: v_subb_u32_e32 v9, vcc, v11, v9, vcc
+; SDAG-NEXT: v_xor_b32_e32 v2, 0x7f, v8
; SDAG-NEXT: v_subb_u32_e32 v10, vcc, 0, v18, vcc
-; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[10:11], v[2:3]
+; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[10:11], v[8:9]
; SDAG-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5]
; SDAG-NEXT: v_subb_u32_e32 v11, vcc, 0, v18, vcc
-; SDAG-NEXT: v_or_b32_e32 v8, v8, v10
-; SDAG-NEXT: v_or_b32_e32 v9, v3, v11
+; SDAG-NEXT: v_or_b32_e32 v2, v2, v10
+; SDAG-NEXT: v_or_b32_e32 v3, v9, v11
; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11]
; SDAG-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
-; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9]
+; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
; SDAG-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[10:11]
-; SDAG-NEXT: v_cndmask_b32_e64 v8, v18, v19, s[4:5]
-; SDAG-NEXT: v_and_b32_e32 v8, 1, v8
-; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v8
+; SDAG-NEXT: v_cndmask_b32_e64 v2, v18, v19, s[4:5]
+; SDAG-NEXT: v_and_b32_e32 v2, 1, v2
+; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v2
; SDAG-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v18, v17, 0, s[4:5]
; SDAG-NEXT: s_xor_b64 s[6:7], s[4:5], -1
@@ -89,88 +89,88 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[8:9]
; SDAG-NEXT: s_cbranch_execz .LBB0_6
; SDAG-NEXT: ; %bb.1: ; %udiv-bb15
-; SDAG-NEXT: v_add_i32_e32 v30, vcc, 1, v2
-; SDAG-NEXT: v_sub_i32_e64 v18, s[4:5], 63, v2
-; SDAG-NEXT: v_mov_b32_e32 v8, 0
-; SDAG-NEXT: v_mov_b32_e32 v9, 0
-; SDAG-NEXT: v_addc_u32_e32 v31, vcc, 0, v3, vcc
+; SDAG-NEXT: v_add_i32_e32 v30, vcc, 1, v8
+; SDAG-NEXT: v_sub_i32_e64 v18, s[4:5], 63, v8
+; SDAG-NEXT: v_mov_b32_e32 v2, 0
+; SDAG-NEXT: v_mov_b32_e32 v3, 0
+; SDAG-NEXT: v_addc_u32_e32 v31, vcc, 0, v9, vcc
; SDAG-NEXT: v_lshl_b64 v[18:19], v[20:21], v18
; SDAG-NEXT: v_addc_u32_e32 v32, vcc, 0, v10, vcc
; SDAG-NEXT: v_addc_u32_e32 v33, vcc, 0, v11, vcc
-; SDAG-NEXT: v_or_b32_e32 v10, v30, v32
-; SDAG-NEXT: v_sub_i32_e32 v34, vcc, 0x7f, v2
-; SDAG-NEXT: v_or_b32_e32 v11, v31, v33
-; SDAG-NEXT: v_lshl_b64 v[2:3], v[16:17], v34
-; SDAG-NEXT: v_sub_i32_e32 v35, vcc, 64, v34
-; SDAG-NEXT: v_lshl_b64 v[22:23], v[20:21], v34
-; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11]
-; SDAG-NEXT: v_lshr_b64 v[10:11], v[20:21], v35
-; SDAG-NEXT: v_or_b32_e32 v3, v3, v11
-; SDAG-NEXT: v_or_b32_e32 v2, v2, v10
-; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v34
-; SDAG-NEXT: v_cndmask_b32_e64 v3, v19, v3, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v19, 0, v23, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v18, 0, v22, s[4:5]
-; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v34
-; SDAG-NEXT: v_cndmask_b32_e64 v3, v3, v17, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v16, s[4:5]
+; SDAG-NEXT: v_or_b32_e32 v9, v30, v32
+; SDAG-NEXT: v_sub_i32_e32 v11, vcc, 0x7f, v8
+; SDAG-NEXT: v_or_b32_e32 v10, v31, v33
+; SDAG-NEXT: v_lshl_b64 v[22:23], v[16:17], v11
+; SDAG-NEXT: v_sub_i32_e32 v8, vcc, 64, v11
+; SDAG-NEXT: v_lshl_b64 v[34:35], v[20:21], v11
+; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[9:10]
+; SDAG-NEXT: v_lshr_b64 v[8:9], v[20:21], v8
+; SDAG-NEXT: v_or_b32_e32 v9, v23, v9
+; SDAG-NEXT: v_or_b32_e32 v8, v22, v8
+; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v11
+; SDAG-NEXT: v_cndmask_b32_e64 v9, v19, v9, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v8, v18, v8, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v19, 0, v35, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v18, 0, v34, s[4:5]
+; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v11
+; SDAG-NEXT: v_cndmask_b32_e64 v9, v9, v17, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v8, v8, v16, s[4:5]
; SDAG-NEXT: v_mov_b32_e32 v10, 0
; SDAG-NEXT: v_mov_b32_e32 v11, 0
; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SDAG-NEXT: s_xor_b64 s[8:9], exec, s[4:5]
; SDAG-NEXT: s_cbranch_execz .LBB0_5
; SDAG-NEXT: ; %bb.2: ; %udiv-preheader4
-; SDAG-NEXT: v_lshr_b64 v[8:9], v[20:21], v30
+; SDAG-NEXT: v_lshr_b64 v[2:3], v[20:21], v30
; SDAG-NEXT: v_sub_i32_e32 v10, vcc, 64, v30
; SDAG-NEXT: v_lshl_b64 v[10:11], v[16:17], v10
-; SDAG-NEXT: v_or_b32_e32 v11, v9, v11
-; SDAG-NEXT: v_or_b32_e32 v10, v8, v10
+; SDAG-NEXT: v_or_b32_e32 v11, v3, v11
+; SDAG-NEXT: v_or_b32_e32 v10, v2, v10
; SDAG-NEXT: v_cmp_gt_u32_e32 vcc, 64, v30
-; SDAG-NEXT: v_subrev_i32_e64 v8, s[4:5], 64, v30
-; SDAG-NEXT: v_lshr_b64 v[8:9], v[16:17], v8
-; SDAG-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc
+; SDAG-NEXT: v_subrev_i32_e64 v2, s[4:5], 64, v30
+; SDAG-NEXT: v_lshr_b64 v[2:3], v[16:17], v2
+; SDAG-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc
; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v30
-; SDAG-NEXT: v_cndmask_b32_e64 v21, v9, v21, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc
-; SDAG-NEXT: v_cndmask_b32_e64 v20, v8, v20, s[4:5]
-; SDAG-NEXT: v_lshr_b64 v[8:9], v[16:17], v30
-; SDAG-NEXT: v_cndmask_b32_e32 v23, 0, v9, vcc
-; SDAG-NEXT: v_cndmask_b32_e32 v22, 0, v8, vcc
+; SDAG-NEXT: v_cndmask_b32_e64 v21, v3, v21, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
+; SDAG-NEXT: v_cndmask_b32_e64 v20, v2, v20, s[4:5]
+; SDAG-NEXT: v_lshr_b64 v[2:3], v[16:17], v30
+; SDAG-NEXT: v_cndmask_b32_e32 v23, 0, v3, vcc
+; SDAG-NEXT: v_cndmask_b32_e32 v22, 0, v2, vcc
; SDAG-NEXT: v_add_i32_e32 v34, vcc, -1, v29
; SDAG-NEXT: v_addc_u32_e32 v35, vcc, -1, v28, vcc
; SDAG-NEXT: v_addc_u32_e32 v36, vcc, -1, v0, vcc
; SDAG-NEXT: v_addc_u32_e32 v37, vcc, -1, v1, vcc
; SDAG-NEXT: v_mov_b32_e32 v10, 0
; SDAG-NEXT: v_mov_b32_e32 v11, 0
+; SDAG-NEXT: s_mov_b64 s[4:5], 0
+; SDAG-NEXT: v_mov_b32_e32 v3, 0
; SDAG-NEXT: v_mov_b32_e32 v16, 0
; SDAG-NEXT: v_mov_b32_e32 v17, 0
-; SDAG-NEXT: s_mov_b64 s[4:5], 0
-; SDAG-NEXT: v_mov_b32_e32 v9, 0
; SDAG-NEXT: .LBB0_3: ; %udiv-do-while3
; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
-; SDAG-NEXT: v_lshrrev_b32_e32 v8, 31, v19
+; SDAG-NEXT: v_lshrrev_b32_e32 v2, 31, v19
; SDAG-NEXT: v_lshl_b64 v[18:19], v[18:19], 1
; SDAG-NEXT: v_lshl_b64 v[22:23], v[22:23], 1
; SDAG-NEXT: v_lshrrev_b32_e32 v38, 31, v21
; SDAG-NEXT: v_lshl_b64 v[20:21], v[20:21], 1
-; SDAG-NEXT: v_lshrrev_b32_e32 v39, 31, v3
-; SDAG-NEXT: v_lshl_b64 v[2:3], v[2:3], 1
+; SDAG-NEXT: v_lshrrev_b32_e32 v39, 31, v9
+; SDAG-NEXT: v_lshl_b64 v[8:9], v[8:9], 1
; SDAG-NEXT: v_or_b32_e32 v19, v17, v19
; SDAG-NEXT: v_or_b32_e32 v18, v16, v18
; SDAG-NEXT: v_or_b32_e32 v16, v22, v38
; SDAG-NEXT: v_or_b32_e32 v17, v20, v39
-; SDAG-NEXT: v_or_b32_e32 v2, v2, v8
-; SDAG-NEXT: v_sub_i32_e32 v8, vcc, v34, v17
-; SDAG-NEXT: v_subb_u32_e32 v8, vcc, v35, v21, vcc
-; SDAG-NEXT: v_subb_u32_e32 v8, vcc, v36, v16, vcc
-; SDAG-NEXT: v_subb_u32_e32 v8, vcc, v37, v23, vcc
-; SDAG-NEXT: v_ashrrev_i32_e32 v8, 31, v8
-; SDAG-NEXT: v_and_b32_e32 v20, v8, v29
-; SDAG-NEXT: v_and_b32_e32 v22, v8, v28
-; SDAG-NEXT: v_and_b32_e32 v38, v8, v0
-; SDAG-NEXT: v_and_b32_e32 v39, v8, v1
-; SDAG-NEXT: v_and_b32_e32 v8, 1, v8
+; SDAG-NEXT: v_or_b32_e32 v8, v8, v2
+; SDAG-NEXT: v_sub_i32_e32 v2, vcc, v34, v17
+; SDAG-NEXT: v_subb_u32_e32 v2, vcc, v35, v21, vcc
+; SDAG-NEXT: v_subb_u32_e32 v2, vcc, v36, v16, vcc
+; SDAG-NEXT: v_subb_u32_e32 v2, vcc, v37, v23, vcc
+; SDAG-NEXT: v_ashrrev_i32_e32 v2, 31, v2
+; SDAG-NEXT: v_and_b32_e32 v20, v2, v29
+; SDAG-NEXT: v_and_b32_e32 v22, v2, v28
+; SDAG-NEXT: v_and_b32_e32 v38, v2, v0
+; SDAG-NEXT: v_and_b32_e32 v39, v2, v1
+; SDAG-NEXT: v_and_b32_e32 v2, 1, v2
; SDAG-NEXT: v_sub_i32_e32 v20, vcc, v17, v20
; SDAG-NEXT: v_subb_u32_e32 v21, vcc, v21, v22, vcc
; SDAG-NEXT: v_subb_u32_e32 v22, vcc, v16, v38, vcc
@@ -182,243 +182,243 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_or_b32_e32 v16, v30, v32
; SDAG-NEXT: v_or_b32_e32 v17, v31, v33
; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17]
-; SDAG-NEXT: v_or_b32_e32 v3, v11, v3
+; SDAG-NEXT: v_or_b32_e32 v9, v11, v9
; SDAG-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; SDAG-NEXT: v_or_b32_e32 v2, v10, v2
-; SDAG-NEXT: v_mov_b32_e32 v17, v9
-; SDAG-NEXT: v_mov_b32_e32 v16, v8
+; SDAG-NEXT: v_or_b32_e32 v8, v10, v8
+; SDAG-NEXT: v_mov_b32_e32 v17, v3
+; SDAG-NEXT: v_mov_b32_e32 v16, v2
; SDAG-NEXT: s_andn2_b64 exec, exec, s[4:5]
; SDAG-NEXT: s_cbranch_execnz .LBB0_3
; SDAG-NEXT: ; %bb.4: ; %Flow13
; SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
; SDAG-NEXT: .LBB0_5: ; %Flow14
; SDAG-NEXT: s_or_b64 exec, exec, s[8:9]
-; SDAG-NEXT: v_lshl_b64 v[0:1], v[2:3], 1
+; SDAG-NEXT: v_lshl_b64 v[0:1], v[8:9], 1
; SDAG-NEXT: v_lshrrev_b32_e32 v16, 31, v19
-; SDAG-NEXT: v_lshl_b64 v[2:3], v[18:19], 1
+; SDAG-NEXT: v_lshl_b64 v[8:9], v[18:19], 1
; SDAG-NEXT: v_or_b32_e32 v0, v0, v16
; SDAG-NEXT: v_or_b32_e32 v18, v11, v1
-; SDAG-NEXT: v_or_b32_e32 v19, v9, v3
+; SDAG-NEXT: v_or_b32_e32 v19, v3, v9
; SDAG-NEXT: v_or_b32_e32 v22, v10, v0
-; SDAG-NEXT: v_or_b32_e32 v23, v8, v2
+; SDAG-NEXT: v_or_b32_e32 v23, v2, v8
; SDAG-NEXT: .LBB0_6: ; %Flow16
; SDAG-NEXT: s_or_b64 exec, exec, s[6:7]
; SDAG-NEXT: v_ashrrev_i32_e32 v16, 31, v7
; SDAG-NEXT: v_ashrrev_i32_e32 v17, 31, v15
; SDAG-NEXT: v_sub_i32_e32 v0, vcc, 0, v4
-; SDAG-NEXT: v_mov_b32_e32 v8, 0
+; SDAG-NEXT: v_mov_b32_e32 v11, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0x7f
; SDAG-NEXT: v_mov_b32_e32 v20, v16
; SDAG-NEXT: v_mov_b32_e32 v21, v17
; SDAG-NEXT: v_subb_u32_e32 v1, vcc, 0, v5, vcc
-; SDAG-NEXT: v_subb_u32_e32 v9, vcc, 0, v6, vcc
+; SDAG-NEXT: v_subb_u32_e32 v8, vcc, 0, v6, vcc
; SDAG-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[6:7]
; SDAG-NEXT: v_cndmask_b32_e64 v3, v5, v1, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v2, v4, v0, s[4:5]
; SDAG-NEXT: v_subb_u32_e32 v0, vcc, 0, v7, vcc
-; SDAG-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v8, v6, v8, s[4:5]
; SDAG-NEXT: v_ffbh_u32_e32 v1, v2
; SDAG-NEXT: v_ffbh_u32_e32 v4, v3
-; SDAG-NEXT: v_cndmask_b32_e64 v7, v7, v0, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v9, v7, v0, s[4:5]
; SDAG-NEXT: v_sub_i32_e32 v5, vcc, 0, v12
-; SDAG-NEXT: v_or_b32_e32 v0, v2, v6
-; SDAG-NEXT: v_ffbh_u32_e32 v9, v6
-; SDAG-NEXT: v_add_i32_e64 v10, s[4:5], 32, v1
-; SDAG-NEXT: v_subb_u32_e32 v11, vcc, 0, v13, vcc
-; SDAG-NEXT: v_or_b32_e32 v1, v3, v7
-; SDAG-NEXT: v_add_i32_e64 v9, s[4:5], 32, v9
-; SDAG-NEXT: v_ffbh_u32_e32 v30, v7
-; SDAG-NEXT: v_min_u32_e32 v4, v10, v4
-; SDAG-NEXT: v_subb_u32_e32 v10, vcc, 0, v14, vcc
+; SDAG-NEXT: v_or_b32_e32 v0, v2, v8
+; SDAG-NEXT: v_ffbh_u32_e32 v6, v8
+; SDAG-NEXT: v_add_i32_e64 v7, s[4:5], 32, v1
+; SDAG-NEXT: v_subb_u32_e32 v10, vcc, 0, v13, vcc
+; SDAG-NEXT: v_or_b32_e32 v1, v3, v9
+; SDAG-NEXT: v_add_i32_e64 v6, s[4:5], 32, v6
+; SDAG-NEXT: v_ffbh_u32_e32 v30, v9
+; SDAG-NEXT: v_min_u32_e32 v4, v7, v4
+; SDAG-NEXT: v_subb_u32_e32 v7, vcc, 0, v14, vcc
; SDAG-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[14:15]
-; SDAG-NEXT: v_cndmask_b32_e64 v28, v13, v11, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v28, v13, v10, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v29, v12, v5, s[4:5]
; SDAG-NEXT: v_cmp_eq_u64_e64 s[6:7], 0, v[0:1]
-; SDAG-NEXT: v_min_u32_e32 v1, v9, v30
+; SDAG-NEXT: v_min_u32_e32 v1, v6, v30
; SDAG-NEXT: v_add_i32_e64 v4, s[8:9], 64, v4
; SDAG-NEXT: v_addc_u32_e64 v5, s[8:9], 0, 0, s[8:9]
-; SDAG-NEXT: v_subb_u32_e32 v9, vcc, 0, v15, vcc
-; SDAG-NEXT: v_cndmask_b32_e64 v0, v14, v10, s[4:5]
-; SDAG-NEXT: v_ffbh_u32_e32 v10, v29
-; SDAG-NEXT: v_ffbh_u32_e32 v11, v28
-; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7]
+; SDAG-NEXT: v_subb_u32_e32 v6, vcc, 0, v15, vcc
+; SDAG-NEXT: v_cndmask_b32_e64 v0, v14, v7, s[4:5]
+; SDAG-NEXT: v_ffbh_u32_e32 v7, v29
+; SDAG-NEXT: v_ffbh_u32_e32 v10, v28
+; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9]
; SDAG-NEXT: v_cndmask_b32_e64 v12, v5, 0, vcc
; SDAG-NEXT: v_cndmask_b32_e32 v13, v4, v1, vcc
-; SDAG-NEXT: v_cndmask_b32_e64 v1, v15, v9, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v1, v15, v6, s[4:5]
; SDAG-NEXT: v_or_b32_e32 v4, v29, v0
-; SDAG-NEXT: v_ffbh_u32_e32 v9, v0
-; SDAG-NEXT: v_add_i32_e32 v10, vcc, 32, v10
+; SDAG-NEXT: v_ffbh_u32_e32 v6, v0
+; SDAG-NEXT: v_add_i32_e32 v7, vcc, 32, v7
; SDAG-NEXT: v_or_b32_e32 v5, v28, v1
-; SDAG-NEXT: v_add_i32_e32 v9, vcc, 32, v9
+; SDAG-NEXT: v_add_i32_e32 v6, vcc, 32, v6
; SDAG-NEXT: v_ffbh_u32_e32 v14, v1
-; SDAG-NEXT: v_min_u32_e32 v10, v10, v11
+; SDAG-NEXT: v_min_u32_e32 v7, v7, v10
; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]
-; SDAG-NEXT: v_min_u32_e32 v4, v9, v14
-; SDAG-NEXT: v_add_i32_e64 v5, s[4:5], 64, v10
-; SDAG-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5]
+; SDAG-NEXT: v_min_u32_e32 v4, v6, v14
+; SDAG-NEXT: v_add_i32_e64 v5, s[4:5], 64, v7
+; SDAG-NEXT: v_addc_u32_e64 v6, s[4:5], 0, 0, s[4:5]
; SDAG-NEXT: s_or_b64 s[6:7], vcc, s[6:7]
; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
-; SDAG-NEXT: v_cndmask_b32_e64 v9, v9, 0, vcc
+; SDAG-NEXT: v_cndmask_b32_e64 v7, v6, 0, vcc
; SDAG-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; SDAG-NEXT: v_sub_i32_e32 v4, vcc, v4, v13
-; SDAG-NEXT: v_subb_u32_e32 v5, vcc, v9, v12, vcc
-; SDAG-NEXT: v_xor_b32_e32 v9, 0x7f, v4
-; SDAG-NEXT: v_subb_u32_e32 v10, vcc, 0, v8, vcc
-; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[10:11], v[4:5]
+; SDAG-NEXT: v_sub_i32_e32 v6, vcc, v4, v13
+; SDAG-NEXT: v_subb_u32_e32 v7, vcc, v7, v12, vcc
+; SDAG-NEXT: v_xor_b32_e32 v4, 0x7f, v6
+; SDAG-NEXT: v_subb_u32_e32 v10, vcc, 0, v11, vcc
+; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[10:11], v[6:7]
; SDAG-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
-; SDAG-NEXT: v_subb_u32_e32 v11, vcc, 0, v8, vcc
-; SDAG-NEXT: v_or_b32_e32 v8, v9, v10
+; SDAG-NEXT: v_subb_u32_e32 v11, vcc, 0, v11, vcc
+; SDAG-NEXT: v_or_b32_e32 v4, v4, v10
; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11]
; SDAG-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; SDAG-NEXT: v_or_b32_e32 v9, v5, v11
+; SDAG-NEXT: v_or_b32_e32 v5, v7, v11
; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11]
; SDAG-NEXT: v_cndmask_b32_e32 v12, v13, v12, vcc
-; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9]
-; SDAG-NEXT: v_and_b32_e32 v8, 1, v12
-; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v8
+; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; SDAG-NEXT: v_and_b32_e32 v4, 1, v12
+; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v4
; SDAG-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v13, v7, 0, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v13, v9, 0, s[4:5]
; SDAG-NEXT: s_xor_b64 s[6:7], s[4:5], -1
-; SDAG-NEXT: v_cndmask_b32_e64 v9, v6, 0, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v5, v8, 0, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v14, v3, 0, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v8, v2, 0, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v4, v2, 0, s[4:5]
; SDAG-NEXT: s_and_b64 s[4:5], s[6:7], vcc
; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
; SDAG-NEXT: s_cbranch_execz .LBB0_12
; SDAG-NEXT: ; %bb.7: ; %udiv-bb1
-; SDAG-NEXT: v_add_i32_e32 v30, vcc, 1, v4
-; SDAG-NEXT: v_sub_i32_e64 v12, s[4:5], 63, v4
-; SDAG-NEXT: v_mov_b32_e32 v8, 0
-; SDAG-NEXT: v_mov_b32_e32 v9, 0
-; SDAG-NEXT: v_addc_u32_e32 v31, vcc, 0, v5, vcc
+; SDAG-NEXT: v_add_i32_e32 v30, vcc, 1, v6
+; SDAG-NEXT: v_sub_i32_e64 v12, s[4:5], 63, v6
+; SDAG-NEXT: v_mov_b32_e32 v4, 0
+; SDAG-NEXT: v_mov_b32_e32 v5, 0
+; SDAG-NEXT: v_addc_u32_e32 v31, vcc, 0, v7, vcc
; SDAG-NEXT: v_lshl_b64 v[12:13], v[2:3], v12
; SDAG-NEXT: v_addc_u32_e32 v32, vcc, 0, v10, vcc
; SDAG-NEXT: v_addc_u32_e32 v33, vcc, 0, v11, vcc
; SDAG-NEXT: v_or_b32_e32 v10, v30, v32
-; SDAG-NEXT: v_sub_i32_e32 v34, vcc, 0x7f, v4
+; SDAG-NEXT: v_sub_i32_e32 v34, vcc, 0x7f, v6
; SDAG-NEXT: v_or_b32_e32 v11, v31, v33
-; SDAG-NEXT: v_lshl_b64 v[4:5], v[6:7], v34
+; SDAG-NEXT: v_lshl_b64 v[6:7], v[8:9], v34
; SDAG-NEXT: v_sub_i32_e32 v35, vcc, 64, v34
; SDAG-NEXT: v_lshl_b64 v[14:15], v[2:3], v34
; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11]
; SDAG-NEXT: v_lshr_b64 v[10:11], v[2:3], v35
-; SDAG-NEXT: v_or_b32_e32 v5, v5, v11
-; SDAG-NEXT: v_or_b32_e32 v4, v4, v10
+; SDAG-NEXT: v_or_b32_e32 v7, v7, v11
+; SDAG-NEXT: v_or_b32_e32 v6, v6, v10
; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v34
-; SDAG-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v6, v12, v6, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v11, 0, v15, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v10, 0, v14, s[4:5]
; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v34
-; SDAG-NEXT: v_cndmask_b32_e64 v5, v5, v7, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v4, v4, v6, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v7, v7, v9, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v6, v6, v8, s[4:5]
; SDAG-NEXT: v_mov_b32_e32 v12, 0
; SDAG-NEXT: v_mov_b32_e32 v13, 0
; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SDAG-NEXT: s_xor_b64 s[8:9], exec, s[4:5]
; SDAG-NEXT: s_cbranch_execz .LBB0_11
; SDAG-NEXT: ; %bb.8: ; %udiv-preheader
-; SDAG-NEXT: v_lshr_b64 v[8:9], v[2:3], v30
-; SDAG-NEXT: v_sub_i32_e32 v35, vcc, 64, v30
-; SDAG-NEXT: v_subrev_i32_e32 v36, vcc, 64, v30
-; SDAG-NEXT: v_lshr_b64 v[37:38], v[6:7], v30
+; SDAG-NEXT: v_lshr_b64 v[14:15], v[2:3], v30
+; SDAG-NEXT: v_sub_i32_e32 v4, vcc, 64, v30
+; SDAG-NEXT: v_subrev_i32_e32 v35, vcc, 64, v30
+; SDAG-NEXT: v_lshr_b64 v[37:38], v[8:9], v30
; SDAG-NEXT: v_add_i32_e32 v34, vcc, -1, v29
; SDAG-NEXT: v_mov_b32_e32 v12, 0
; SDAG-NEXT: v_mov_b32_e32 v13, 0
-; SDAG-NEXT: v_mov_b32_e32 v14, 0
-; SDAG-NEXT: v_mov_b32_e32 v15, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
-; SDAG-NEXT: v_lshl_b64 v[48:49], v[6:7], v35
-; SDAG-NEXT: v_lshr_b64 v[6:7], v[6:7], v36
+; SDAG-NEXT: v_mov_b32_e32 v5, 0
+; SDAG-NEXT: v_lshl_b64 v[48:49], v[8:9], v4
+; SDAG-NEXT: v_lshr_b64 v[8:9], v[8:9], v35
; SDAG-NEXT: v_addc_u32_e32 v35, vcc, -1, v28, vcc
-; SDAG-NEXT: v_or_b32_e32 v9, v9, v49
-; SDAG-NEXT: v_or_b32_e32 v8, v8, v48
+; SDAG-NEXT: v_or_b32_e32 v4, v15, v49
+; SDAG-NEXT: v_or_b32_e32 v14, v14, v48
; SDAG-NEXT: v_addc_u32_e32 v36, vcc, -1, v0, vcc
; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v30
-; SDAG-NEXT: v_cndmask_b32_e64 v9, v7, v9, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v8, v6, v8, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v7, 0, v38, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v6, 0, v37, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v4, v9, v4, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v8, v8, v14, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v15, 0, v38, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v14, 0, v37, s[4:5]
; SDAG-NEXT: v_addc_u32_e32 v37, vcc, -1, v1, vcc
; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v30
-; SDAG-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
-; SDAG-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
-; SDAG-NEXT: v_mov_b32_e32 v9, 0
+; SDAG-NEXT: v_cndmask_b32_e32 v9, v4, v3, vcc
+; SDAG-NEXT: v_cndmask_b32_e32 v8, v8, v2, vcc
+; SDAG-NEXT: v_mov_b32_e32 v2, 0
+; SDAG-NEXT: v_mov_b32_e32 v3, 0
; SDAG-NEXT: .LBB0_9: ; %udiv-do-while
; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; SDAG-NEXT: v_lshl_b64 v[14:15], v[14:15], 1
+; SDAG-NEXT: v_lshrrev_b32_e32 v4, 31, v9
+; SDAG-NEXT: v_lshl_b64 v[8:9], v[8:9], 1
+; SDAG-NEXT: v_lshrrev_b32_e32 v38, 31, v7
; SDAG-NEXT: v_lshl_b64 v[6:7], v[6:7], 1
-; SDAG-NEXT: v_lshrrev_b32_e32 v8, 31, v3
-; SDAG-NEXT: v_lshl_b64 v[2:3], v[2:3], 1
-; SDAG-NEXT: v_lshrrev_b32_e32 v38, 31, v5
-; SDAG-NEXT: v_lshl_b64 v[4:5], v[4:5], 1
; SDAG-NEXT: v_lshrrev_b32_e32 v39, 31, v11
; SDAG-NEXT: v_lshl_b64 v[10:11], v[10:11], 1
-; SDAG-NEXT: v_or_b32_e32 v6, v6, v8
-; SDAG-NEXT: v_or_b32_e32 v2, v2, v38
-; SDAG-NEXT: v_or_b32_e32 v4, v4, v39
-; SDAG-NEXT: v_or_b32_e32 v5, v13, v5
-; SDAG-NEXT: v_or_b32_e32 v11, v15, v11
-; SDAG-NEXT: v_sub_i32_e32 v8, vcc, v34, v2
-; SDAG-NEXT: v_or_b32_e32 v4, v12, v4
-; SDAG-NEXT: v_subb_u32_e32 v8, vcc, v35, v3, vcc
-; SDAG-NEXT: v_subb_u32_e32 v8, vcc, v36, v6, vcc
-; SDAG-NEXT: v_subb_u32_e32 v8, vcc, v37, v7, vcc
-; SDAG-NEXT: v_ashrrev_i32_e32 v8, 31, v8
-; SDAG-NEXT: v_and_b32_e32 v15, v8, v29
-; SDAG-NEXT: v_and_b32_e32 v38, v8, v28
-; SDAG-NEXT: v_and_b32_e32 v39, v8, v0
-; SDAG-NEXT: v_and_b32_e32 v48, v8, v1
-; SDAG-NEXT: v_sub_i32_e32 v2, vcc, v2, v15
-; SDAG-NEXT: v_subb_u32_e32 v3, vcc, v3, v38, vcc
-; SDAG-NEXT: v_subb_u32_e32 v6, vcc, v6, v39, vcc
-; SDAG-NEXT: v_subb_u32_e32 v7, vcc, v7, v48, vcc
+; SDAG-NEXT: v_or_b32_e32 v4, v14, v4
+; SDAG-NEXT: v_or_b32_e32 v8, v8, v38
+; SDAG-NEXT: v_or_b32_e32 v6, v6, v39
+; SDAG-NEXT: v_or_b32_e32 v7, v13, v7
+; SDAG-NEXT: v_or_b32_e32 v11, v3, v11
+; SDAG-NEXT: v_sub_i32_e32 v3, vcc, v34, v8
+; SDAG-NEXT: v_or_b32_e32 v6, v12, v6
+; SDAG-NEXT: v_subb_u32_e32 v3, vcc, v35, v9, vcc
+; SDAG-NEXT: v_subb_u32_e32 v3, vcc, v36, v4, vcc
+; SDAG-NEXT: v_subb_u32_e32 v3, vcc, v37, v15, vcc
+; SDAG-NEXT: v_ashrrev_i32_e32 v38, 31, v3
+; SDAG-NEXT: v_and_b32_e32 v3, v38, v29
+; SDAG-NEXT: v_and_b32_e32 v14, v38, v28
+; SDAG-NEXT: v_and_b32_e32 v39, v38, v0
+; SDAG-NEXT: v_and_b32_e32 v48, v38, v1
+; SDAG-NEXT: v_sub_i32_e32 v8, vcc, v8, v3
+; SDAG-NEXT: v_subb_u32_e32 v9, vcc, v9, v14, vcc
+; SDAG-NEXT: v_subb_u32_e32 v14, vcc, v4, v39, vcc
+; SDAG-NEXT: v_subb_u32_e32 v15, vcc, v15, v48, vcc
; SDAG-NEXT: v_add_i32_e32 v30, vcc, -1, v30
; SDAG-NEXT: v_addc_u32_e32 v31, vcc, -1, v31, vcc
; SDAG-NEXT: v_addc_u32_e32 v32, vcc, -1, v32, vcc
; SDAG-NEXT: v_addc_u32_e32 v33, vcc, -1, v33, vcc
-; SDAG-NEXT: v_or_b32_e32 v38, v30, v32
-; SDAG-NEXT: v_or_b32_e32 v39, v31, v33
-; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[38:39]
-; SDAG-NEXT: v_and_b32_e32 v8, 1, v8
+; SDAG-NEXT: v_or_b32_e32 v3, v30, v32
+; SDAG-NEXT: v_or_b32_e32 v4, v31, v33
+; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[3:4]
+; SDAG-NEXT: v_and_b32_e32 v4, 1, v38
; SDAG-NEXT: s_or_b64 s[10:11], vcc, s[10:11]
-; SDAG-NEXT: v_or_b32_e32 v10, v14, v10
-; SDAG-NEXT: v_mov_b32_e32 v15, v9
-; SDAG-NEXT: v_mov_b32_e32 v14, v8
+; SDAG-NEXT: v_or_b32_e32 v10, v2, v10
+; SDAG-NEXT: v_mov_b32_e32 v2, v4
+; SDAG-NEXT: v_mov_b32_e32 v3, v5
; SDAG-NEXT: s_andn2_b64 exec, exec, s[10:11]
; SDAG-NEXT: s_cbranch_execnz .LBB0_9
; SDAG-NEXT: ; %bb.10: ; %Flow
; SDAG-NEXT: s_or_b64 exec, exec, s[10:11]
; SDAG-NEXT: .LBB0_11: ; %Flow11
; SDAG-NEXT: s_or_b64 exec, exec, s[8:9]
-; SDAG-NEXT: v_lshl_b64 v[0:1], v[4:5], 1
-; SDAG-NEXT: v_lshrrev_b32_e32 v4, 31, v11
+; SDAG-NEXT: v_lshl_b64 v[0:1], v[6:7], 1
+; SDAG-NEXT: v_lshrrev_b32_e32 v6, 31, v11
; SDAG-NEXT: v_lshl_b64 v[2:3], v[10:11], 1
-; SDAG-NEXT: v_or_b32_e32 v0, v0, v4
+; SDAG-NEXT: v_or_b32_e32 v0, v0, v6
; SDAG-NEXT: v_or_b32_e32 v13, v13, v1
-; SDAG-NEXT: v_or_b32_e32 v14, v9, v3
-; SDAG-NEXT: v_or_b32_e32 v9, v12, v0
-; SDAG-NEXT: v_or_b32_e32 v8, v8, v2
+; SDAG-NEXT: v_or_b32_e32 v14, v5, v3
+; SDAG-NEXT: v_or_b32_e32 v5, v12, v0
+; SDAG-NEXT: v_or_b32_e32 v4, v4, v2
; SDAG-NEXT: .LBB0_12: ; %Flow12
; SDAG-NEXT: s_or_b64 exec, exec, s[6:7]
; SDAG-NEXT: v_xor_b32_e32 v3, v27, v26
; SDAG-NEXT: v_xor_b32_e32 v2, v25, v24
; SDAG-NEXT: v_xor_b32_e32 v7, v21, v20
; SDAG-NEXT: v_xor_b32_e32 v6, v17, v16
-; SDAG-NEXT: v_xor_b32_e32 v4, v18, v3
-; SDAG-NEXT: v_xor_b32_e32 v5, v22, v2
+; SDAG-NEXT: v_xor_b32_e32 v8, v18, v3
+; SDAG-NEXT: v_xor_b32_e32 v9, v22, v2
; SDAG-NEXT: v_xor_b32_e32 v1, v19, v3
; SDAG-NEXT: v_xor_b32_e32 v0, v23, v2
; SDAG-NEXT: v_xor_b32_e32 v10, v13, v7
-; SDAG-NEXT: v_xor_b32_e32 v9, v9, v6
-; SDAG-NEXT: v_xor_b32_e32 v11, v14, v7
+; SDAG-NEXT: v_xor_b32_e32 v11, v5, v6
+; SDAG-NEXT: v_xor_b32_e32 v5, v14, v7
; SDAG-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; SDAG-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
-; SDAG-NEXT: v_subb_u32_e32 v2, vcc, v5, v2, vcc
-; SDAG-NEXT: v_subb_u32_e32 v3, vcc, v4, v3, vcc
-; SDAG-NEXT: v_xor_b32_e32 v4, v8, v6
+; SDAG-NEXT: v_subb_u32_e32 v2, vcc, v9, v2, vcc
+; SDAG-NEXT: v_subb_u32_e32 v3, vcc, v8, v3, vcc
+; SDAG-NEXT: v_xor_b32_e32 v4, v4, v6
; SDAG-NEXT: v_sub_i32_e32 v4, vcc, v4, v6
-; SDAG-NEXT: v_subb_u32_e32 v5, vcc, v11, v7, vcc
-; SDAG-NEXT: v_subb_u32_e32 v6, vcc, v9, v6, vcc
+; SDAG-NEXT: v_subb_u32_e32 v5, vcc, v5, v7, vcc
+; SDAG-NEXT: v_subb_u32_e32 v6, vcc, v11, v6, vcc
; SDAG-NEXT: v_subb_u32_e32 v7, vcc, v10, v7, vcc
; SDAG-NEXT: s_setpc_b64 s[30:31]
;
@@ -869,19 +869,19 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
; SDAG-NEXT: v_cndmask_b32_e64 v17, v21, 0, vcc
; SDAG-NEXT: v_cndmask_b32_e32 v18, v19, v18, vcc
-; SDAG-NEXT: v_sub_i32_e32 v22, vcc, v16, v18
-; SDAG-NEXT: v_subb_u32_e32 v23, vcc, v20, v17, vcc
-; SDAG-NEXT: v_xor_b32_e32 v16, 0x7f, v22
-; SDAG-NEXT: v_subb_u32_e32 v24, vcc, 0, v28, vcc
-; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[8:9], v[22:23]
+; SDAG-NEXT: v_sub_i32_e32 v21, vcc, v16, v18
+; SDAG-NEXT: v_subb_u32_e32 v22, vcc, v20, v17, vcc
+; SDAG-NEXT: v_xor_b32_e32 v16, 0x7f, v21
+; SDAG-NEXT: v_subb_u32_e32 v23, vcc, 0, v28, vcc
+; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[8:9], v[21:22]
; SDAG-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5]
-; SDAG-NEXT: v_subb_u32_e32 v25, vcc, 0, v28, vcc
-; SDAG-NEXT: v_or_b32_e32 v16, v16, v24
-; SDAG-NEXT: v_or_b32_e32 v17, v23, v25
-; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[24:25]
+; SDAG-NEXT: v_subb_u32_e32 v24, vcc, 0, v28, vcc
+; SDAG-NEXT: v_or_b32_e32 v16, v16, v23
+; SDAG-NEXT: v_or_b32_e32 v17, v22, v24
+; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[23:24]
; SDAG-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[16:17]
-; SDAG-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[24:25]
+; SDAG-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[23:24]
; SDAG-NEXT: v_cndmask_b32_e64 v16, v19, v18, s[4:5]
; SDAG-NEXT: v_and_b32_e32 v16, 1, v16
; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v16
@@ -895,118 +895,118 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[8:9]
; SDAG-NEXT: s_cbranch_execz .LBB1_6
; SDAG-NEXT: ; %bb.1: ; %udiv-bb15
-; SDAG-NEXT: v_add_i32_e32 v26, vcc, 1, v22
-; SDAG-NEXT: v_sub_i32_e64 v16, s[4:5], 63, v22
+; SDAG-NEXT: v_add_i32_e32 v18, vcc, 1, v21
+; SDAG-NEXT: v_sub_i32_e64 v16, s[4:5], 63, v21
+; SDAG-NEXT: v_mov_b32_e32 v19, 0
; SDAG-NEXT: v_mov_b32_e32 v20, 0
-; SDAG-NEXT: v_mov_b32_e32 v21, 0
-; SDAG-NEXT: v_addc_u32_e32 v27, vcc, 0, v23, vcc
+; SDAG-NEXT: v_addc_u32_e32 v27, vcc, 0, v22, vcc
; SDAG-NEXT: v_lshl_b64 v[16:17], v[0:1], v16
-; SDAG-NEXT: v_addc_u32_e32 v28, vcc, 0, v24, vcc
-; SDAG-NEXT: v_addc_u32_e32 v29, vcc, 0, v25, vcc
-; SDAG-NEXT: v_or_b32_e32 v18, v26, v28
-; SDAG-NEXT: v_sub_i32_e32 v30, vcc, 0x7f, v22
-; SDAG-NEXT: v_or_b32_e32 v19, v27, v29
-; SDAG-NEXT: v_lshl_b64 v[22:23], v[2:3], v30
-; SDAG-NEXT: v_sub_i32_e32 v31, vcc, 64, v30
-; SDAG-NEXT: v_lshl_b64 v[24:25], v[0:1], v30
-; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[18:19]
-; SDAG-NEXT: v_lshr_b64 v[18:19], v[0:1], v31
-; SDAG-NEXT: v_or_b32_e32 v19, v23, v19
-; SDAG-NEXT: v_or_b32_e32 v18, v22, v18
-; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v30
-; SDAG-NEXT: v_cndmask_b32_e64 v19, v17, v19, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v18, v16, v18, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v17, 0, v25, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v16, 0, v24, s[4:5]
-; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v30
-; SDAG-NEXT: v_cndmask_b32_e64 v19, v19, v3, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v18, v18, v2, s[4:5]
-; SDAG-NEXT: v_mov_b32_e32 v22, 0
+; SDAG-NEXT: v_addc_u32_e32 v28, vcc, 0, v23, vcc
+; SDAG-NEXT: v_addc_u32_e32 v29, vcc, 0, v24, vcc
+; SDAG-NEXT: v_or_b32_e32 v22, v18, v28
+; SDAG-NEXT: v_sub_i32_e32 v26, vcc, 0x7f, v21
+; SDAG-NEXT: v_or_b32_e32 v23, v27, v29
+; SDAG-NEXT: v_lshl_b64 v[24:25], v[2:3], v26
+; SDAG-NEXT: v_sub_i32_e32 v21, vcc, 64, v26
+; SDAG-NEXT: v_lshl_b64 v[30:31], v[0:1], v26
+; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[22:23]
+; SDAG-NEXT: v_lshr_b64 v[21:22], v[0:1], v21
+; SDAG-NEXT: v_or_b32_e32 v22, v25, v22
+; SDAG-NEXT: v_or_b32_e32 v21, v24, v21
+; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v26
+; SDAG-NEXT: v_cndmask_b32_e64 v22, v17, v22, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v21, v16, v21, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v17, 0, v31, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v16, 0, v30, s[4:5]
+; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v26
+; SDAG-NEXT: v_cndmask_b32_e64 v22, v22, v3, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v21, v21, v2, s[4:5]
; SDAG-NEXT: v_mov_b32_e32 v23, 0
+; SDAG-NEXT: v_mov_b32_e32 v24, 0
; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SDAG-NEXT: s_xor_b64 s[8:9], exec, s[4:5]
; SDAG-NEXT: s_cbranch_execz .LBB1_5
; SDAG-NEXT: ; %bb.2: ; %udiv-preheader4
-; SDAG-NEXT: v_lshr_b64 v[20:21], v[0:1], v26
-; SDAG-NEXT: v_sub_i32_e32 v22, vcc, 64, v26
-; SDAG-NEXT: v_lshl_b64 v[22:23], v[2:3], v22
-; SDAG-NEXT: v_or_b32_e32 v23, v21, v23
-; SDAG-NEXT: v_or_b32_e32 v22, v20, v22
-; SDAG-NEXT: v_cmp_gt_u32_e32 vcc, 64, v26
-; SDAG-NEXT: v_subrev_i32_e64 v20, s[4:5], 64, v26
-; SDAG-NEXT: v_lshr_b64 v[20:21], v[2:3], v20
-; SDAG-NEXT: v_cndmask_b32_e32 v21, v21, v23, vcc
-; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v26
-; SDAG-NEXT: v_cndmask_b32_e64 v1, v21, v1, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e32 v20, v20, v22, vcc
-; SDAG-NEXT: v_cndmask_b32_e64 v0, v20, v0, s[4:5]
-; SDAG-NEXT: v_lshr_b64 v[2:3], v[2:3], v26
+; SDAG-NEXT: v_lshr_b64 v[19:20], v[0:1], v18
+; SDAG-NEXT: v_sub_i32_e32 v23, vcc, 64, v18
+; SDAG-NEXT: v_lshl_b64 v[23:24], v[2:3], v23
+; SDAG-NEXT: v_or_b32_e32 v24, v20, v24
+; SDAG-NEXT: v_or_b32_e32 v23, v19, v23
+; SDAG-NEXT: v_cmp_gt_u32_e32 vcc, 64, v18
+; SDAG-NEXT: v_subrev_i32_e64 v19, s[4:5], 64, v18
+; SDAG-NEXT: v_lshr_b64 v[19:20], v[2:3], v19
+; SDAG-NEXT: v_cndmask_b32_e32 v20, v20, v24, vcc
+; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v18
+; SDAG-NEXT: v_cndmask_b32_e64 v1, v20, v1, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e32 v19, v19, v23, vcc
+; SDAG-NEXT: v_cndmask_b32_e64 v0, v19, v0, s[4:5]
+; SDAG-NEXT: v_lshr_b64 v[2:3], v[2:3], v18
; SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
; SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; SDAG-NEXT: v_add_i32_e32 v30, vcc, -1, v8
; SDAG-NEXT: v_addc_u32_e32 v31, vcc, -1, v9, vcc
; SDAG-NEXT: v_addc_u32_e32 v32, vcc, -1, v10, vcc
; SDAG-NEXT: v_addc_u32_e32 v33, vcc, -1, v11, vcc
-; SDAG-NEXT: v_mov_b32_e32 v22, 0
; SDAG-NEXT: v_mov_b32_e32 v23, 0
; SDAG-NEXT: v_mov_b32_e32 v24, 0
-; SDAG-NEXT: v_mov_b32_e32 v25, 0
; SDAG-NEXT: s_mov_b64 s[4:5], 0
-; SDAG-NEXT: v_mov_b32_e32 v21, 0
+; SDAG-NEXT: v_mov_b32_e32 v20, 0
+; SDAG-NEXT: v_mov_b32_e32 v25, 0
+; SDAG-NEXT: v_mov_b32_e32 v26, 0
; SDAG-NEXT: .LBB1_3: ; %udiv-do-while3
; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; SDAG-NEXT: v_lshrrev_b32_e32 v34, 31, v17
; SDAG-NEXT: v_lshl_b64 v[16:17], v[16:17], 1
-; SDAG-NEXT: v_or_b32_e32 v17, v25, v17
-; SDAG-NEXT: v_or_b32_e32 v16, v24, v16
+; SDAG-NEXT: v_or_b32_e32 v17, v26, v17
+; SDAG-NEXT: v_or_b32_e32 v16, v25, v16
; SDAG-NEXT: v_lshl_b64 v[2:3], v[2:3], 1
-; SDAG-NEXT: v_lshrrev_b32_e32 v20, 31, v1
+; SDAG-NEXT: v_lshrrev_b32_e32 v19, 31, v1
; SDAG-NEXT: v_lshl_b64 v[0:1], v[0:1], 1
-; SDAG-NEXT: v_or_b32_e32 v2, v2, v20
-; SDAG-NEXT: v_lshrrev_b32_e32 v20, 31, v19
-; SDAG-NEXT: v_or_b32_e32 v0, v0, v20
-; SDAG-NEXT: v_sub_i32_e32 v20, vcc, v30, v0
-; SDAG-NEXT: v_subb_u32_e32 v20, vcc, v31, v1, vcc
-; SDAG-NEXT: v_subb_u32_e32 v20, vcc, v32, v2, vcc
-; SDAG-NEXT: v_subb_u32_e32 v20, vcc, v33, v3, vcc
-; SDAG-NEXT: v_ashrrev_i32_e32 v20, 31, v20
-; SDAG-NEXT: v_and_b32_e32 v24, v20, v8
-; SDAG-NEXT: v_sub_i32_e32 v0, vcc, v0, v24
-; SDAG-NEXT: v_and_b32_e32 v24, v20, v9
-; SDAG-NEXT: v_subb_u32_e32 v1, vcc, v1, v24, vcc
-; SDAG-NEXT: v_and_b32_e32 v24, v20, v10
-; SDAG-NEXT: v_subb_u32_e32 v2, vcc, v2, v24, vcc
-; SDAG-NEXT: v_and_b32_e32 v24, v20, v11
-; SDAG-NEXT: v_subb_u32_e32 v3, vcc, v3, v24, vcc
-; SDAG-NEXT: v_add_i32_e32 v26, vcc, -1, v26
+; SDAG-NEXT: v_or_b32_e32 v2, v2, v19
+; SDAG-NEXT: v_lshrrev_b32_e32 v19, 31, v22
+; SDAG-NEXT: v_or_b32_e32 v0, v0, v19
+; SDAG-NEXT: v_sub_i32_e32 v19, vcc, v30, v0
+; SDAG-NEXT: v_subb_u32_e32 v19, vcc, v31, v1, vcc
+; SDAG-NEXT: v_subb_u32_e32 v19, vcc, v32, v2, vcc
+; SDAG-NEXT: v_subb_u32_e32 v19, vcc, v33, v3, vcc
+; SDAG-NEXT: v_ashrrev_i32_e32 v19, 31, v19
+; SDAG-NEXT: v_and_b32_e32 v25, v19, v8
+; SDAG-NEXT: v_sub_i32_e32 v0, vcc, v0, v25
+; SDAG-NEXT: v_and_b32_e32 v25, v19, v9
+; SDAG-NEXT: v_subb_u32_e32 v1, vcc, v1, v25, vcc
+; SDAG-NEXT: v_and_b32_e32 v25, v19, v10
+; SDAG-NEXT: v_subb_u32_e32 v2, vcc, v2, v25, vcc
+; SDAG-NEXT: v_and_b32_e32 v25, v19, v11
+; SDAG-NEXT: v_subb_u32_e32 v3, vcc, v3, v25, vcc
+; SDAG-NEXT: v_add_i32_e32 v18, vcc, -1, v18
; SDAG-NEXT: v_addc_u32_e32 v27, vcc, -1, v27, vcc
; SDAG-NEXT: v_addc_u32_e32 v28, vcc, -1, v28, vcc
; SDAG-NEXT: v_addc_u32_e32 v29, vcc, -1, v29, vcc
-; SDAG-NEXT: v_or_b32_e32 v24, v26, v28
-; SDAG-NEXT: v_or_b32_e32 v25, v27, v29
-; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[24:25]
-; SDAG-NEXT: v_and_b32_e32 v20, 1, v20
-; SDAG-NEXT: v_lshl_b64 v[18:19], v[18:19], 1
-; SDAG-NEXT: v_or_b32_e32 v18, v18, v34
-; SDAG-NEXT: v_or_b32_e32 v19, v23, v19
+; SDAG-NEXT: v_or_b32_e32 v25, v18, v28
+; SDAG-NEXT: v_or_b32_e32 v26, v27, v29
+; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[25:26]
+; SDAG-NEXT: v_and_b32_e32 v19, 1, v19
+; SDAG-NEXT: v_lshl_b64 v[21:22], v[21:22], 1
+; SDAG-NEXT: v_or_b32_e32 v21, v21, v34
+; SDAG-NEXT: v_or_b32_e32 v22, v24, v22
; SDAG-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; SDAG-NEXT: v_or_b32_e32 v18, v22, v18
-; SDAG-NEXT: v_mov_b32_e32 v25, v21
-; SDAG-NEXT: v_mov_b32_e32 v24, v20
+; SDAG-NEXT: v_or_b32_e32 v21, v23, v21
+; SDAG-NEXT: v_mov_b32_e32 v26, v20
+; SDAG-NEXT: v_mov_b32_e32 v25, v19
; SDAG-NEXT: s_andn2_b64 exec, exec, s[4:5]
; SDAG-NEXT: s_cbranch_execnz .LBB1_3
; SDAG-NEXT: ; %bb.4: ; %Flow13
; SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
; SDAG-NEXT: .LBB1_5: ; %Flow14
; SDAG-NEXT: s_or_b64 exec, exec, s[8:9]
-; SDAG-NEXT: v_lshl_b64 v[0:1], v[18:19], 1
+; SDAG-NEXT: v_lshl_b64 v[0:1], v[21:22], 1
; SDAG-NEXT: v_lshrrev_b32_e32 v8, 31, v17
; SDAG-NEXT: v_lshl_b64 v[2:3], v[16:17], 1
; SDAG-NEXT: v_or_b32_e32 v0, v0, v8
-; SDAG-NEXT: v_or_b32_e32 v16, v23, v1
-; SDAG-NEXT: v_or_b32_e32 v18, v21, v3
-; SDAG-NEXT: v_or_b32_e32 v17, v22, v0
-; SDAG-NEXT: v_or_b32_e32 v19, v20, v2
+; SDAG-NEXT: v_or_b32_e32 v16, v24, v1
+; SDAG-NEXT: v_or_b32_e32 v18, v20, v3
+; SDAG-NEXT: v_or_b32_e32 v17, v23, v0
+; SDAG-NEXT: v_or_b32_e32 v19, v19, v2
; SDAG-NEXT: .LBB1_6: ; %Flow16
; SDAG-NEXT: s_or_b64 exec, exec, s[6:7]
; SDAG-NEXT: v_or_b32_e32 v1, v13, v15
@@ -1044,22 +1044,22 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7]
; SDAG-NEXT: v_cndmask_b32_e64 v1, v9, 0, vcc
; SDAG-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; SDAG-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
-; SDAG-NEXT: v_subb_u32_e32 v1, vcc, v8, v1, vcc
-; SDAG-NEXT: v_xor_b32_e32 v2, 0x7f, v0
+; SDAG-NEXT: v_sub_i32_e32 v2, vcc, v0, v2
+; SDAG-NEXT: v_subb_u32_e32 v3, vcc, v8, v1, vcc
+; SDAG-NEXT: v_xor_b32_e32 v0, 0x7f, v2
; SDAG-NEXT: v_subb_u32_e32 v20, vcc, 0, v24, vcc
-; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[8:9], v[0:1]
+; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[8:9], v[2:3]
; SDAG-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5]
; SDAG-NEXT: v_subb_u32_e32 v21, vcc, 0, v24, vcc
-; SDAG-NEXT: v_or_b32_e32 v2, v2, v20
+; SDAG-NEXT: v_or_b32_e32 v0, v0, v20
; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[20:21]
; SDAG-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; SDAG-NEXT: v_or_b32_e32 v3, v1, v21
+; SDAG-NEXT: v_or_b32_e32 v1, v3, v21
; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[20:21]
; SDAG-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc
-; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
-; SDAG-NEXT: v_and_b32_e32 v2, 1, v8
-; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v2
+; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; SDAG-NEXT: v_and_b32_e32 v0, 1, v8
+; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v0
; SDAG-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v8, v7, 0, s[4:5]
; SDAG-NEXT: s_xor_b64 s[6:7], s[4:5], -1
@@ -1070,93 +1070,93 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
; SDAG-NEXT: s_cbranch_execz .LBB1_12
; SDAG-NEXT: ; %bb.7: ; %udiv-bb1
-; SDAG-NEXT: v_add_i32_e32 v22, vcc, 1, v0
-; SDAG-NEXT: v_sub_i32_e64 v8, s[4:5], 63, v0
-; SDAG-NEXT: v_mov_b32_e32 v2, 0
-; SDAG-NEXT: v_mov_b32_e32 v3, 0
-; SDAG-NEXT: v_addc_u32_e32 v23, vcc, 0, v1, vcc
+; SDAG-NEXT: v_add_i32_e32 v22, vcc, 1, v2
+; SDAG-NEXT: v_sub_i32_e64 v8, s[4:5], 63, v2
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-NEXT: v_addc_u32_e32 v23, vcc, 0, v3, vcc
; SDAG-NEXT: v_lshl_b64 v[8:9], v[4:5], v8
; SDAG-NEXT: v_addc_u32_e32 v24, vcc, 0, v20, vcc
; SDAG-NEXT: v_addc_u32_e32 v25, vcc, 0, v21, vcc
; SDAG-NEXT: v_or_b32_e32 v10, v22, v24
-; SDAG-NEXT: v_sub_i32_e32 v26, vcc, 0x7f, v0
+; SDAG-NEXT: v_sub_i32_e32 v26, vcc, 0x7f, v2
; SDAG-NEXT: v_or_b32_e32 v11, v23, v25
-; SDAG-NEXT: v_lshl_b64 v[0:1], v[6:7], v26
+; SDAG-NEXT: v_lshl_b64 v[2:3], v[6:7], v26
; SDAG-NEXT: v_sub_i32_e32 v27, vcc, 64, v26
; SDAG-NEXT: v_lshl_b64 v[20:21], v[4:5], v26
; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11]
; SDAG-NEXT: v_lshr_b64 v[10:11], v[4:5], v27
-; SDAG-NEXT: v_or_b32_e32 v1, v1, v11
-; SDAG-NEXT: v_or_b32_e32 v0, v0, v10
+; SDAG-NEXT: v_or_b32_e32 v3, v3, v11
+; SDAG-NEXT: v_or_b32_e32 v2, v2, v10
; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v26
-; SDAG-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v9, 0, v21, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v8, 0, v20, s[4:5]
; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v26
-; SDAG-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[4:5]
; SDAG-NEXT: v_mov_b32_e32 v20, 0
; SDAG-NEXT: v_mov_b32_e32 v21, 0
; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SDAG-NEXT: s_xor_b64 s[8:9], exec, s[4:5]
; SDAG-NEXT: s_cbranch_execz .LBB1_11
; SDAG-NEXT: ; %bb.8: ; %udiv-preheader
-; SDAG-NEXT: v_lshr_b64 v[2:3], v[4:5], v22
-; SDAG-NEXT: v_sub_i32_e32 v27, vcc, 64, v22
-; SDAG-NEXT: v_subrev_i32_e32 v28, vcc, 64, v22
+; SDAG-NEXT: v_lshr_b64 v[10:11], v[4:5], v22
+; SDAG-NEXT: v_sub_i32_e32 v0, vcc, 64, v22
+; SDAG-NEXT: v_subrev_i32_e32 v27, vcc, 64, v22
; SDAG-NEXT: v_lshr_b64 v[29:30], v[6:7], v22
; SDAG-NEXT: v_add_i32_e32 v26, vcc, -1, v12
; SDAG-NEXT: v_mov_b32_e32 v20, 0
; SDAG-NEXT: v_mov_b32_e32 v21, 0
-; SDAG-NEXT: v_mov_b32_e32 v10, 0
-; SDAG-NEXT: v_mov_b32_e32 v11, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
-; SDAG-NEXT: v_lshl_b64 v[31:32], v[6:7], v27
-; SDAG-NEXT: v_lshr_b64 v[6:7], v[6:7], v28
+; SDAG-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-NEXT: v_lshl_b64 v[31:32], v[6:7], v0
+; SDAG-NEXT: v_lshr_b64 v[6:7], v[6:7], v27
; SDAG-NEXT: v_addc_u32_e32 v27, vcc, -1, v13, vcc
-; SDAG-NEXT: v_or_b32_e32 v3, v3, v32
-; SDAG-NEXT: v_or_b32_e32 v2, v2, v31
+; SDAG-NEXT: v_or_b32_e32 v0, v11, v32
+; SDAG-NEXT: v_or_b32_e32 v10, v10, v31
; SDAG-NEXT: v_addc_u32_e32 v28, vcc, -1, v14, vcc
; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v22
-; SDAG-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v7, 0, v30, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v6, 0, v29, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v0, v7, v0, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v6, v6, v10, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v11, 0, v30, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v10, 0, v29, s[4:5]
; SDAG-NEXT: v_addc_u32_e32 v29, vcc, -1, v15, vcc
; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v22
-; SDAG-NEXT: v_cndmask_b32_e32 v5, v3, v5, vcc
-; SDAG-NEXT: v_cndmask_b32_e32 v4, v2, v4, vcc
-; SDAG-NEXT: v_mov_b32_e32 v3, 0
+; SDAG-NEXT: v_cndmask_b32_e32 v7, v0, v5, vcc
+; SDAG-NEXT: v_cndmask_b32_e32 v6, v6, v4, vcc
+; SDAG-NEXT: v_mov_b32_e32 v4, 0
+; SDAG-NEXT: v_mov_b32_e32 v5, 0
; SDAG-NEXT: .LBB1_9: ; %udiv-do-while
; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; SDAG-NEXT: v_lshl_b64 v[10:11], v[10:11], 1
+; SDAG-NEXT: v_lshrrev_b32_e32 v0, 31, v7
; SDAG-NEXT: v_lshl_b64 v[6:7], v[6:7], 1
-; SDAG-NEXT: v_lshrrev_b32_e32 v2, 31, v5
-; SDAG-NEXT: v_lshl_b64 v[4:5], v[4:5], 1
-; SDAG-NEXT: v_lshrrev_b32_e32 v30, 31, v1
-; SDAG-NEXT: v_lshl_b64 v[0:1], v[0:1], 1
+; SDAG-NEXT: v_lshrrev_b32_e32 v30, 31, v3
+; SDAG-NEXT: v_lshl_b64 v[2:3], v[2:3], 1
; SDAG-NEXT: v_lshrrev_b32_e32 v31, 31, v9
; SDAG-NEXT: v_lshl_b64 v[8:9], v[8:9], 1
-; SDAG-NEXT: v_or_b32_e32 v6, v6, v2
-; SDAG-NEXT: v_or_b32_e32 v2, v4, v30
-; SDAG-NEXT: v_or_b32_e32 v0, v0, v31
-; SDAG-NEXT: v_or_b32_e32 v1, v21, v1
-; SDAG-NEXT: v_sub_i32_e32 v4, vcc, v26, v2
-; SDAG-NEXT: v_subb_u32_e32 v4, vcc, v27, v5, vcc
-; SDAG-NEXT: v_subb_u32_e32 v4, vcc, v28, v6, vcc
-; SDAG-NEXT: v_subb_u32_e32 v4, vcc, v29, v7, vcc
-; SDAG-NEXT: v_ashrrev_i32_e32 v30, 31, v4
+; SDAG-NEXT: v_or_b32_e32 v10, v10, v0
+; SDAG-NEXT: v_or_b32_e32 v0, v6, v30
+; SDAG-NEXT: v_or_b32_e32 v2, v2, v31
+; SDAG-NEXT: v_or_b32_e32 v3, v21, v3
+; SDAG-NEXT: v_sub_i32_e32 v6, vcc, v26, v0
+; SDAG-NEXT: v_subb_u32_e32 v6, vcc, v27, v7, vcc
+; SDAG-NEXT: v_subb_u32_e32 v6, vcc, v28, v10, vcc
+; SDAG-NEXT: v_subb_u32_e32 v6, vcc, v29, v11, vcc
+; SDAG-NEXT: v_ashrrev_i32_e32 v30, 31, v6
; SDAG-NEXT: v_and_b32_e32 v31, v30, v13
-; SDAG-NEXT: v_and_b32_e32 v4, v30, v12
-; SDAG-NEXT: v_sub_i32_e32 v4, vcc, v2, v4
-; SDAG-NEXT: v_subb_u32_e32 v5, vcc, v5, v31, vcc
-; SDAG-NEXT: v_or_b32_e32 v9, v11, v9
-; SDAG-NEXT: v_or_b32_e32 v0, v20, v0
-; SDAG-NEXT: v_and_b32_e32 v2, 1, v30
-; SDAG-NEXT: v_and_b32_e32 v11, v30, v15
+; SDAG-NEXT: v_and_b32_e32 v6, v30, v12
+; SDAG-NEXT: v_sub_i32_e32 v6, vcc, v0, v6
+; SDAG-NEXT: v_subb_u32_e32 v7, vcc, v7, v31, vcc
+; SDAG-NEXT: v_or_b32_e32 v9, v5, v9
+; SDAG-NEXT: v_or_b32_e32 v2, v20, v2
+; SDAG-NEXT: v_and_b32_e32 v0, 1, v30
+; SDAG-NEXT: v_and_b32_e32 v5, v30, v15
; SDAG-NEXT: v_and_b32_e32 v30, v30, v14
-; SDAG-NEXT: v_subb_u32_e32 v6, vcc, v6, v30, vcc
-; SDAG-NEXT: v_subb_u32_e32 v7, vcc, v7, v11, vcc
+; SDAG-NEXT: v_subb_u32_e32 v10, vcc, v10, v30, vcc
+; SDAG-NEXT: v_subb_u32_e32 v11, vcc, v11, v5, vcc
; SDAG-NEXT: v_add_i32_e32 v22, vcc, -1, v22
; SDAG-NEXT: v_addc_u32_e32 v23, vcc, -1, v23, vcc
; SDAG-NEXT: v_addc_u32_e32 v24, vcc, -1, v24, vcc
@@ -1165,23 +1165,23 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_or_b32_e32 v30, v22, v24
; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[30:31]
; SDAG-NEXT: s_or_b64 s[10:11], vcc, s[10:11]
-; SDAG-NEXT: v_or_b32_e32 v8, v10, v8
-; SDAG-NEXT: v_mov_b32_e32 v11, v3
-; SDAG-NEXT: v_mov_b32_e32 v10, v2
+; SDAG-NEXT: v_or_b32_e32 v8, v4, v8
+; SDAG-NEXT: v_mov_b32_e32 v5, v1
+; SDAG-NEXT: v_mov_b32_e32 v4, v0
; SDAG-NEXT: s_andn2_b64 exec, exec, s[10:11]
; SDAG-NEXT: s_cbranch_execnz .LBB1_9
; SDAG-NEXT: ; %bb.10: ; %Flow
; SDAG-NEXT: s_or_b64 exec, exec, s[10:11]
; SDAG-NEXT: .LBB1_11: ; %Flow11
; SDAG-NEXT: s_or_b64 exec, exec, s[8:9]
-; SDAG-NEXT: v_lshl_b64 v[0:1], v[0:1], 1
+; SDAG-NEXT: v_lshl_b64 v[2:3], v[2:3], 1
; SDAG-NEXT: v_lshrrev_b32_e32 v6, 31, v9
; SDAG-NEXT: v_lshl_b64 v[4:5], v[8:9], 1
-; SDAG-NEXT: v_or_b32_e32 v0, v0, v6
-; SDAG-NEXT: v_or_b32_e32 v8, v21, v1
-; SDAG-NEXT: v_or_b32_e32 v10, v3, v5
-; SDAG-NEXT: v_or_b32_e32 v9, v20, v0
-; SDAG-NEXT: v_or_b32_e32 v11, v2, v4
+; SDAG-NEXT: v_or_b32_e32 v2, v2, v6
+; SDAG-NEXT: v_or_b32_e32 v8, v21, v3
+; SDAG-NEXT: v_or_b32_e32 v10, v1, v5
+; SDAG-NEXT: v_or_b32_e32 v9, v20, v2
+; SDAG-NEXT: v_or_b32_e32 v11, v0, v4
; SDAG-NEXT: .LBB1_12: ; %Flow12
; SDAG-NEXT: s_or_b64 exec, exec, s[6:7]
; SDAG-NEXT: v_mov_b32_e32 v0, v19
@@ -1674,32 +1674,32 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: s_xor_b64 s[8:9], exec, s[4:5]
; SDAG-NEXT: s_cbranch_execz .LBB2_5
; SDAG-NEXT: ; %bb.2: ; %udiv-preheader4
-; SDAG-NEXT: v_lshr_b64 v[8:9], v[16:17], v32
-; SDAG-NEXT: v_sub_i32_e32 v26, vcc, 64, v32
+; SDAG-NEXT: v_lshr_b64 v[22:23], v[16:17], v32
+; SDAG-NEXT: v_sub_i32_e32 v8, vcc, 64, v32
; SDAG-NEXT: v_subrev_i32_e32 v37, vcc, 64, v32
; SDAG-NEXT: v_lshr_b64 v[24:25], v[0:1], v32
; SDAG-NEXT: v_add_i32_e32 v36, vcc, -1, v31
; SDAG-NEXT: v_mov_b32_e32 v18, 0
; SDAG-NEXT: v_mov_b32_e32 v19, 0
-; SDAG-NEXT: v_mov_b32_e32 v22, 0
-; SDAG-NEXT: v_mov_b32_e32 v23, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
-; SDAG-NEXT: v_lshl_b64 v[26:27], v[0:1], v26
+; SDAG-NEXT: v_mov_b32_e32 v9, 0
+; SDAG-NEXT: v_lshl_b64 v[26:27], v[0:1], v8
; SDAG-NEXT: v_lshr_b64 v[48:49], v[0:1], v37
; SDAG-NEXT: v_addc_u32_e32 v37, vcc, -1, v30, vcc
-; SDAG-NEXT: v_or_b32_e32 v9, v9, v27
-; SDAG-NEXT: v_or_b32_e32 v8, v8, v26
+; SDAG-NEXT: v_or_b32_e32 v8, v23, v27
+; SDAG-NEXT: v_or_b32_e32 v22, v22, v26
; SDAG-NEXT: v_addc_u32_e32 v38, vcc, -1, v2, vcc
; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v32
-; SDAG-NEXT: v_cndmask_b32_e64 v9, v49, v9, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v8, v48, v8, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v8, v49, v8, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v22, v48, v22, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v27, 0, v25, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v26, 0, v24, s[4:5]
; SDAG-NEXT: v_addc_u32_e32 v39, vcc, -1, v3, vcc
; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v32
-; SDAG-NEXT: v_cndmask_b32_e32 v25, v9, v17, vcc
-; SDAG-NEXT: v_cndmask_b32_e32 v24, v8, v16, vcc
-; SDAG-NEXT: v_mov_b32_e32 v9, 0
+; SDAG-NEXT: v_cndmask_b32_e32 v25, v8, v17, vcc
+; SDAG-NEXT: v_cndmask_b32_e32 v24, v22, v16, vcc
+; SDAG-NEXT: v_mov_b32_e32 v22, 0
+; SDAG-NEXT: v_mov_b32_e32 v23, 0
; SDAG-NEXT: .LBB2_3: ; %udiv-do-while3
; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; SDAG-NEXT: v_lshrrev_b32_e32 v8, 31, v21
@@ -1813,109 +1813,109 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_cndmask_b32_e32 v10, v11, v10, vcc
; SDAG-NEXT: v_sub_i32_e32 v10, vcc, v10, v19
; SDAG-NEXT: v_subb_u32_e32 v11, vcc, v13, v12, vcc
-; SDAG-NEXT: v_xor_b32_e32 v14, 0x7f, v10
-; SDAG-NEXT: v_subb_u32_e32 v12, vcc, 0, v18, vcc
+; SDAG-NEXT: v_xor_b32_e32 v12, 0x7f, v10
+; SDAG-NEXT: v_subb_u32_e32 v14, vcc, 0, v18, vcc
; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[10:11], v[10:11]
; SDAG-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5]
-; SDAG-NEXT: v_subb_u32_e32 v13, vcc, 0, v18, vcc
-; SDAG-NEXT: v_or_b32_e32 v14, v14, v12
-; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[12:13]
+; SDAG-NEXT: v_subb_u32_e32 v15, vcc, 0, v18, vcc
+; SDAG-NEXT: v_or_b32_e32 v12, v12, v14
+; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[14:15]
; SDAG-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
-; SDAG-NEXT: v_or_b32_e32 v15, v11, v13
-; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[12:13]
+; SDAG-NEXT: v_or_b32_e32 v13, v11, v15
+; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15]
; SDAG-NEXT: v_cndmask_b32_e32 v18, v18, v19, vcc
-; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[14:15]
-; SDAG-NEXT: v_and_b32_e32 v14, 1, v18
-; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v14
+; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[12:13]
+; SDAG-NEXT: v_and_b32_e32 v12, 1, v18
+; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v12
; SDAG-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v19, v5, 0, s[4:5]
; SDAG-NEXT: s_xor_b64 s[6:7], s[4:5], -1
; SDAG-NEXT: v_cndmask_b32_e64 v18, v4, 0, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v15, v9, 0, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v14, v8, 0, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v13, v9, 0, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v12, v8, 0, s[4:5]
; SDAG-NEXT: s_and_b64 s[4:5], s[6:7], vcc
; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
; SDAG-NEXT: s_cbranch_execz .LBB2_12
; SDAG-NEXT: ; %bb.7: ; %udiv-bb1
; SDAG-NEXT: v_add_i32_e32 v38, vcc, 1, v10
; SDAG-NEXT: v_sub_i32_e64 v18, s[4:5], 63, v10
-; SDAG-NEXT: v_mov_b32_e32 v14, 0
-; SDAG-NEXT: v_mov_b32_e32 v15, 0
+; SDAG-NEXT: v_mov_b32_e32 v12, 0
+; SDAG-NEXT: v_mov_b32_e32 v13, 0
; SDAG-NEXT: v_addc_u32_e32 v39, vcc, 0, v11, vcc
; SDAG-NEXT: v_lshl_b64 v[18:19], v[8:9], v18
-; SDAG-NEXT: v_addc_u32_e32 v48, vcc, 0, v12, vcc
-; SDAG-NEXT: v_addc_u32_e32 v49, vcc, 0, v13, vcc
-; SDAG-NEXT: v_or_b32_e32 v11, v38, v48
-; SDAG-NEXT: v_sub_i32_e32 v13, vcc, 0x7f, v10
-; SDAG-NEXT: v_or_b32_e32 v12, v39, v49
-; SDAG-NEXT: v_lshl_b64 v[20:21], v[4:5], v13
-; SDAG-NEXT: v_sub_i32_e32 v10, vcc, 64, v13
-; SDAG-NEXT: v_lshl_b64 v[22:23], v[8:9], v13
-; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[11:12]
-; SDAG-NEXT: v_lshr_b64 v[10:11], v[8:9], v10
-; SDAG-NEXT: v_or_b32_e32 v11, v21, v11
-; SDAG-NEXT: v_or_b32_e32 v10, v20, v10
-; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v13
-; SDAG-NEXT: v_cndmask_b32_e64 v12, v19, v11, s[4:5]
+; SDAG-NEXT: v_addc_u32_e32 v48, vcc, 0, v14, vcc
+; SDAG-NEXT: v_addc_u32_e32 v49, vcc, 0, v15, vcc
+; SDAG-NEXT: v_or_b32_e32 v14, v38, v48
+; SDAG-NEXT: v_sub_i32_e32 v22, vcc, 0x7f, v10
+; SDAG-NEXT: v_or_b32_e32 v15, v39, v49
+; SDAG-NEXT: v_lshl_b64 v[10:11], v[4:5], v22
+; SDAG-NEXT: v_sub_i32_e32 v23, vcc, 64, v22
+; SDAG-NEXT: v_lshl_b64 v[20:21], v[8:9], v22
+; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[14:15]
+; SDAG-NEXT: v_lshr_b64 v[14:15], v[8:9], v23
+; SDAG-NEXT: v_or_b32_e32 v11, v11, v15
+; SDAG-NEXT: v_or_b32_e32 v10, v10, v14
+; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v22
+; SDAG-NEXT: v_cndmask_b32_e64 v14, v19, v11, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v18, v18, v10, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v11, 0, v23, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v10, 0, v22, s[4:5]
-; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v13
-; SDAG-NEXT: v_cndmask_b32_e64 v13, v12, v5, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v12, v18, v4, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v11, 0, v21, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v10, 0, v20, s[4:5]
+; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v22
+; SDAG-NEXT: v_cndmask_b32_e64 v15, v14, v5, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v14, v18, v4, s[4:5]
; SDAG-NEXT: v_mov_b32_e32 v18, 0
; SDAG-NEXT: v_mov_b32_e32 v19, 0
; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SDAG-NEXT: s_xor_b64 s[8:9], exec, s[4:5]
; SDAG-NEXT: s_cbranch_execz .LBB2_11
; SDAG-NEXT: ; %bb.8: ; %udiv-preheader
-; SDAG-NEXT: v_lshr_b64 v[14:15], v[8:9], v38
-; SDAG-NEXT: v_sub_i32_e32 v24, vcc, 64, v38
+; SDAG-NEXT: v_lshr_b64 v[20:21], v[8:9], v38
+; SDAG-NEXT: v_sub_i32_e32 v12, vcc, 64, v38
; SDAG-NEXT: v_subrev_i32_e32 v51, vcc, 64, v38
; SDAG-NEXT: v_lshr_b64 v[22:23], v[4:5], v38
; SDAG-NEXT: v_add_i32_e32 v50, vcc, -1, v37
; SDAG-NEXT: v_mov_b32_e32 v18, 0
; SDAG-NEXT: v_mov_b32_e32 v19, 0
-; SDAG-NEXT: v_mov_b32_e32 v20, 0
-; SDAG-NEXT: v_mov_b32_e32 v21, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
-; SDAG-NEXT: v_lshl_b64 v[24:25], v[4:5], v24
+; SDAG-NEXT: v_mov_b32_e32 v13, 0
+; SDAG-NEXT: v_lshl_b64 v[24:25], v[4:5], v12
; SDAG-NEXT: v_lshr_b64 v[53:54], v[4:5], v51
; SDAG-NEXT: v_addc_u32_e32 v51, vcc, -1, v36, vcc
-; SDAG-NEXT: v_or_b32_e32 v15, v15, v25
-; SDAG-NEXT: v_or_b32_e32 v14, v14, v24
+; SDAG-NEXT: v_or_b32_e32 v12, v21, v25
+; SDAG-NEXT: v_or_b32_e32 v20, v20, v24
; SDAG-NEXT: v_addc_u32_e32 v52, vcc, -1, v6, vcc
; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v38
-; SDAG-NEXT: v_cndmask_b32_e64 v15, v54, v15, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v14, v53, v14, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v12, v54, v12, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v20, v53, v20, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v25, 0, v23, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v24, 0, v22, s[4:5]
; SDAG-NEXT: v_addc_u32_e32 v53, vcc, -1, v7, vcc
; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v38
-; SDAG-NEXT: v_cndmask_b32_e32 v23, v15, v9, vcc
-; SDAG-NEXT: v_cndmask_b32_e32 v22, v14, v8, vcc
-; SDAG-NEXT: v_mov_b32_e32 v15, 0
+; SDAG-NEXT: v_cndmask_b32_e32 v23, v12, v9, vcc
+; SDAG-NEXT: v_cndmask_b32_e32 v22, v20, v8, vcc
+; SDAG-NEXT: v_mov_b32_e32 v20, 0
+; SDAG-NEXT: v_mov_b32_e32 v21, 0
; SDAG-NEXT: .LBB2_9: ; %udiv-do-while
; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; SDAG-NEXT: v_lshl_b64 v[24:25], v[24:25], 1
-; SDAG-NEXT: v_lshrrev_b32_e32 v14, 31, v23
+; SDAG-NEXT: v_lshrrev_b32_e32 v12, 31, v23
; SDAG-NEXT: v_lshl_b64 v[22:23], v[22:23], 1
-; SDAG-NEXT: v_lshrrev_b32_e32 v54, 31, v13
-; SDAG-NEXT: v_lshl_b64 v[12:13], v[12:13], 1
+; SDAG-NEXT: v_lshrrev_b32_e32 v54, 31, v15
+; SDAG-NEXT: v_lshl_b64 v[14:15], v[14:15], 1
; SDAG-NEXT: v_lshrrev_b32_e32 v55, 31, v11
; SDAG-NEXT: v_lshl_b64 v[10:11], v[10:11], 1
-; SDAG-NEXT: v_or_b32_e32 v24, v24, v14
+; SDAG-NEXT: v_or_b32_e32 v24, v24, v12
; SDAG-NEXT: v_or_b32_e32 v22, v22, v54
-; SDAG-NEXT: v_or_b32_e32 v12, v12, v55
-; SDAG-NEXT: v_or_b32_e32 v13, v19, v13
+; SDAG-NEXT: v_or_b32_e32 v12, v14, v55
+; SDAG-NEXT: v_or_b32_e32 v15, v19, v15
; SDAG-NEXT: v_or_b32_e32 v11, v21, v11
-; SDAG-NEXT: v_or_b32_e32 v12, v18, v12
-; SDAG-NEXT: v_sub_i32_e32 v14, vcc, v50, v22
-; SDAG-NEXT: v_subb_u32_e32 v14, vcc, v51, v23, vcc
-; SDAG-NEXT: v_subb_u32_e32 v14, vcc, v52, v24, vcc
-; SDAG-NEXT: v_subb_u32_e32 v14, vcc, v53, v25, vcc
-; SDAG-NEXT: v_ashrrev_i32_e32 v21, 31, v14
-; SDAG-NEXT: v_and_b32_e32 v14, 1, v21
+; SDAG-NEXT: v_or_b32_e32 v14, v18, v12
+; SDAG-NEXT: v_sub_i32_e32 v12, vcc, v50, v22
+; SDAG-NEXT: v_subb_u32_e32 v12, vcc, v51, v23, vcc
+; SDAG-NEXT: v_subb_u32_e32 v12, vcc, v52, v24, vcc
+; SDAG-NEXT: v_subb_u32_e32 v12, vcc, v53, v25, vcc
+; SDAG-NEXT: v_ashrrev_i32_e32 v21, 31, v12
+; SDAG-NEXT: v_and_b32_e32 v12, 1, v21
; SDAG-NEXT: v_and_b32_e32 v54, v21, v7
; SDAG-NEXT: v_and_b32_e32 v55, v21, v6
; SDAG-NEXT: v_and_b32_e32 v40, v21, v36
@@ -1933,83 +1933,83 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[54:55]
; SDAG-NEXT: s_or_b64 s[10:11], vcc, s[10:11]
; SDAG-NEXT: v_or_b32_e32 v10, v20, v10
-; SDAG-NEXT: v_mov_b32_e32 v21, v15
-; SDAG-NEXT: v_mov_b32_e32 v20, v14
+; SDAG-NEXT: v_mov_b32_e32 v21, v13
+; SDAG-NEXT: v_mov_b32_e32 v20, v12
; SDAG-NEXT: s_andn2_b64 exec, exec, s[10:11]
; SDAG-NEXT: s_cbranch_execnz .LBB2_9
; SDAG-NEXT: ; %bb.10: ; %Flow
; SDAG-NEXT: s_or_b64 exec, exec, s[10:11]
; SDAG-NEXT: .LBB2_11: ; %Flow11
; SDAG-NEXT: s_or_b64 exec, exec, s[8:9]
-; SDAG-NEXT: v_lshl_b64 v[12:13], v[12:13], 1
+; SDAG-NEXT: v_lshl_b64 v[14:15], v[14:15], 1
; SDAG-NEXT: v_lshrrev_b32_e32 v20, 31, v11
; SDAG-NEXT: v_lshl_b64 v[10:11], v[10:11], 1
-; SDAG-NEXT: v_or_b32_e32 v12, v12, v20
-; SDAG-NEXT: v_or_b32_e32 v19, v19, v13
-; SDAG-NEXT: v_or_b32_e32 v15, v15, v11
-; SDAG-NEXT: v_or_b32_e32 v18, v18, v12
-; SDAG-NEXT: v_or_b32_e32 v14, v14, v10
+; SDAG-NEXT: v_or_b32_e32 v14, v14, v20
+; SDAG-NEXT: v_or_b32_e32 v19, v19, v15
+; SDAG-NEXT: v_or_b32_e32 v13, v13, v11
+; SDAG-NEXT: v_or_b32_e32 v18, v18, v14
+; SDAG-NEXT: v_or_b32_e32 v12, v12, v10
; SDAG-NEXT: .LBB2_12: ; %Flow12
; SDAG-NEXT: s_or_b64 exec, exec, s[6:7]
-; SDAG-NEXT: v_mul_lo_u32 v12, v33, v3
+; SDAG-NEXT: v_mul_lo_u32 v14, v33, v3
; SDAG-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v33, v2, 0
; SDAG-NEXT: v_mul_lo_u32 v24, v27, v2
; SDAG-NEXT: v_mul_lo_u32 v25, v34, v31
; SDAG-NEXT: v_mul_lo_u32 v34, v32, v30
; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v31, v33, 0
-; SDAG-NEXT: v_mov_b32_e32 v13, 0
-; SDAG-NEXT: v_mul_lo_u32 v38, v14, v7
-; SDAG-NEXT: v_mad_u64_u32 v[20:21], s[4:5], v14, v6, 0
-; SDAG-NEXT: v_mul_lo_u32 v39, v15, v6
-; SDAG-NEXT: v_mul_lo_u32 v48, v19, v37
-; SDAG-NEXT: v_mul_lo_u32 v49, v18, v36
-; SDAG-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v37, v14, 0
-; SDAG-NEXT: v_add_i32_e32 v11, vcc, v11, v12
-; SDAG-NEXT: v_mov_b32_e32 v12, v3
-; SDAG-NEXT: v_mad_u64_u32 v[22:23], s[4:5], v30, v33, v[12:13]
+; SDAG-NEXT: v_mov_b32_e32 v15, 0
+; SDAG-NEXT: v_mul_lo_u32 v38, v12, v7
+; SDAG-NEXT: v_mad_u64_u32 v[20:21], s[4:5], v12, v6, 0
+; SDAG-NEXT: v_mul_lo_u32 v39, v13, v6
+; SDAG-NEXT: v_mul_lo_u32 v19, v19, v37
+; SDAG-NEXT: v_mul_lo_u32 v48, v18, v36
+; SDAG-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v37, v12, 0
+; SDAG-NEXT: v_add_i32_e32 v11, vcc, v11, v14
+; SDAG-NEXT: v_mov_b32_e32 v14, v3
+; SDAG-NEXT: v_mad_u64_u32 v[22:23], s[4:5], v30, v33, v[14:15]
; SDAG-NEXT: v_sub_i32_e32 v16, vcc, v16, v2
-; SDAG-NEXT: v_add_i32_e64 v19, s[4:5], v21, v38
+; SDAG-NEXT: v_add_i32_e64 v21, s[4:5], v21, v38
; SDAG-NEXT: v_add_i32_e64 v11, s[4:5], v11, v24
-; SDAG-NEXT: v_mov_b32_e32 v12, v22
-; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v31, v27, v[12:13]
+; SDAG-NEXT: v_mov_b32_e32 v14, v22
+; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v31, v27, v[14:15]
; SDAG-NEXT: v_xor_b32_e32 v24, v16, v28
-; SDAG-NEXT: v_add_i32_e64 v21, s[4:5], v19, v39
+; SDAG-NEXT: v_add_i32_e64 v21, s[4:5], v21, v39
; SDAG-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v32, v31, v[10:11]
; SDAG-NEXT: v_add_i32_e64 v22, s[4:5], v23, v3
; SDAG-NEXT: v_addc_u32_e64 v23, s[4:5], 0, 0, s[4:5]
; SDAG-NEXT: v_subb_u32_e32 v31, vcc, v17, v2, vcc
; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v18, v37, v[20:21]
-; SDAG-NEXT: v_mov_b32_e32 v12, v7
-; SDAG-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v36, v14, v[12:13]
+; SDAG-NEXT: v_mov_b32_e32 v14, v7
+; SDAG-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v36, v12, v[14:15]
; SDAG-NEXT: v_add_i32_e64 v7, s[4:5], v25, v11
-; SDAG-NEXT: v_mad_u64_u32 v[18:19], s[4:5], v30, v27, v[22:23]
-; SDAG-NEXT: v_xor_b32_e32 v14, v31, v29
-; SDAG-NEXT: v_add_i32_e64 v3, s[4:5], v48, v3
-; SDAG-NEXT: v_mov_b32_e32 v12, v16
-; SDAG-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v37, v15, v[12:13]
+; SDAG-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v30, v27, v[22:23]
+; SDAG-NEXT: v_xor_b32_e32 v18, v31, v29
+; SDAG-NEXT: v_add_i32_e64 v3, s[4:5], v19, v3
+; SDAG-NEXT: v_mov_b32_e32 v14, v16
+; SDAG-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v37, v13, v[14:15]
; SDAG-NEXT: v_add_i32_e64 v7, s[4:5], v34, v7
-; SDAG-NEXT: v_add_i32_e64 v3, s[4:5], v49, v3
-; SDAG-NEXT: v_add_i32_e64 v12, s[4:5], v17, v12
-; SDAG-NEXT: v_addc_u32_e64 v13, s[4:5], 0, 0, s[4:5]
-; SDAG-NEXT: v_add_i32_e64 v10, s[4:5], v18, v10
-; SDAG-NEXT: v_addc_u32_e64 v7, s[4:5], v19, v7, s[4:5]
+; SDAG-NEXT: v_add_i32_e64 v3, s[4:5], v48, v3
+; SDAG-NEXT: v_add_i32_e64 v15, s[4:5], v17, v15
+; SDAG-NEXT: v_addc_u32_e64 v16, s[4:5], 0, 0, s[4:5]
+; SDAG-NEXT: v_add_i32_e64 v10, s[4:5], v11, v10
+; SDAG-NEXT: v_addc_u32_e64 v7, s[4:5], v12, v7, s[4:5]
; SDAG-NEXT: v_subb_u32_e32 v0, vcc, v0, v10, vcc
-; SDAG-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v36, v15, v[12:13]
+; SDAG-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v36, v13, v[15:16]
; SDAG-NEXT: v_subb_u32_e32 v1, vcc, v1, v7, vcc
; SDAG-NEXT: v_xor_b32_e32 v7, v0, v28
-; SDAG-NEXT: v_add_i32_e32 v10, vcc, v12, v2
-; SDAG-NEXT: v_addc_u32_e32 v12, vcc, v13, v3, vcc
+; SDAG-NEXT: v_add_i32_e32 v10, vcc, v10, v2
+; SDAG-NEXT: v_addc_u32_e32 v11, vcc, v11, v3, vcc
; SDAG-NEXT: v_xor_b32_e32 v3, v1, v29
; SDAG-NEXT: v_sub_i32_e32 v0, vcc, v24, v28
-; SDAG-NEXT: v_subb_u32_e32 v1, vcc, v14, v29, vcc
+; SDAG-NEXT: v_subb_u32_e32 v1, vcc, v18, v29, vcc
; SDAG-NEXT: v_subb_u32_e32 v2, vcc, v7, v28, vcc
; SDAG-NEXT: v_subb_u32_e32 v3, vcc, v3, v29, vcc
; SDAG-NEXT: v_sub_i32_e32 v6, vcc, v8, v6
-; SDAG-NEXT: v_subb_u32_e32 v7, vcc, v9, v11, vcc
+; SDAG-NEXT: v_subb_u32_e32 v7, vcc, v9, v14, vcc
; SDAG-NEXT: v_xor_b32_e32 v6, v6, v26
; SDAG-NEXT: v_subb_u32_e32 v4, vcc, v4, v10, vcc
; SDAG-NEXT: v_xor_b32_e32 v7, v7, v35
-; SDAG-NEXT: v_subb_u32_e32 v5, vcc, v5, v12, vcc
+; SDAG-NEXT: v_subb_u32_e32 v5, vcc, v5, v11, vcc
; SDAG-NEXT: v_xor_b32_e32 v8, v4, v26
; SDAG-NEXT: v_xor_b32_e32 v9, v5, v35
; SDAG-NEXT: v_sub_i32_e32 v4, vcc, v6, v26
@@ -2557,32 +2557,32 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: s_xor_b64 s[8:9], exec, s[4:5]
; SDAG-NEXT: s_cbranch_execz .LBB3_5
; SDAG-NEXT: ; %bb.2: ; %udiv-preheader4
-; SDAG-NEXT: v_lshr_b64 v[16:17], v[0:1], v30
-; SDAG-NEXT: v_sub_i32_e32 v28, vcc, 64, v30
+; SDAG-NEXT: v_lshr_b64 v[24:25], v[0:1], v30
+; SDAG-NEXT: v_sub_i32_e32 v16, vcc, 64, v30
; SDAG-NEXT: v_subrev_i32_e32 v35, vcc, 64, v30
; SDAG-NEXT: v_lshr_b64 v[26:27], v[2:3], v30
; SDAG-NEXT: v_add_i32_e32 v34, vcc, -1, v8
; SDAG-NEXT: v_mov_b32_e32 v20, 0
; SDAG-NEXT: v_mov_b32_e32 v21, 0
-; SDAG-NEXT: v_mov_b32_e32 v24, 0
-; SDAG-NEXT: v_mov_b32_e32 v25, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
-; SDAG-NEXT: v_lshl_b64 v[28:29], v[2:3], v28
+; SDAG-NEXT: v_mov_b32_e32 v17, 0
+; SDAG-NEXT: v_lshl_b64 v[28:29], v[2:3], v16
; SDAG-NEXT: v_lshr_b64 v[37:38], v[2:3], v35
; SDAG-NEXT: v_addc_u32_e32 v35, vcc, -1, v9, vcc
-; SDAG-NEXT: v_or_b32_e32 v17, v17, v29
-; SDAG-NEXT: v_or_b32_e32 v16, v16, v28
+; SDAG-NEXT: v_or_b32_e32 v16, v25, v29
+; SDAG-NEXT: v_or_b32_e32 v24, v24, v28
; SDAG-NEXT: v_addc_u32_e32 v36, vcc, -1, v10, vcc
; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v30
-; SDAG-NEXT: v_cndmask_b32_e64 v17, v38, v17, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v16, v37, v16, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v16, v38, v16, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v24, v37, v24, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v29, 0, v27, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v28, 0, v26, s[4:5]
; SDAG-NEXT: v_addc_u32_e32 v37, vcc, -1, v11, vcc
; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v30
-; SDAG-NEXT: v_cndmask_b32_e32 v27, v17, v1, vcc
-; SDAG-NEXT: v_cndmask_b32_e32 v26, v16, v0, vcc
-; SDAG-NEXT: v_mov_b32_e32 v17, 0
+; SDAG-NEXT: v_cndmask_b32_e32 v27, v16, v1, vcc
+; SDAG-NEXT: v_cndmask_b32_e32 v26, v24, v0, vcc
+; SDAG-NEXT: v_mov_b32_e32 v24, 0
+; SDAG-NEXT: v_mov_b32_e32 v25, 0
; SDAG-NEXT: .LBB3_3: ; %udiv-do-while3
; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; SDAG-NEXT: v_lshrrev_b32_e32 v16, 31, v23
@@ -2674,108 +2674,108 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7]
; SDAG-NEXT: v_cndmask_b32_e64 v17, v21, 0, vcc
; SDAG-NEXT: v_cndmask_b32_e32 v18, v19, v18, vcc
-; SDAG-NEXT: v_sub_i32_e32 v16, vcc, v16, v18
-; SDAG-NEXT: v_subb_u32_e32 v17, vcc, v20, v17, vcc
-; SDAG-NEXT: v_xor_b32_e32 v18, 0x7f, v16
+; SDAG-NEXT: v_sub_i32_e32 v18, vcc, v16, v18
+; SDAG-NEXT: v_subb_u32_e32 v19, vcc, v20, v17, vcc
+; SDAG-NEXT: v_xor_b32_e32 v16, 0x7f, v18
; SDAG-NEXT: v_subb_u32_e32 v20, vcc, 0, v28, vcc
-; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[8:9], v[16:17]
+; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[8:9], v[18:19]
; SDAG-NEXT: v_cndmask_b32_e64 v22, 0, 1, s[4:5]
; SDAG-NEXT: v_subb_u32_e32 v21, vcc, 0, v28, vcc
-; SDAG-NEXT: v_or_b32_e32 v18, v18, v20
+; SDAG-NEXT: v_or_b32_e32 v16, v16, v20
; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[20:21]
; SDAG-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc
-; SDAG-NEXT: v_or_b32_e32 v19, v17, v21
+; SDAG-NEXT: v_or_b32_e32 v17, v19, v21
; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[20:21]
; SDAG-NEXT: v_cndmask_b32_e32 v22, v23, v22, vcc
-; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[18:19]
-; SDAG-NEXT: v_and_b32_e32 v18, 1, v22
-; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v18
+; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[16:17]
+; SDAG-NEXT: v_and_b32_e32 v16, 1, v22
+; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v16
; SDAG-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v23, v7, 0, s[4:5]
; SDAG-NEXT: s_xor_b64 s[6:7], s[4:5], -1
; SDAG-NEXT: v_cndmask_b32_e64 v22, v6, 0, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v19, v5, 0, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v18, v4, 0, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v17, v5, 0, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v16, v4, 0, s[4:5]
; SDAG-NEXT: s_and_b64 s[4:5], s[6:7], vcc
; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
; SDAG-NEXT: s_cbranch_execz .LBB3_12
; SDAG-NEXT: ; %bb.7: ; %udiv-bb1
-; SDAG-NEXT: v_add_i32_e32 v34, vcc, 1, v16
-; SDAG-NEXT: v_sub_i32_e64 v22, s[4:5], 63, v16
-; SDAG-NEXT: v_mov_b32_e32 v18, 0
-; SDAG-NEXT: v_mov_b32_e32 v19, 0
-; SDAG-NEXT: v_addc_u32_e32 v35, vcc, 0, v17, vcc
+; SDAG-NEXT: v_add_i32_e32 v34, vcc, 1, v18
+; SDAG-NEXT: v_sub_i32_e64 v22, s[4:5], 63, v18
+; SDAG-NEXT: v_mov_b32_e32 v16, 0
+; SDAG-NEXT: v_mov_b32_e32 v17, 0
+; SDAG-NEXT: v_addc_u32_e32 v35, vcc, 0, v19, vcc
; SDAG-NEXT: v_lshl_b64 v[22:23], v[4:5], v22
; SDAG-NEXT: v_addc_u32_e32 v36, vcc, 0, v20, vcc
; SDAG-NEXT: v_addc_u32_e32 v37, vcc, 0, v21, vcc
-; SDAG-NEXT: v_or_b32_e32 v20, v34, v36
-; SDAG-NEXT: v_sub_i32_e32 v26, vcc, 0x7f, v16
-; SDAG-NEXT: v_or_b32_e32 v21, v35, v37
-; SDAG-NEXT: v_lshl_b64 v[16:17], v[6:7], v26
-; SDAG-NEXT: v_sub_i32_e32 v27, vcc, 64, v26
-; SDAG-NEXT: v_lshl_b64 v[24:25], v[4:5], v26
-; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[20:21]
-; SDAG-NEXT: v_lshr_b64 v[20:21], v[4:5], v27
-; SDAG-NEXT: v_or_b32_e32 v17, v17, v21
-; SDAG-NEXT: v_or_b32_e32 v16, v16, v20
-; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v26
-; SDAG-NEXT: v_cndmask_b32_e64 v17, v23, v17, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v16, v22, v16, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v21, 0, v25, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v20, 0, v24, s[4:5]
-; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v26
-; SDAG-NEXT: v_cndmask_b32_e64 v17, v17, v7, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v16, v16, v6, s[4:5]
+; SDAG-NEXT: v_or_b32_e32 v19, v34, v36
+; SDAG-NEXT: v_sub_i32_e32 v28, vcc, 0x7f, v18
+; SDAG-NEXT: v_or_b32_e32 v20, v35, v37
+; SDAG-NEXT: v_lshl_b64 v[24:25], v[6:7], v28
+; SDAG-NEXT: v_sub_i32_e32 v18, vcc, 64, v28
+; SDAG-NEXT: v_lshl_b64 v[26:27], v[4:5], v28
+; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[19:20]
+; SDAG-NEXT: v_lshr_b64 v[18:19], v[4:5], v18
+; SDAG-NEXT: v_or_b32_e32 v19, v25, v19
+; SDAG-NEXT: v_or_b32_e32 v18, v24, v18
+; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v28
+; SDAG-NEXT: v_cndmask_b32_e64 v19, v23, v19, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v18, v22, v18, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v21, 0, v27, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v20, 0, v26, s[4:5]
+; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v28
+; SDAG-NEXT: v_cndmask_b32_e64 v19, v19, v7, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v18, v18, v6, s[4:5]
; SDAG-NEXT: v_mov_b32_e32 v22, 0
; SDAG-NEXT: v_mov_b32_e32 v23, 0
; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SDAG-NEXT: s_xor_b64 s[8:9], exec, s[4:5]
; SDAG-NEXT: s_cbranch_execz .LBB3_11
; SDAG-NEXT: ; %bb.8: ; %udiv-preheader
-; SDAG-NEXT: v_lshr_b64 v[18:19], v[4:5], v34
-; SDAG-NEXT: v_sub_i32_e32 v28, vcc, 64, v34
+; SDAG-NEXT: v_lshr_b64 v[24:25], v[4:5], v34
+; SDAG-NEXT: v_sub_i32_e32 v16, vcc, 64, v34
; SDAG-NEXT: v_subrev_i32_e32 v39, vcc, 64, v34
; SDAG-NEXT: v_lshr_b64 v[26:27], v[6:7], v34
; SDAG-NEXT: v_add_i32_e32 v38, vcc, -1, v12
; SDAG-NEXT: v_mov_b32_e32 v22, 0
; SDAG-NEXT: v_mov_b32_e32 v23, 0
-; SDAG-NEXT: v_mov_b32_e32 v24, 0
-; SDAG-NEXT: v_mov_b32_e32 v25, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
-; SDAG-NEXT: v_lshl_b64 v[28:29], v[6:7], v28
+; SDAG-NEXT: v_mov_b32_e32 v17, 0
+; SDAG-NEXT: v_lshl_b64 v[28:29], v[6:7], v16
; SDAG-NEXT: v_lshr_b64 v[49:50], v[6:7], v39
; SDAG-NEXT: v_addc_u32_e32 v39, vcc, -1, v13, vcc
-; SDAG-NEXT: v_or_b32_e32 v19, v19, v29
-; SDAG-NEXT: v_or_b32_e32 v18, v18, v28
+; SDAG-NEXT: v_or_b32_e32 v16, v25, v29
+; SDAG-NEXT: v_or_b32_e32 v24, v24, v28
; SDAG-NEXT: v_addc_u32_e32 v48, vcc, -1, v14, vcc
; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v34
-; SDAG-NEXT: v_cndmask_b32_e64 v19, v50, v19, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v18, v49, v18, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v16, v50, v16, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v24, v49, v24, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v29, 0, v27, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v28, 0, v26, s[4:5]
; SDAG-NEXT: v_addc_u32_e32 v49, vcc, -1, v15, vcc
; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v34
-; SDAG-NEXT: v_cndmask_b32_e32 v27, v19, v5, vcc
-; SDAG-NEXT: v_cndmask_b32_e32 v26, v18, v4, vcc
-; SDAG-NEXT: v_mov_b32_e32 v19, 0
+; SDAG-NEXT: v_cndmask_b32_e32 v27, v16, v5, vcc
+; SDAG-NEXT: v_cndmask_b32_e32 v26, v24, v4, vcc
+; SDAG-NEXT: v_mov_b32_e32 v24, 0
+; SDAG-NEXT: v_mov_b32_e32 v25, 0
; SDAG-NEXT: .LBB3_9: ; %udiv-do-while
; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; SDAG-NEXT: v_lshl_b64 v[28:29], v[28:29], 1
-; SDAG-NEXT: v_lshrrev_b32_e32 v18, 31, v27
+; SDAG-NEXT: v_lshrrev_b32_e32 v16, 31, v27
; SDAG-NEXT: v_lshl_b64 v[26:27], v[26:27], 1
-; SDAG-NEXT: v_lshrrev_b32_e32 v50, 31, v17
-; SDAG-NEXT: v_lshl_b64 v[16:17], v[16:17], 1
+; SDAG-NEXT: v_lshrrev_b32_e32 v50, 31, v19
+; SDAG-NEXT: v_lshl_b64 v[18:19], v[18:19], 1
; SDAG-NEXT: v_lshrrev_b32_e32 v51, 31, v21
; SDAG-NEXT: v_lshl_b64 v[20:21], v[20:21], 1
-; SDAG-NEXT: v_or_b32_e32 v18, v28, v18
+; SDAG-NEXT: v_or_b32_e32 v16, v28, v16
; SDAG-NEXT: v_or_b32_e32 v26, v26, v50
-; SDAG-NEXT: v_or_b32_e32 v16, v16, v51
-; SDAG-NEXT: v_or_b32_e32 v17, v23, v17
+; SDAG-NEXT: v_or_b32_e32 v18, v18, v51
+; SDAG-NEXT: v_or_b32_e32 v19, v23, v19
; SDAG-NEXT: v_or_b32_e32 v21, v25, v21
; SDAG-NEXT: v_sub_i32_e32 v25, vcc, v38, v26
-; SDAG-NEXT: v_or_b32_e32 v16, v22, v16
+; SDAG-NEXT: v_or_b32_e32 v18, v22, v18
; SDAG-NEXT: v_subb_u32_e32 v25, vcc, v39, v27, vcc
-; SDAG-NEXT: v_subb_u32_e32 v25, vcc, v48, v18, vcc
+; SDAG-NEXT: v_subb_u32_e32 v25, vcc, v48, v16, vcc
; SDAG-NEXT: v_subb_u32_e32 v25, vcc, v49, v29, vcc
; SDAG-NEXT: v_ashrrev_i32_e32 v25, 31, v25
; SDAG-NEXT: v_and_b32_e32 v28, v25, v12
@@ -2784,7 +2784,7 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_and_b32_e32 v52, v25, v15
; SDAG-NEXT: v_sub_i32_e32 v26, vcc, v26, v28
; SDAG-NEXT: v_subb_u32_e32 v27, vcc, v27, v50, vcc
-; SDAG-NEXT: v_subb_u32_e32 v28, vcc, v18, v51, vcc
+; SDAG-NEXT: v_subb_u32_e32 v28, vcc, v16, v51, vcc
; SDAG-NEXT: v_subb_u32_e32 v29, vcc, v29, v52, vcc
; SDAG-NEXT: v_add_i32_e32 v34, vcc, -1, v34
; SDAG-NEXT: v_addc_u32_e32 v35, vcc, -1, v35, vcc
@@ -2793,69 +2793,69 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_or_b32_e32 v50, v34, v36
; SDAG-NEXT: v_or_b32_e32 v51, v35, v37
; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[50:51]
-; SDAG-NEXT: v_and_b32_e32 v18, 1, v25
+; SDAG-NEXT: v_and_b32_e32 v16, 1, v25
; SDAG-NEXT: s_or_b64 s[10:11], vcc, s[10:11]
; SDAG-NEXT: v_or_b32_e32 v20, v24, v20
-; SDAG-NEXT: v_mov_b32_e32 v25, v19
-; SDAG-NEXT: v_mov_b32_e32 v24, v18
+; SDAG-NEXT: v_mov_b32_e32 v25, v17
+; SDAG-NEXT: v_mov_b32_e32 v24, v16
; SDAG-NEXT: s_andn2_b64 exec, exec, s[10:11]
; SDAG-NEXT: s_cbranch_execnz .LBB3_9
; SDAG-NEXT: ; %bb.10: ; %Flow
; SDAG-NEXT: s_or_b64 exec, exec, s[10:11]
; SDAG-NEXT: .LBB3_11: ; %Flow11
; SDAG-NEXT: s_or_b64 exec, exec, s[8:9]
-; SDAG-NEXT: v_lshl_b64 v[16:17], v[16:17], 1
+; SDAG-NEXT: v_lshl_b64 v[18:19], v[18:19], 1
; SDAG-NEXT: v_lshrrev_b32_e32 v24, 31, v21
; SDAG-NEXT: v_lshl_b64 v[20:21], v[20:21], 1
-; SDAG-NEXT: v_or_b32_e32 v16, v16, v24
-; SDAG-NEXT: v_or_b32_e32 v23, v23, v17
-; SDAG-NEXT: v_or_b32_e32 v19, v19, v21
-; SDAG-NEXT: v_or_b32_e32 v22, v22, v16
-; SDAG-NEXT: v_or_b32_e32 v18, v18, v20
+; SDAG-NEXT: v_or_b32_e32 v18, v18, v24
+; SDAG-NEXT: v_or_b32_e32 v23, v23, v19
+; SDAG-NEXT: v_or_b32_e32 v17, v17, v21
+; SDAG-NEXT: v_or_b32_e32 v22, v22, v18
+; SDAG-NEXT: v_or_b32_e32 v16, v16, v20
; SDAG-NEXT: .LBB3_12: ; %Flow12
; SDAG-NEXT: s_or_b64 exec, exec, s[6:7]
; SDAG-NEXT: v_mul_lo_u32 v20, v32, v11
-; SDAG-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v32, v10, 0
+; SDAG-NEXT: v_mad_u64_u32 v[18:19], s[4:5], v32, v10, 0
; SDAG-NEXT: v_mul_lo_u32 v28, v30, v10
; SDAG-NEXT: v_mul_lo_u32 v29, v33, v8
; SDAG-NEXT: v_mul_lo_u32 v33, v31, v9
; SDAG-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v8, v32, 0
; SDAG-NEXT: v_mov_b32_e32 v21, 0
-; SDAG-NEXT: v_mul_lo_u32 v34, v18, v15
-; SDAG-NEXT: v_mad_u64_u32 v[24:25], s[4:5], v18, v14, 0
-; SDAG-NEXT: v_mul_lo_u32 v35, v19, v14
-; SDAG-NEXT: v_mul_lo_u32 v36, v23, v12
-; SDAG-NEXT: v_mul_lo_u32 v37, v22, v13
-; SDAG-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v12, v18, 0
-; SDAG-NEXT: v_add_i32_e32 v17, vcc, v17, v20
+; SDAG-NEXT: v_mul_lo_u32 v34, v16, v15
+; SDAG-NEXT: v_mad_u64_u32 v[24:25], s[4:5], v16, v14, 0
+; SDAG-NEXT: v_mul_lo_u32 v35, v17, v14
+; SDAG-NEXT: v_mul_lo_u32 v23, v23, v12
+; SDAG-NEXT: v_mul_lo_u32 v36, v22, v13
+; SDAG-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v12, v16, 0
+; SDAG-NEXT: v_add_i32_e32 v19, vcc, v19, v20
; SDAG-NEXT: v_mov_b32_e32 v20, v11
; SDAG-NEXT: v_mad_u64_u32 v[26:27], s[4:5], v9, v32, v[20:21]
; SDAG-NEXT: v_sub_i32_e32 v0, vcc, v0, v10
-; SDAG-NEXT: v_add_i32_e64 v23, s[4:5], v25, v34
-; SDAG-NEXT: v_add_i32_e64 v17, s[4:5], v17, v28
+; SDAG-NEXT: v_add_i32_e64 v25, s[4:5], v25, v34
+; SDAG-NEXT: v_add_i32_e64 v19, s[4:5], v19, v28
; SDAG-NEXT: v_mov_b32_e32 v20, v26
; SDAG-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v8, v30, v[20:21]
-; SDAG-NEXT: v_add_i32_e64 v25, s[4:5], v23, v35
-; SDAG-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v31, v8, v[16:17]
+; SDAG-NEXT: v_add_i32_e64 v25, s[4:5], v25, v35
+; SDAG-NEXT: v_mad_u64_u32 v[18:19], s[4:5], v31, v8, v[18:19]
; SDAG-NEXT: v_add_i32_e64 v26, s[4:5], v27, v11
; SDAG-NEXT: v_addc_u32_e64 v27, s[4:5], 0, 0, s[4:5]
; SDAG-NEXT: v_subb_u32_e32 v1, vcc, v1, v10, vcc
; SDAG-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v22, v12, v[24:25]
; SDAG-NEXT: v_mov_b32_e32 v20, v15
-; SDAG-NEXT: v_mad_u64_u32 v[22:23], s[4:5], v13, v18, v[20:21]
-; SDAG-NEXT: v_add_i32_e64 v15, s[4:5], v29, v17
+; SDAG-NEXT: v_mad_u64_u32 v[15:16], s[4:5], v13, v16, v[20:21]
+; SDAG-NEXT: v_add_i32_e64 v19, s[4:5], v29, v19
; SDAG-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v9, v30, v[26:27]
-; SDAG-NEXT: v_add_i32_e64 v17, s[4:5], v36, v11
-; SDAG-NEXT: v_mov_b32_e32 v20, v22
-; SDAG-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v12, v19, v[20:21]
-; SDAG-NEXT: v_add_i32_e64 v15, s[4:5], v33, v15
-; SDAG-NEXT: v_add_i32_e64 v20, s[4:5], v37, v17
-; SDAG-NEXT: v_add_i32_e64 v17, s[4:5], v23, v12
-; SDAG-NEXT: v_addc_u32_e64 v18, s[4:5], 0, 0, s[4:5]
-; SDAG-NEXT: v_add_i32_e64 v8, s[4:5], v8, v16
-; SDAG-NEXT: v_addc_u32_e64 v12, s[4:5], v9, v15, s[4:5]
+; SDAG-NEXT: v_add_i32_e64 v22, s[4:5], v23, v11
+; SDAG-NEXT: v_mov_b32_e32 v20, v15
+; SDAG-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v12, v17, v[20:21]
+; SDAG-NEXT: v_add_i32_e64 v19, s[4:5], v33, v19
+; SDAG-NEXT: v_add_i32_e64 v20, s[4:5], v36, v22
+; SDAG-NEXT: v_add_i32_e64 v15, s[4:5], v16, v12
+; SDAG-NEXT: v_addc_u32_e64 v16, s[4:5], 0, 0, s[4:5]
+; SDAG-NEXT: v_add_i32_e64 v8, s[4:5], v8, v18
+; SDAG-NEXT: v_addc_u32_e64 v12, s[4:5], v9, v19, s[4:5]
; SDAG-NEXT: v_subb_u32_e32 v2, vcc, v2, v8, vcc
-; SDAG-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v13, v19, v[17:18]
+; SDAG-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v13, v17, v[15:16]
; SDAG-NEXT: v_subb_u32_e32 v3, vcc, v3, v12, vcc
; SDAG-NEXT: v_add_i32_e32 v8, vcc, v8, v10
; SDAG-NEXT: v_addc_u32_e32 v9, vcc, v9, v20, vcc
diff --git a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
index 8c3d20ffb02fd..d588c22a88573 100644
--- a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
@@ -20,7 +20,7 @@ define amdgpu_ps void @main(i32 %0, float %1) {
; ISA: ; %bb.0: ; %start
; ISA-NEXT: v_readfirstlane_b32 s0, v0
; ISA-NEXT: s_mov_b32 m0, s0
-; ISA-NEXT: s_mov_b32 s10, 0
+; ISA-NEXT: s_mov_b32 s8, 0
; ISA-NEXT: v_interp_p1_f32_e32 v0, v1, attr0.x
; ISA-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
; ISA-NEXT: s_mov_b64 s[0:1], 0
@@ -30,40 +30,42 @@ define amdgpu_ps void @main(i32 %0, float %1) {
; ISA-NEXT: .LBB0_1: ; %Flow1
; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1
; ISA-NEXT: s_or_b64 exec, exec, s[4:5]
-; ISA-NEXT: s_mov_b64 s[8:9], 0
; ISA-NEXT: s_mov_b64 s[4:5], s[6:7]
+; ISA-NEXT: s_mov_b64 s[6:7], 0
; ISA-NEXT: .LBB0_2: ; %Flow
; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1
-; ISA-NEXT: s_and_b64 s[6:7], exec, s[4:5]
-; ISA-NEXT: s_or_b64 s[0:1], s[6:7], s[0:1]
+; ISA-NEXT: s_and_b64 s[10:11], exec, s[4:5]
+; ISA-NEXT: s_or_b64 s[0:1], s[10:11], s[0:1]
; ISA-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
-; ISA-NEXT: s_and_b64 s[6:7], s[8:9], exec
+; ISA-NEXT: s_and_b64 s[6:7], s[6:7], exec
; ISA-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7]
; ISA-NEXT: s_andn2_b64 exec, exec, s[0:1]
-; ISA-NEXT: s_cbranch_execz .LBB0_6
+; ISA-NEXT: s_cbranch_execz .LBB0_7
; ISA-NEXT: .LBB0_3: ; %loop
; ISA-NEXT: ; =>This Inner Loop Header: Depth=1
; ISA-NEXT: s_or_b64 s[4:5], s[4:5], exec
+; ISA-NEXT: s_cmp_lt_u32 s8, 32
; ISA-NEXT: s_mov_b64 s[6:7], -1
-; ISA-NEXT: s_cmp_lt_u32 s10, 32
-; ISA-NEXT: s_mov_b64 s[8:9], -1
-; ISA-NEXT: s_cbranch_scc0 .LBB0_2
+; ISA-NEXT: s_cbranch_scc0 .LBB0_6
; ISA-NEXT: ; %bb.4: ; %endif1
; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1
; ISA-NEXT: s_and_saveexec_b64 s[4:5], vcc
; ISA-NEXT: s_cbranch_execz .LBB0_1
; ISA-NEXT: ; %bb.5: ; %endif2
; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1
-; ISA-NEXT: s_add_i32 s10, s10, 1
+; ISA-NEXT: s_add_i32 s8, s8, 1
; ISA-NEXT: s_xor_b64 s[6:7], exec, -1
; ISA-NEXT: s_branch .LBB0_1
-; ISA-NEXT: .LBB0_6: ; %Flow2
+; ISA-NEXT: .LBB0_6: ; in Loop: Header=BB0_3 Depth=1
+; ISA-NEXT: ; implicit-def: $sgpr8
+; ISA-NEXT: s_branch .LBB0_2
+; ISA-NEXT: .LBB0_7: ; %Flow2
; ISA-NEXT: s_or_b64 exec, exec, s[0:1]
; ISA-NEXT: v_mov_b32_e32 v1, 0
; ISA-NEXT: s_and_saveexec_b64 s[0:1], s[2:3]
-; ISA-NEXT: ; %bb.7: ; %if1
+; ISA-NEXT: ; %bb.8: ; %if1
; ISA-NEXT: v_sqrt_f32_e32 v1, v0
-; ISA-NEXT: ; %bb.8: ; %endloop
+; ISA-NEXT: ; %bb.9: ; %endloop
; ISA-NEXT: s_or_b64 exec, exec, s[0:1]
; ISA-NEXT: exp mrt0 v1, v1, v1, v1 done vm
; ISA-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll
index dcfac6fdbfc77..614200803d6f1 100644
--- a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll
@@ -294,8 +294,8 @@ define amdgpu_kernel void @v_extractelement_v4f16_2(ptr addrspace(1) %out, ptr a
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT: v_lshlrev_b32_e32 v1, 3, v0
-; SI-NEXT: s_mov_b32 s6, 0
; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, 0
; SI-NEXT: v_mov_b32_e32 v2, 0
; SI-NEXT: s_mov_b64 s[10:11], s[6:7]
; SI-NEXT: s_waitcnt lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll
index 625ac12b99839..0d8a9f6aca34b 100644
--- a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll
@@ -349,8 +349,8 @@ define amdgpu_kernel void @v_extractelement_v8i16_2(ptr addrspace(1) %out, ptr a
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: v_lshlrev_b32_e32 v1, 4, v0
-; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_mov_b64 s[10:11], s[6:7]
; GCN-NEXT: s_waitcnt lgkmcnt(0)
@@ -379,8 +379,8 @@ define amdgpu_kernel void @v_extractelement_v8i16_6(ptr addrspace(1) %out, ptr a
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: v_lshlrev_b32_e32 v1, 4, v0
-; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_mov_b64 s[10:11], s[6:7]
; GCN-NEXT: s_waitcnt lgkmcnt(0)
@@ -465,8 +465,8 @@ define amdgpu_kernel void @v_extractelement_v16i16_2(ptr addrspace(1) %out, ptr
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: v_lshlrev_b32_e32 v1, 5, v0
-; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_mov_b64 s[10:11], s[6:7]
; GCN-NEXT: s_waitcnt lgkmcnt(0)
@@ -495,8 +495,8 @@ define amdgpu_kernel void @v_extractelement_v16i16_6(ptr addrspace(1) %out, ptr
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: v_lshlrev_b32_e32 v1, 5, v0
-; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_mov_b64 s[10:11], s[6:7]
; GCN-NEXT: s_waitcnt lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll b/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
index 899cc89405440..8cf91aa900662 100644
--- a/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
@@ -2129,14 +2129,23 @@ define double @v_fma_mul_add_32_f64(double %x, double %y) {
}
define <2 x double> @v_fma_mul_add_32_v2f64(<2 x double> %x, <2 x double> %y) {
-; GFX9-LABEL: v_fma_mul_add_32_v2f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s4, 0
-; GFX9-NEXT: s_mov_b32 s5, 0x40400000
-; GFX9-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], v[4:5]
-; GFX9-NEXT: v_fma_f64 v[2:3], v[2:3], s[4:5], v[6:7]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-LABEL: v_fma_mul_add_32_v2f64:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: s_mov_b32 s4, 0
+; GFX9-SDAG-NEXT: s_mov_b32 s5, 0x40400000
+; GFX9-SDAG-NEXT: v_fma_f64 v[0:1], v[0:1], s[4:5], v[4:5]
+; GFX9-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], s[4:5], v[6:7]
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_fma_mul_add_32_v2f64:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v8, 0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v9, 0x40400000
+; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[4:5]
+; GFX9-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[8:9], v[6:7]
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: v_fma_mul_add_32_v2f64:
; GFX1011: ; %bb.0:
@@ -2485,10 +2494,10 @@ define <2 x double> @v_mul_16_v2f64(<2 x double> %x) {
; GFX9-GISEL-LABEL: v_mul_16_v2f64:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_mov_b32 s4, 0
-; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40300000
-; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
-; GFX9-GISEL-NEXT: v_mul_f64 v[2:3], v[2:3], s[4:5]
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x40300000
+; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX9-GISEL-NEXT: v_mul_f64 v[2:3], v[2:3], v[4:5]
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_mul_16_v2f64:
@@ -2533,10 +2542,10 @@ define <2 x double> @v_mul_neg16_v2f64(<2 x double> %x) {
; GFX9-GISEL-LABEL: v_mul_neg16_v2f64:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_mov_b32 s4, 0
-; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xc0300000
-; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
-; GFX9-GISEL-NEXT: v_mul_f64 v[2:3], v[2:3], s[4:5]
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0xc0300000
+; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX9-GISEL-NEXT: v_mul_f64 v[2:3], v[2:3], v[4:5]
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_mul_neg16_v2f64:
@@ -2581,10 +2590,10 @@ define <2 x double> @v_mul_fabs_16_v2f64(<2 x double> %x) {
; GFX9-GISEL-LABEL: v_mul_fabs_16_v2f64:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_mov_b32 s4, 0
-; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40300000
-; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5]
-; GFX9-GISEL-NEXT: v_mul_f64 v[2:3], |v[2:3]|, s[4:5]
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x40300000
+; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, v[4:5]
+; GFX9-GISEL-NEXT: v_mul_f64 v[2:3], |v[2:3]|, v[4:5]
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_mul_fabs_16_v2f64:
diff --git a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
index cdd34cbde6ddd..e7af7467171c3 100644
--- a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
@@ -11,8 +11,8 @@ define i128 @fptosi_f64_to_i128(double %x) {
; SDAG-NEXT: v_mov_b32_e32 v7, 0
; SDAG-NEXT: s_mov_b64 s[4:5], 0x3fe
; SDAG-NEXT: v_mov_b32_e32 v4, v0
-; SDAG-NEXT: v_cmp_lt_u64_e32 vcc, s[4:5], v[6:7]
; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: v_cmp_lt_u64_e32 vcc, s[4:5], v[6:7]
; SDAG-NEXT: v_mov_b32_e32 v2, 0
; SDAG-NEXT: v_mov_b32_e32 v1, 0
; SDAG-NEXT: v_mov_b32_e32 v3, 0
@@ -57,33 +57,34 @@ define i128 @fptosi_f64_to_i128(double %x) {
; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v7
; SDAG-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[4:5]
; SDAG-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v7
-; SDAG-NEXT: v_cndmask_b32_e64 v6, 0, v1, s[6:7]
+; SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v1, s[6:7]
; SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v0, s[4:5]
; SDAG-NEXT: v_lshlrev_b64 v[0:1], v7, v[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[6:7]
+; SDAG-NEXT: v_mov_b32_e32 v5, 0
; SDAG-NEXT: v_cndmask_b32_e64 v12, 0, v0, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v11, 0, v1, s[4:5]
; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v12, v10, 0
-; SDAG-NEXT: v_mov_b32_e32 v3, 0
+; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[6:7]
; SDAG-NEXT: v_mul_lo_u32 v13, v8, v2
-; SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v10, v2, 0
-; SDAG-NEXT: v_mov_b32_e32 v2, v1
-; SDAG-NEXT: v_mul_lo_u32 v14, v10, v6
-; SDAG-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v11, v10, v[2:3]
-; SDAG-NEXT: ; implicit-def: $vgpr10
-; SDAG-NEXT: v_add3_u32 v5, v5, v14, v13
-; SDAG-NEXT: v_mov_b32_e32 v2, v6
-; SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v12, v8, v[2:3]
-; SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v9, v12, v[4:5]
-; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v7, v2
-; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, s[4:5]
-; SDAG-NEXT: v_mul_lo_u32 v6, v9, v11
+; SDAG-NEXT: v_mov_b32_e32 v4, v1
+; SDAG-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v11, v10, v[4:5]
+; SDAG-NEXT: v_mul_lo_u32 v14, v10, v3
+; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v2, 0
+; SDAG-NEXT: v_mov_b32_e32 v4, v6
+; SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v12, v8, v[4:5]
+; SDAG-NEXT: v_add3_u32 v3, v3, v14, v13
+; SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v9, v12, v[2:3]
+; SDAG-NEXT: v_add_co_u32_e64 v5, s[4:5], v7, v5
+; SDAG-NEXT: v_addc_co_u32_e64 v6, s[4:5], 0, 0, s[4:5]
+; SDAG-NEXT: v_mul_lo_u32 v3, v9, v11
; SDAG-NEXT: v_mul_lo_u32 v9, v9, v12
-; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v11, v8, v[2:3]
+; SDAG-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v11, v8, v[5:6]
+; SDAG-NEXT: ; implicit-def: $vgpr10
; SDAG-NEXT: ; implicit-def: $vgpr8
-; SDAG-NEXT: v_add3_u32 v5, v9, v5, v6
-; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v2, v4
-; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], v3, v5, s[4:5]
+; SDAG-NEXT: v_add3_u32 v3, v9, v2, v3
+; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v5, v1
+; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], v6, v3, s[4:5]
+; SDAG-NEXT: v_mov_b32_e32 v1, v4
; SDAG-NEXT: ; implicit-def: $vgpr6_vgpr7
; SDAG-NEXT: ; implicit-def: $vgpr4_vgpr5
; SDAG-NEXT: ; implicit-def: $vgpr9
@@ -376,8 +377,8 @@ define i128 @fptoui_f64_to_i128(double %x) {
; SDAG-NEXT: v_mov_b32_e32 v7, 0
; SDAG-NEXT: s_mov_b64 s[4:5], 0x3fe
; SDAG-NEXT: v_mov_b32_e32 v4, v0
-; SDAG-NEXT: v_cmp_lt_u64_e32 vcc, s[4:5], v[6:7]
; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: v_cmp_lt_u64_e32 vcc, s[4:5], v[6:7]
; SDAG-NEXT: v_mov_b32_e32 v2, 0
; SDAG-NEXT: v_mov_b32_e32 v1, 0
; SDAG-NEXT: v_mov_b32_e32 v3, 0
@@ -422,33 +423,34 @@ define i128 @fptoui_f64_to_i128(double %x) {
; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v7
; SDAG-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[4:5]
; SDAG-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v7
-; SDAG-NEXT: v_cndmask_b32_e64 v6, 0, v1, s[6:7]
+; SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v1, s[6:7]
; SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v0, s[4:5]
; SDAG-NEXT: v_lshlrev_b64 v[0:1], v7, v[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[6:7]
+; SDAG-NEXT: v_mov_b32_e32 v5, 0
; SDAG-NEXT: v_cndmask_b32_e64 v12, 0, v0, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v11, 0, v1, s[4:5]
; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v12, v10, 0
-; SDAG-NEXT: v_mov_b32_e32 v3, 0
+; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[6:7]
; SDAG-NEXT: v_mul_lo_u32 v13, v8, v2
-; SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v10, v2, 0
-; SDAG-NEXT: v_mov_b32_e32 v2, v1
-; SDAG-NEXT: v_mul_lo_u32 v14, v10, v6
-; SDAG-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v11, v10, v[2:3]
-; SDAG-NEXT: ; implicit-def: $vgpr10
-; SDAG-NEXT: v_add3_u32 v5, v5, v14, v13
-; SDAG-NEXT: v_mov_b32_e32 v2, v6
-; SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v12, v8, v[2:3]
-; SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v9, v12, v[4:5]
-; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v7, v2
-; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, s[4:5]
-; SDAG-NEXT: v_mul_lo_u32 v6, v9, v11
+; SDAG-NEXT: v_mov_b32_e32 v4, v1
+; SDAG-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v11, v10, v[4:5]
+; SDAG-NEXT: v_mul_lo_u32 v14, v10, v3
+; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v2, 0
+; SDAG-NEXT: v_mov_b32_e32 v4, v6
+; SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v12, v8, v[4:5]
+; SDAG-NEXT: v_add3_u32 v3, v3, v14, v13
+; SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v9, v12, v[2:3]
+; SDAG-NEXT: v_add_co_u32_e64 v5, s[4:5], v7, v5
+; SDAG-NEXT: v_addc_co_u32_e64 v6, s[4:5], 0, 0, s[4:5]
+; SDAG-NEXT: v_mul_lo_u32 v3, v9, v11
; SDAG-NEXT: v_mul_lo_u32 v9, v9, v12
-; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v11, v8, v[2:3]
+; SDAG-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v11, v8, v[5:6]
+; SDAG-NEXT: ; implicit-def: $vgpr10
; SDAG-NEXT: ; implicit-def: $vgpr8
-; SDAG-NEXT: v_add3_u32 v5, v9, v5, v6
-; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v2, v4
-; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], v3, v5, s[4:5]
+; SDAG-NEXT: v_add3_u32 v3, v9, v2, v3
+; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v5, v1
+; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], v6, v3, s[4:5]
+; SDAG-NEXT: v_mov_b32_e32 v1, v4
; SDAG-NEXT: ; implicit-def: $vgpr6_vgpr7
; SDAG-NEXT: ; implicit-def: $vgpr4_vgpr5
; SDAG-NEXT: ; implicit-def: $vgpr9
@@ -737,17 +739,17 @@ define i128 @fptosi_f32_to_i128(float %x) {
; SDAG: ; %bb.0: ; %fp-to-i-entry
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: v_mov_b32_e32 v4, v0
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: v_bfe_u32 v5, v4, 23, 8
; SDAG-NEXT: s_movk_i32 s4, 0x7e
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: v_mov_b32_e32 v2, 0
-; SDAG-NEXT: v_mov_b32_e32 v6, 0
; SDAG-NEXT: v_mov_b32_e32 v1, 0
; SDAG-NEXT: v_mov_b32_e32 v3, 0
; SDAG-NEXT: v_cmp_lt_u32_e32 vcc, s4, v5
; SDAG-NEXT: s_and_saveexec_b64 s[8:9], vcc
; SDAG-NEXT: s_cbranch_execz .LBB2_10
; SDAG-NEXT: ; %bb.1: ; %fp-to-i-if-end
+; SDAG-NEXT: v_mov_b32_e32 v6, 0
; SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffff01, v5
; SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v6, vcc
; SDAG-NEXT: v_addc_co_u32_e32 v2, vcc, -1, v6, vcc
@@ -765,14 +767,14 @@ define i128 @fptosi_f32_to_i128(float %x) {
; SDAG-NEXT: s_cbranch_execz .LBB2_7
; SDAG-NEXT: ; %bb.2: ; %fp-to-i-if-end9
; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; SDAG-NEXT: v_add_co_u32_e64 v9, s[4:5], -1, v0
+; SDAG-NEXT: v_add_co_u32_e64 v10, s[4:5], -1, v0
; SDAG-NEXT: s_mov_b64 s[4:5], 0x95
; SDAG-NEXT: v_and_b32_e32 v0, 0x7fffff, v4
; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[4:5], v[5:6]
-; SDAG-NEXT: v_mov_b32_e32 v7, 0
-; SDAG-NEXT: v_cndmask_b32_e64 v8, -1, 0, vcc
-; SDAG-NEXT: v_cndmask_b32_e64 v10, -1, 1, vcc
-; SDAG-NEXT: v_or_b32_e32 v6, 0x800000, v0
+; SDAG-NEXT: v_cndmask_b32_e64 v9, -1, 0, vcc
+; SDAG-NEXT: v_cndmask_b32_e64 v11, -1, 1, vcc
+; SDAG-NEXT: v_or_b32_e32 v7, 0x800000, v0
+; SDAG-NEXT: v_mov_b32_e32 v8, v6
; SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
@@ -782,56 +784,56 @@ define i128 @fptosi_f32_to_i128(float %x) {
; SDAG-NEXT: v_sub_u32_e32 v0, 0xd6, v5
; SDAG-NEXT: v_add_u32_e32 v2, 0xffffff2a, v5
; SDAG-NEXT: v_add_u32_e32 v4, 0xffffff6a, v5
-; SDAG-NEXT: v_lshrrev_b64 v[0:1], v0, v[6:7]
-; SDAG-NEXT: v_lshlrev_b64 v[2:3], v2, v[6:7]
+; SDAG-NEXT: v_lshrrev_b64 v[0:1], v0, v[7:8]
+; SDAG-NEXT: v_lshlrev_b64 v[2:3], v2, v[7:8]
; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v4
; SDAG-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[4:5]
; SDAG-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v4
; SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v1, s[6:7]
; SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v0, s[4:5]
-; SDAG-NEXT: v_lshlrev_b64 v[0:1], v4, v[6:7]
+; SDAG-NEXT: v_lshlrev_b64 v[0:1], v4, v[7:8]
; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[6:7]
-; SDAG-NEXT: v_cndmask_b32_e64 v12, 0, v0, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v11, 0, v1, s[4:5]
-; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v12, v10, 0
-; SDAG-NEXT: v_mul_lo_u32 v13, v8, v2
-; SDAG-NEXT: v_mul_lo_u32 v14, v10, v3
-; SDAG-NEXT: v_mov_b32_e32 v6, v1
-; SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v11, v10, v[6:7]
-; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v2, 0
-; SDAG-NEXT: v_mov_b32_e32 v6, v4
-; SDAG-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v12, v8, v[6:7]
-; SDAG-NEXT: v_add3_u32 v3, v3, v14, v13
-; SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v9, v12, v[2:3]
-; SDAG-NEXT: v_add_co_u32_e64 v3, s[4:5], v5, v7
-; SDAG-NEXT: v_addc_co_u32_e64 v4, s[4:5], 0, 0, s[4:5]
-; SDAG-NEXT: v_mul_lo_u32 v10, v9, v11
-; SDAG-NEXT: v_mul_lo_u32 v9, v9, v12
-; SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v11, v8, v[3:4]
-; SDAG-NEXT: ; implicit-def: $vgpr8
-; SDAG-NEXT: v_add3_u32 v5, v9, v2, v10
-; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v3, v1
-; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], v4, v5, s[4:5]
-; SDAG-NEXT: v_mov_b32_e32 v1, v6
-; SDAG-NEXT: ; implicit-def: $vgpr5_vgpr6
-; SDAG-NEXT: ; implicit-def: $vgpr6_vgpr7
-; SDAG-NEXT: ; implicit-def: $vgpr10
+; SDAG-NEXT: v_cndmask_b32_e64 v13, 0, v0, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v12, 0, v1, s[4:5]
+; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v13, v11, 0
+; SDAG-NEXT: v_mul_lo_u32 v4, v9, v2
+; SDAG-NEXT: v_mul_lo_u32 v14, v11, v3
+; SDAG-NEXT: v_mov_b32_e32 v5, v1
+; SDAG-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v12, v11, v[5:6]
+; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v11, v2, 0
+; SDAG-NEXT: v_mov_b32_e32 v5, v7
+; SDAG-NEXT: v_mul_lo_u32 v7, v10, v13
+; SDAG-NEXT: v_add3_u32 v3, v3, v14, v4
+; SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v13, v9, v[5:6]
+; SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v10, v13, v[2:3]
+; SDAG-NEXT: v_add_co_u32_e64 v5, s[4:5], v8, v5
+; SDAG-NEXT: v_addc_co_u32_e64 v6, s[4:5], 0, 0, s[4:5]
+; SDAG-NEXT: v_mul_lo_u32 v3, v10, v12
+; SDAG-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v12, v9, v[5:6]
+; SDAG-NEXT: ; implicit-def: $vgpr11
; SDAG-NEXT: ; implicit-def: $vgpr9
+; SDAG-NEXT: ; implicit-def: $vgpr10
+; SDAG-NEXT: v_add3_u32 v3, v7, v2, v3
+; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v5, v1
+; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], v6, v3, s[4:5]
+; SDAG-NEXT: v_mov_b32_e32 v1, v4
+; SDAG-NEXT: ; implicit-def: $vgpr5_vgpr6
+; SDAG-NEXT: ; implicit-def: $vgpr7_vgpr8
; SDAG-NEXT: .LBB2_4: ; %Flow
; SDAG-NEXT: s_andn2_saveexec_b64 s[6:7], s[12:13]
; SDAG-NEXT: s_cbranch_execz .LBB2_6
; SDAG-NEXT: ; %bb.5: ; %fp-to-i-if-then12
; SDAG-NEXT: v_sub_u32_e32 v2, 0x96, v5
-; SDAG-NEXT: v_lshrrev_b64 v[0:1], v2, v[6:7]
+; SDAG-NEXT: v_lshrrev_b64 v[0:1], v2, v[7:8]
; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v2
; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[4:5]
; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v2
-; SDAG-NEXT: v_cndmask_b32_e64 v3, v0, v6, s[4:5]
-; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v10, 0
+; SDAG-NEXT: v_cndmask_b32_e64 v3, v0, v7, s[4:5]
+; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v11, 0
; SDAG-NEXT: v_mov_b32_e32 v2, 0
-; SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v3, v8, v[1:2]
+; SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v3, v9, v[1:2]
; SDAG-NEXT: v_mov_b32_e32 v1, v5
-; SDAG-NEXT: v_mad_i64_i32 v[2:3], s[4:5], v9, v3, v[1:2]
+; SDAG-NEXT: v_mad_i64_i32 v[2:3], s[4:5], v10, v3, v[1:2]
; SDAG-NEXT: v_mov_b32_e32 v1, v4
; SDAG-NEXT: .LBB2_6: ; %Flow1
; SDAG-NEXT: s_or_b64 exec, exec, s[6:7]
@@ -1088,17 +1090,17 @@ define i128 @fptoui_f32_to_i128(float %x) {
; SDAG: ; %bb.0: ; %fp-to-i-entry
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: v_mov_b32_e32 v4, v0
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: v_bfe_u32 v5, v4, 23, 8
; SDAG-NEXT: s_movk_i32 s4, 0x7e
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: v_mov_b32_e32 v2, 0
-; SDAG-NEXT: v_mov_b32_e32 v6, 0
; SDAG-NEXT: v_mov_b32_e32 v1, 0
; SDAG-NEXT: v_mov_b32_e32 v3, 0
; SDAG-NEXT: v_cmp_lt_u32_e32 vcc, s4, v5
; SDAG-NEXT: s_and_saveexec_b64 s[8:9], vcc
; SDAG-NEXT: s_cbranch_execz .LBB3_10
; SDAG-NEXT: ; %bb.1: ; %fp-to-i-if-end
+; SDAG-NEXT: v_mov_b32_e32 v6, 0
; SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffff01, v5
; SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v6, vcc
; SDAG-NEXT: v_addc_co_u32_e32 v2, vcc, -1, v6, vcc
@@ -1116,14 +1118,14 @@ define i128 @fptoui_f32_to_i128(float %x) {
; SDAG-NEXT: s_cbranch_execz .LBB3_7
; SDAG-NEXT: ; %bb.2: ; %fp-to-i-if-end9
; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; SDAG-NEXT: v_add_co_u32_e64 v9, s[4:5], -1, v0
+; SDAG-NEXT: v_add_co_u32_e64 v10, s[4:5], -1, v0
; SDAG-NEXT: s_mov_b64 s[4:5], 0x95
; SDAG-NEXT: v_and_b32_e32 v0, 0x7fffff, v4
; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[4:5], v[5:6]
-; SDAG-NEXT: v_mov_b32_e32 v7, 0
-; SDAG-NEXT: v_cndmask_b32_e64 v8, -1, 0, vcc
-; SDAG-NEXT: v_cndmask_b32_e64 v10, -1, 1, vcc
-; SDAG-NEXT: v_or_b32_e32 v6, 0x800000, v0
+; SDAG-NEXT: v_cndmask_b32_e64 v9, -1, 0, vcc
+; SDAG-NEXT: v_cndmask_b32_e64 v11, -1, 1, vcc
+; SDAG-NEXT: v_or_b32_e32 v7, 0x800000, v0
+; SDAG-NEXT: v_mov_b32_e32 v8, v6
; SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
@@ -1133,56 +1135,56 @@ define i128 @fptoui_f32_to_i128(float %x) {
; SDAG-NEXT: v_sub_u32_e32 v0, 0xd6, v5
; SDAG-NEXT: v_add_u32_e32 v2, 0xffffff2a, v5
; SDAG-NEXT: v_add_u32_e32 v4, 0xffffff6a, v5
-; SDAG-NEXT: v_lshrrev_b64 v[0:1], v0, v[6:7]
-; SDAG-NEXT: v_lshlrev_b64 v[2:3], v2, v[6:7]
+; SDAG-NEXT: v_lshrrev_b64 v[0:1], v0, v[7:8]
+; SDAG-NEXT: v_lshlrev_b64 v[2:3], v2, v[7:8]
; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v4
; SDAG-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[4:5]
; SDAG-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v4
; SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v1, s[6:7]
; SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v0, s[4:5]
-; SDAG-NEXT: v_lshlrev_b64 v[0:1], v4, v[6:7]
+; SDAG-NEXT: v_lshlrev_b64 v[0:1], v4, v[7:8]
; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[6:7]
-; SDAG-NEXT: v_cndmask_b32_e64 v12, 0, v0, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v11, 0, v1, s[4:5]
-; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v12, v10, 0
-; SDAG-NEXT: v_mul_lo_u32 v13, v8, v2
-; SDAG-NEXT: v_mul_lo_u32 v14, v10, v3
-; SDAG-NEXT: v_mov_b32_e32 v6, v1
-; SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v11, v10, v[6:7]
-; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v2, 0
-; SDAG-NEXT: v_mov_b32_e32 v6, v4
-; SDAG-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v12, v8, v[6:7]
-; SDAG-NEXT: v_add3_u32 v3, v3, v14, v13
-; SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v9, v12, v[2:3]
-; SDAG-NEXT: v_add_co_u32_e64 v3, s[4:5], v5, v7
-; SDAG-NEXT: v_addc_co_u32_e64 v4, s[4:5], 0, 0, s[4:5]
-; SDAG-NEXT: v_mul_lo_u32 v10, v9, v11
-; SDAG-NEXT: v_mul_lo_u32 v9, v9, v12
-; SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v11, v8, v[3:4]
-; SDAG-NEXT: ; implicit-def: $vgpr8
-; SDAG-NEXT: v_add3_u32 v5, v9, v2, v10
-; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v3, v1
-; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], v4, v5, s[4:5]
-; SDAG-NEXT: v_mov_b32_e32 v1, v6
-; SDAG-NEXT: ; implicit-def: $vgpr5_vgpr6
-; SDAG-NEXT: ; implicit-def: $vgpr6_vgpr7
-; SDAG-NEXT: ; implicit-def: $vgpr10
+; SDAG-NEXT: v_cndmask_b32_e64 v13, 0, v0, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v12, 0, v1, s[4:5]
+; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v13, v11, 0
+; SDAG-NEXT: v_mul_lo_u32 v4, v9, v2
+; SDAG-NEXT: v_mul_lo_u32 v14, v11, v3
+; SDAG-NEXT: v_mov_b32_e32 v5, v1
+; SDAG-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v12, v11, v[5:6]
+; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v11, v2, 0
+; SDAG-NEXT: v_mov_b32_e32 v5, v7
+; SDAG-NEXT: v_mul_lo_u32 v7, v10, v13
+; SDAG-NEXT: v_add3_u32 v3, v3, v14, v4
+; SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v13, v9, v[5:6]
+; SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v10, v13, v[2:3]
+; SDAG-NEXT: v_add_co_u32_e64 v5, s[4:5], v8, v5
+; SDAG-NEXT: v_addc_co_u32_e64 v6, s[4:5], 0, 0, s[4:5]
+; SDAG-NEXT: v_mul_lo_u32 v3, v10, v12
+; SDAG-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v12, v9, v[5:6]
+; SDAG-NEXT: ; implicit-def: $vgpr11
; SDAG-NEXT: ; implicit-def: $vgpr9
+; SDAG-NEXT: ; implicit-def: $vgpr10
+; SDAG-NEXT: v_add3_u32 v3, v7, v2, v3
+; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v5, v1
+; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], v6, v3, s[4:5]
+; SDAG-NEXT: v_mov_b32_e32 v1, v4
+; SDAG-NEXT: ; implicit-def: $vgpr5_vgpr6
+; SDAG-NEXT: ; implicit-def: $vgpr7_vgpr8
; SDAG-NEXT: .LBB3_4: ; %Flow
; SDAG-NEXT: s_andn2_saveexec_b64 s[6:7], s[12:13]
; SDAG-NEXT: s_cbranch_execz .LBB3_6
; SDAG-NEXT: ; %bb.5: ; %fp-to-i-if-then12
; SDAG-NEXT: v_sub_u32_e32 v2, 0x96, v5
-; SDAG-NEXT: v_lshrrev_b64 v[0:1], v2, v[6:7]
+; SDAG-NEXT: v_lshrrev_b64 v[0:1], v2, v[7:8]
; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v2
; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[4:5]
; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v2
-; SDAG-NEXT: v_cndmask_b32_e64 v3, v0, v6, s[4:5]
-; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v10, 0
+; SDAG-NEXT: v_cndmask_b32_e64 v3, v0, v7, s[4:5]
+; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v11, 0
; SDAG-NEXT: v_mov_b32_e32 v2, 0
-; SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v3, v8, v[1:2]
+; SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v3, v9, v[1:2]
; SDAG-NEXT: v_mov_b32_e32 v1, v5
-; SDAG-NEXT: v_mad_i64_i32 v[2:3], s[4:5], v9, v3, v[1:2]
+; SDAG-NEXT: v_mad_i64_i32 v[2:3], s[4:5], v10, v3, v[1:2]
; SDAG-NEXT: v_mov_b32_e32 v1, v4
; SDAG-NEXT: .LBB3_6: ; %Flow1
; SDAG-NEXT: s_or_b64 exec, exec, s[6:7]
@@ -1477,17 +1479,17 @@ define i128 @fptosi_bf16_to_i128(bfloat %x) {
; SDAG: ; %bb.0: ; %fp-to-i-entry
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: v_mov_b32_e32 v4, v0
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: v_bfe_u32 v5, v4, 7, 8
; SDAG-NEXT: s_movk_i32 s4, 0x7e
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: v_mov_b32_e32 v2, 0
-; SDAG-NEXT: v_mov_b32_e32 v6, 0
; SDAG-NEXT: v_mov_b32_e32 v1, 0
; SDAG-NEXT: v_mov_b32_e32 v3, 0
; SDAG-NEXT: v_cmp_lt_u32_e32 vcc, s4, v5
; SDAG-NEXT: s_and_saveexec_b64 s[8:9], vcc
; SDAG-NEXT: s_cbranch_execz .LBB6_10
; SDAG-NEXT: ; %bb.1: ; %fp-to-i-if-end
+; SDAG-NEXT: v_mov_b32_e32 v6, 0
; SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffff01, v5
; SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v6, vcc
; SDAG-NEXT: v_addc_co_u32_e32 v2, vcc, -1, v6, vcc
@@ -1508,10 +1510,10 @@ define i128 @fptosi_bf16_to_i128(bfloat %x) {
; SDAG-NEXT: v_and_b32_sdwa v0, v4, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; SDAG-NEXT: s_mov_b64 s[4:5], 0x85
; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[4:5], v[5:6]
-; SDAG-NEXT: v_mov_b32_e32 v7, 0
-; SDAG-NEXT: v_cndmask_b32_e64 v9, -1, 0, vcc
-; SDAG-NEXT: v_cndmask_b32_e64 v8, -1, 1, vcc
-; SDAG-NEXT: v_or_b32_e32 v6, 0x80, v0
+; SDAG-NEXT: v_cndmask_b32_e64 v10, -1, 0, vcc
+; SDAG-NEXT: v_cndmask_b32_e64 v9, -1, 1, vcc
+; SDAG-NEXT: v_or_b32_e32 v7, 0x80, v0
+; SDAG-NEXT: v_mov_b32_e32 v8, v6
; SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
@@ -1519,56 +1521,56 @@ define i128 @fptosi_bf16_to_i128(bfloat %x) {
; SDAG-NEXT: s_cbranch_execz .LBB6_4
; SDAG-NEXT: ; %bb.3: ; %fp-to-i-if-else
; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; SDAG-NEXT: v_add_co_u32_e64 v10, s[4:5], -1, v0
+; SDAG-NEXT: v_add_co_u32_e64 v11, s[4:5], -1, v0
; SDAG-NEXT: v_sub_u32_e32 v0, 0xc6, v5
; SDAG-NEXT: v_add_u32_e32 v2, 0xffffff3a, v5
; SDAG-NEXT: v_add_u32_e32 v4, 0xffffff7a, v5
-; SDAG-NEXT: v_lshrrev_b64 v[0:1], v0, v[6:7]
-; SDAG-NEXT: v_lshlrev_b64 v[2:3], v2, v[6:7]
+; SDAG-NEXT: v_lshrrev_b64 v[0:1], v0, v[7:8]
+; SDAG-NEXT: v_lshlrev_b64 v[2:3], v2, v[7:8]
; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v4
; SDAG-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[4:5]
; SDAG-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v4
; SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v1, s[6:7]
; SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v0, s[4:5]
-; SDAG-NEXT: v_lshlrev_b64 v[0:1], v4, v[6:7]
+; SDAG-NEXT: v_lshlrev_b64 v[0:1], v4, v[7:8]
; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[6:7]
-; SDAG-NEXT: v_cndmask_b32_e64 v12, 0, v0, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v11, 0, v1, s[4:5]
-; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v12, v8, 0
-; SDAG-NEXT: v_mul_lo_u32 v13, v9, v2
-; SDAG-NEXT: v_mul_lo_u32 v14, v8, v3
-; SDAG-NEXT: v_mov_b32_e32 v6, v1
-; SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v11, v8, v[6:7]
-; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v8, v2, 0
-; SDAG-NEXT: v_mov_b32_e32 v6, v4
-; SDAG-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v12, v9, v[6:7]
-; SDAG-NEXT: v_add3_u32 v3, v3, v14, v13
-; SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v10, v12, v[2:3]
-; SDAG-NEXT: v_add_co_u32_e64 v3, s[4:5], v5, v7
-; SDAG-NEXT: v_addc_co_u32_e64 v4, s[4:5], 0, 0, s[4:5]
-; SDAG-NEXT: v_mul_lo_u32 v8, v10, v11
-; SDAG-NEXT: v_mul_lo_u32 v10, v10, v12
-; SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v11, v9, v[3:4]
-; SDAG-NEXT: v_add3_u32 v5, v10, v2, v8
-; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v3, v1
-; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], v4, v5, s[4:5]
-; SDAG-NEXT: v_mov_b32_e32 v1, v6
+; SDAG-NEXT: v_cndmask_b32_e64 v13, 0, v0, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v12, 0, v1, s[4:5]
+; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v13, v9, 0
+; SDAG-NEXT: v_mul_lo_u32 v4, v10, v2
+; SDAG-NEXT: v_mul_lo_u32 v14, v9, v3
+; SDAG-NEXT: v_mov_b32_e32 v5, v1
+; SDAG-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v12, v9, v[5:6]
+; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v9, v2, 0
+; SDAG-NEXT: v_mov_b32_e32 v5, v7
+; SDAG-NEXT: v_mul_lo_u32 v7, v11, v13
+; SDAG-NEXT: v_add3_u32 v3, v3, v14, v4
+; SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v13, v10, v[5:6]
+; SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v11, v13, v[2:3]
+; SDAG-NEXT: v_add_co_u32_e64 v5, s[4:5], v8, v5
+; SDAG-NEXT: v_addc_co_u32_e64 v6, s[4:5], 0, 0, s[4:5]
+; SDAG-NEXT: v_mul_lo_u32 v3, v11, v12
+; SDAG-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v12, v10, v[5:6]
+; SDAG-NEXT: ; implicit-def: $vgpr9
+; SDAG-NEXT: v_add3_u32 v3, v7, v2, v3
+; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v5, v1
+; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], v6, v3, s[4:5]
+; SDAG-NEXT: v_mov_b32_e32 v1, v4
; SDAG-NEXT: ; implicit-def: $vgpr5_vgpr6
-; SDAG-NEXT: ; implicit-def: $vgpr6_vgpr7
-; SDAG-NEXT: ; implicit-def: $vgpr8
+; SDAG-NEXT: ; implicit-def: $vgpr7_vgpr8
; SDAG-NEXT: .LBB6_4: ; %Flow
; SDAG-NEXT: s_andn2_saveexec_b64 s[6:7], s[12:13]
; SDAG-NEXT: s_cbranch_execz .LBB6_6
; SDAG-NEXT: ; %bb.5: ; %fp-to-i-if-then12
; SDAG-NEXT: v_sub_u32_e32 v2, 0x86, v5
-; SDAG-NEXT: v_lshrrev_b64 v[0:1], v2, v[6:7]
+; SDAG-NEXT: v_lshrrev_b64 v[0:1], v2, v[7:8]
; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v2
; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[4:5]
; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v2
-; SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5]
-; SDAG-NEXT: v_mul_hi_i32_i24_e32 v1, v0, v8
+; SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v7, s[4:5]
+; SDAG-NEXT: v_mul_hi_i32_i24_e32 v1, v0, v9
; SDAG-NEXT: v_ashrrev_i32_e32 v2, 31, v1
-; SDAG-NEXT: v_mul_i32_i24_e32 v0, v0, v8
+; SDAG-NEXT: v_mul_i32_i24_e32 v0, v0, v9
; SDAG-NEXT: v_mov_b32_e32 v3, v2
; SDAG-NEXT: .LBB6_6: ; %Flow1
; SDAG-NEXT: s_or_b64 exec, exec, s[6:7]
@@ -1824,17 +1826,17 @@ define i128 @fptoui_bf16_to_i128(bfloat %x) {
; SDAG: ; %bb.0: ; %fp-to-i-entry
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: v_mov_b32_e32 v4, v0
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: v_bfe_u32 v5, v4, 7, 8
; SDAG-NEXT: s_movk_i32 s4, 0x7e
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: v_mov_b32_e32 v2, 0
-; SDAG-NEXT: v_mov_b32_e32 v6, 0
; SDAG-NEXT: v_mov_b32_e32 v1, 0
; SDAG-NEXT: v_mov_b32_e32 v3, 0
; SDAG-NEXT: v_cmp_lt_u32_e32 vcc, s4, v5
; SDAG-NEXT: s_and_saveexec_b64 s[8:9], vcc
; SDAG-NEXT: s_cbranch_execz .LBB7_10
; SDAG-NEXT: ; %bb.1: ; %fp-to-i-if-end
+; SDAG-NEXT: v_mov_b32_e32 v6, 0
; SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffff01, v5
; SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v6, vcc
; SDAG-NEXT: v_addc_co_u32_e32 v2, vcc, -1, v6, vcc
@@ -1855,10 +1857,10 @@ define i128 @fptoui_bf16_to_i128(bfloat %x) {
; SDAG-NEXT: v_and_b32_sdwa v0, v4, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; SDAG-NEXT: s_mov_b64 s[4:5], 0x85
; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[4:5], v[5:6]
-; SDAG-NEXT: v_mov_b32_e32 v7, 0
-; SDAG-NEXT: v_cndmask_b32_e64 v9, -1, 0, vcc
-; SDAG-NEXT: v_cndmask_b32_e64 v8, -1, 1, vcc
-; SDAG-NEXT: v_or_b32_e32 v6, 0x80, v0
+; SDAG-NEXT: v_cndmask_b32_e64 v10, -1, 0, vcc
+; SDAG-NEXT: v_cndmask_b32_e64 v9, -1, 1, vcc
+; SDAG-NEXT: v_or_b32_e32 v7, 0x80, v0
+; SDAG-NEXT: v_mov_b32_e32 v8, v6
; SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
@@ -1866,56 +1868,56 @@ define i128 @fptoui_bf16_to_i128(bfloat %x) {
; SDAG-NEXT: s_cbranch_execz .LBB7_4
; SDAG-NEXT: ; %bb.3: ; %fp-to-i-if-else
; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; SDAG-NEXT: v_add_co_u32_e64 v10, s[4:5], -1, v0
+; SDAG-NEXT: v_add_co_u32_e64 v11, s[4:5], -1, v0
; SDAG-NEXT: v_sub_u32_e32 v0, 0xc6, v5
; SDAG-NEXT: v_add_u32_e32 v2, 0xffffff3a, v5
; SDAG-NEXT: v_add_u32_e32 v4, 0xffffff7a, v5
-; SDAG-NEXT: v_lshrrev_b64 v[0:1], v0, v[6:7]
-; SDAG-NEXT: v_lshlrev_b64 v[2:3], v2, v[6:7]
+; SDAG-NEXT: v_lshrrev_b64 v[0:1], v0, v[7:8]
+; SDAG-NEXT: v_lshlrev_b64 v[2:3], v2, v[7:8]
; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v4
; SDAG-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[4:5]
; SDAG-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v4
; SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v1, s[6:7]
; SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v0, s[4:5]
-; SDAG-NEXT: v_lshlrev_b64 v[0:1], v4, v[6:7]
+; SDAG-NEXT: v_lshlrev_b64 v[0:1], v4, v[7:8]
; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[6:7]
-; SDAG-NEXT: v_cndmask_b32_e64 v12, 0, v0, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v11, 0, v1, s[4:5]
-; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v12, v8, 0
-; SDAG-NEXT: v_mul_lo_u32 v13, v9, v2
-; SDAG-NEXT: v_mul_lo_u32 v14, v8, v3
-; SDAG-NEXT: v_mov_b32_e32 v6, v1
-; SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v11, v8, v[6:7]
-; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v8, v2, 0
-; SDAG-NEXT: v_mov_b32_e32 v6, v4
-; SDAG-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v12, v9, v[6:7]
-; SDAG-NEXT: v_add3_u32 v3, v3, v14, v13
-; SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v10, v12, v[2:3]
-; SDAG-NEXT: v_add_co_u32_e64 v3, s[4:5], v5, v7
-; SDAG-NEXT: v_addc_co_u32_e64 v4, s[4:5], 0, 0, s[4:5]
-; SDAG-NEXT: v_mul_lo_u32 v8, v10, v11
-; SDAG-NEXT: v_mul_lo_u32 v10, v10, v12
-; SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v11, v9, v[3:4]
-; SDAG-NEXT: v_add3_u32 v5, v10, v2, v8
-; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v3, v1
-; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], v4, v5, s[4:5]
-; SDAG-NEXT: v_mov_b32_e32 v1, v6
+; SDAG-NEXT: v_cndmask_b32_e64 v13, 0, v0, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v12, 0, v1, s[4:5]
+; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v13, v9, 0
+; SDAG-NEXT: v_mul_lo_u32 v4, v10, v2
+; SDAG-NEXT: v_mul_lo_u32 v14, v9, v3
+; SDAG-NEXT: v_mov_b32_e32 v5, v1
+; SDAG-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v12, v9, v[5:6]
+; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v9, v2, 0
+; SDAG-NEXT: v_mov_b32_e32 v5, v7
+; SDAG-NEXT: v_mul_lo_u32 v7, v11, v13
+; SDAG-NEXT: v_add3_u32 v3, v3, v14, v4
+; SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v13, v10, v[5:6]
+; SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v11, v13, v[2:3]
+; SDAG-NEXT: v_add_co_u32_e64 v5, s[4:5], v8, v5
+; SDAG-NEXT: v_addc_co_u32_e64 v6, s[4:5], 0, 0, s[4:5]
+; SDAG-NEXT: v_mul_lo_u32 v3, v11, v12
+; SDAG-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v12, v10, v[5:6]
+; SDAG-NEXT: ; implicit-def: $vgpr9
+; SDAG-NEXT: v_add3_u32 v3, v7, v2, v3
+; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v5, v1
+; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], v6, v3, s[4:5]
+; SDAG-NEXT: v_mov_b32_e32 v1, v4
; SDAG-NEXT: ; implicit-def: $vgpr5_vgpr6
-; SDAG-NEXT: ; implicit-def: $vgpr6_vgpr7
-; SDAG-NEXT: ; implicit-def: $vgpr8
+; SDAG-NEXT: ; implicit-def: $vgpr7_vgpr8
; SDAG-NEXT: .LBB7_4: ; %Flow
; SDAG-NEXT: s_andn2_saveexec_b64 s[6:7], s[12:13]
; SDAG-NEXT: s_cbranch_execz .LBB7_6
; SDAG-NEXT: ; %bb.5: ; %fp-to-i-if-then12
; SDAG-NEXT: v_sub_u32_e32 v2, 0x86, v5
-; SDAG-NEXT: v_lshrrev_b64 v[0:1], v2, v[6:7]
+; SDAG-NEXT: v_lshrrev_b64 v[0:1], v2, v[7:8]
; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v2
; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[4:5]
; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v2
-; SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5]
-; SDAG-NEXT: v_mul_hi_i32_i24_e32 v1, v0, v8
+; SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v7, s[4:5]
+; SDAG-NEXT: v_mul_hi_i32_i24_e32 v1, v0, v9
; SDAG-NEXT: v_ashrrev_i32_e32 v2, 31, v1
-; SDAG-NEXT: v_mul_i32_i24_e32 v0, v0, v8
+; SDAG-NEXT: v_mul_i32_i24_e32 v0, v0, v9
; SDAG-NEXT: v_mov_b32_e32 v3, v2
; SDAG-NEXT: .LBB7_6: ; %Flow1
; SDAG-NEXT: s_or_b64 exec, exec, s[6:7]
diff --git a/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll b/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
index cffa287dd91f5..18567ef647d8e 100644
--- a/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
@@ -2005,11 +2005,9 @@ define <2 x double> @v_sqrt_v2f64_afn(<2 x double> %x) {
; GFX6-GISEL-LABEL: v_sqrt_v2f64_afn:
; GFX6-GISEL: ; %bb.0:
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-GISEL-NEXT: s_mov_b32 s4, 0
-; GFX6-GISEL-NEXT: s_brev_b32 s5, 8
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, s4
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v5, s5
-; GFX6-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; GFX6-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
+; GFX6-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
; GFX6-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
@@ -2054,11 +2052,9 @@ define <2 x double> @v_sqrt_v2f64_afn(<2 x double> %x) {
; GFX8-GISEL-LABEL: v_sqrt_v2f64_afn:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT: s_mov_b32 s4, 0
-; GFX8-GISEL-NEXT: s_brev_b32 s5, 8
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, s4
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, s5
-; GFX8-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; GFX8-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
+; GFX8-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
; GFX8-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
@@ -2548,11 +2544,9 @@ define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) {
; GFX6-GISEL-LABEL: v_sqrt_v2f64_afn_nnan_ninf:
; GFX6-GISEL: ; %bb.0:
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-GISEL-NEXT: s_mov_b32 s4, 0
-; GFX6-GISEL-NEXT: s_brev_b32 s5, 8
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, s4
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v5, s5
-; GFX6-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; GFX6-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
+; GFX6-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
; GFX6-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
@@ -2597,11 +2591,9 @@ define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) {
; GFX8-GISEL-LABEL: v_sqrt_v2f64_afn_nnan_ninf:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT: s_mov_b32 s4, 0
-; GFX8-GISEL-NEXT: s_brev_b32 s5, 8
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, s4
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, s5
-; GFX8-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; GFX8-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
+; GFX8-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
; GFX8-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
@@ -3206,11 +3198,9 @@ define <2 x double> @v_sqrt_v2f64(<2 x double> %x) {
; GFX6-GISEL-LABEL: v_sqrt_v2f64:
; GFX6-GISEL: ; %bb.0:
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-GISEL-NEXT: s_mov_b32 s4, 0
-; GFX6-GISEL-NEXT: s_brev_b32 s5, 8
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, s4
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v5, s5
-; GFX6-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; GFX6-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
+; GFX6-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
; GFX6-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
@@ -3255,11 +3245,9 @@ define <2 x double> @v_sqrt_v2f64(<2 x double> %x) {
; GFX8-GISEL-LABEL: v_sqrt_v2f64:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT: s_mov_b32 s4, 0
-; GFX8-GISEL-NEXT: s_brev_b32 s5, 8
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, s4
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, s5
-; GFX8-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; GFX8-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
+; GFX8-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
; GFX8-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
@@ -3436,23 +3424,21 @@ define <3 x double> @v_sqrt_v3f64(<3 x double> %x) {
; GFX6-GISEL-LABEL: v_sqrt_v3f64:
; GFX6-GISEL: ; %bb.0:
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-GISEL-NEXT: s_mov_b32 s4, 0
-; GFX6-GISEL-NEXT: s_brev_b32 s5, 8
-; GFX6-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
-; GFX6-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v6, s4
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v7, s5
+; GFX6-GISEL-NEXT: v_mov_b32_e32 v6, 0
+; GFX6-GISEL-NEXT: v_bfrev_b32_e32 v7, 8
+; GFX6-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[6:7]
; GFX6-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[6:7]
; GFX6-GISEL-NEXT: v_cmp_lt_f64_e64 s[6:7], v[4:5], v[6:7]
+; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v8, 8, v8
+; GFX6-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v8
; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5]
; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v8, 8, v8
; GFX6-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v8
-; GFX6-GISEL-NEXT: v_rsq_f64_e32 v[8:9], v[0:1]
; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[6:7]
; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
; GFX6-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v6
+; GFX6-GISEL-NEXT: v_rsq_f64_e32 v[8:9], v[0:1]
; GFX6-GISEL-NEXT: v_rsq_f64_e32 v[10:11], v[2:3]
; GFX6-GISEL-NEXT: v_rsq_f64_e32 v[12:13], v[4:5]
; GFX6-GISEL-NEXT: v_mul_f64 v[6:7], v[8:9], 0.5
@@ -3504,23 +3490,21 @@ define <3 x double> @v_sqrt_v3f64(<3 x double> %x) {
; GFX8-GISEL-LABEL: v_sqrt_v3f64:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT: s_mov_b32 s4, 0
-; GFX8-GISEL-NEXT: s_brev_b32 s5, 8
-; GFX8-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
-; GFX8-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v6, s4
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v7, s5
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v6, 0
+; GFX8-GISEL-NEXT: v_bfrev_b32_e32 v7, 8
+; GFX8-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[6:7]
; GFX8-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[6:7]
; GFX8-GISEL-NEXT: v_cmp_lt_f64_e64 s[6:7], v[4:5], v[6:7]
+; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v8, 8, v8
+; GFX8-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v8
; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5]
; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v8, 8, v8
; GFX8-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v8
-; GFX8-GISEL-NEXT: v_rsq_f64_e32 v[8:9], v[0:1]
; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[6:7]
; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
; GFX8-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v6
+; GFX8-GISEL-NEXT: v_rsq_f64_e32 v[8:9], v[0:1]
; GFX8-GISEL-NEXT: v_rsq_f64_e32 v[10:11], v[2:3]
; GFX8-GISEL-NEXT: v_rsq_f64_e32 v[12:13], v[4:5]
; GFX8-GISEL-NEXT: v_mul_f64 v[6:7], v[8:9], 0.5
diff --git a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
index 56ceba258f471..2c03113e8af47 100644
--- a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
+++ b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
@@ -41,15 +41,14 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: v_writelane_b32 v5, s69, 19
; CHECK-NEXT: v_writelane_b32 v5, s70, 20
; CHECK-NEXT: s_mov_b32 s68, 0
-; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: v_writelane_b32 v5, s71, 21
-; CHECK-NEXT: v_mov_b32_e32 v2, s4
-; CHECK-NEXT: v_mov_b32_e32 v3, v1
+; CHECK-NEXT: v_mov_b32_e32 v1, s4
+; CHECK-NEXT: v_mov_b32_e32 v2, 0
; CHECK-NEXT: s_mov_b32 s69, s68
; CHECK-NEXT: s_mov_b32 s70, s68
; CHECK-NEXT: s_mov_b32 s71, s68
-; CHECK-NEXT: image_sample_lz v3, v[2:3], s[16:23], s[68:71] dmask:0x1
-; CHECK-NEXT: v_mov_b32_e32 v2, v1
+; CHECK-NEXT: image_sample_lz v3, v[1:2], s[16:23], s[68:71] dmask:0x1
+; CHECK-NEXT: v_mov_b32_e32 v1, v2
; CHECK-NEXT: ; implicit-def: $vgpr6 : SGPR spill to VGPR lane
; CHECK-NEXT: s_mov_b32 s6, 48
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
@@ -86,7 +85,7 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: s_cbranch_execz .LBB0_3
; CHECK-NEXT: ; %bb.1: ; %bb48
; CHECK-NEXT: image_sample_lz v3, v[1:2], s[16:23], s[68:71] dmask:0x1
-; CHECK-NEXT: v_mov_b32_e32 v2, 0
+; CHECK-NEXT: v_mov_b32_e32 v1, v2
; CHECK-NEXT: s_and_b64 vcc, exec, -1
; CHECK-NEXT: .LBB0_2: ; %bb50
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
diff --git a/llvm/test/CodeGen/AMDGPU/iglp-no-clobber.ll b/llvm/test/CodeGen/AMDGPU/iglp-no-clobber.ll
index f582f984a3924..9f5bbf834fdff 100644
--- a/llvm/test/CodeGen/AMDGPU/iglp-no-clobber.ll
+++ b/llvm/test/CodeGen/AMDGPU/iglp-no-clobber.ll
@@ -9,9 +9,9 @@ define amdgpu_kernel void @func(ptr addrspace(1) %in, ptr addrspace(3) %out) {
; CHECK: ; %bb.0: ; %.lr.ph
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0
; CHECK-NEXT: s_mov_b64 s[8:9], 0
-; CHECK-NEXT: s_mov_b64 s[10:11], 0
; CHECK-NEXT: s_mov_b32 s3, 32
; CHECK-NEXT: s_mov_b32 s2, 0
+; CHECK-NEXT: s_mov_b64 s[10:11], 0
; CHECK-NEXT: s_mov_b64 s[12:13], 0
; CHECK-NEXT: .LBB0_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.AFLCustomIRMutator.opt.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.AFLCustomIRMutator.opt.ll
index fcdad53553823..50bf632533378 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.AFLCustomIRMutator.opt.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.AFLCustomIRMutator.opt.ll
@@ -17,7 +17,7 @@ define amdgpu_kernel void @test_iglp_opt_rev_mfma_gemm(<1 x i64> %L1) {
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: ds_write_b128 v0, v[2:5]
; GCN-NEXT: v_mov_b32_e32 v2, 0
-; GCN-NEXT: v_mov_b32_e32 v3, v2
+; GCN-NEXT: v_mov_b32_e32 v3, 0
; GCN-NEXT: s_cmp_lg_u64 s[0:1], 0
; GCN-NEXT: ; iglp_opt mask(0x00000001)
; GCN-NEXT: ds_write_b128 v0, v[30:33] offset:112
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll
index 78be949baabac..c1508c1675fe0 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll
@@ -5058,10 +5058,10 @@ define amdgpu_kernel void @test_mfma_f32_4x4x1f32_imm(ptr addrspace(1) %arg) #0
; GFX90A-NEXT: v_mov_b32_e32 v1, 1.0
; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX90A-NEXT: v_accvgpr_write_b32 a0, 1.0
-; GFX90A-NEXT: v_mov_b32_e32 v2, 2.0
; GFX90A-NEXT: v_accvgpr_write_b32 a1, 2.0
; GFX90A-NEXT: v_accvgpr_mov_b32 a2, a0
; GFX90A-NEXT: v_accvgpr_mov_b32 a3, a0
+; GFX90A-NEXT: v_mov_b32_e32 v2, 2.0
; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: s_nop 0
; GFX90A-NEXT: v_mfma_f32_4x4x1f32 a[0:3], v1, v2, a[0:3]
@@ -5075,10 +5075,10 @@ define amdgpu_kernel void @test_mfma_f32_4x4x1f32_imm(ptr addrspace(1) %arg) #0
; GFX942-NEXT: v_mov_b32_e32 v1, 1.0
; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX942-NEXT: v_accvgpr_write_b32 a0, 1.0
-; GFX942-NEXT: v_mov_b32_e32 v2, 2.0
; GFX942-NEXT: v_accvgpr_write_b32 a1, 2.0
; GFX942-NEXT: v_accvgpr_mov_b32 a2, a0
; GFX942-NEXT: v_accvgpr_mov_b32 a3, a0
+; GFX942-NEXT: v_mov_b32_e32 v2, 2.0
; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mfma_f32_4x4x1_16b_f32 a[0:3], v1, v2, a[0:3]
@@ -5222,7 +5222,6 @@ define amdgpu_kernel void @test_mfma_f32_16x16x1f32_imm(ptr addrspace(1) %arg) #
; GFX90A: ; %bb.0: ; %bb
; GFX90A-NEXT: v_mov_b32_e32 v1, 1.0
; GFX90A-NEXT: v_accvgpr_write_b32 a0, 1.0
-; GFX90A-NEXT: v_mov_b32_e32 v2, 2.0
; GFX90A-NEXT: v_accvgpr_write_b32 a15, 2.0
; GFX90A-NEXT: v_accvgpr_mov_b32 a1, a0
; GFX90A-NEXT: v_accvgpr_mov_b32 a2, a0
@@ -5238,6 +5237,7 @@ define amdgpu_kernel void @test_mfma_f32_16x16x1f32_imm(ptr addrspace(1) %arg) #
; GFX90A-NEXT: v_accvgpr_mov_b32 a12, a0
; GFX90A-NEXT: v_accvgpr_mov_b32 a13, a0
; GFX90A-NEXT: v_accvgpr_mov_b32 a14, a0
+; GFX90A-NEXT: v_mov_b32_e32 v2, 2.0
; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: v_mfma_f32_16x16x1f32 a[0:15], v1, v2, a[0:15]
@@ -5254,7 +5254,6 @@ define amdgpu_kernel void @test_mfma_f32_16x16x1f32_imm(ptr addrspace(1) %arg) #
; GFX942: ; %bb.0: ; %bb
; GFX942-NEXT: v_mov_b32_e32 v1, 1.0
; GFX942-NEXT: v_accvgpr_write_b32 a0, 1.0
-; GFX942-NEXT: v_mov_b32_e32 v2, 2.0
; GFX942-NEXT: v_accvgpr_write_b32 a15, 2.0
; GFX942-NEXT: v_accvgpr_mov_b32 a1, a0
; GFX942-NEXT: v_accvgpr_mov_b32 a2, a0
@@ -5270,6 +5269,7 @@ define amdgpu_kernel void @test_mfma_f32_16x16x1f32_imm(ptr addrspace(1) %arg) #
; GFX942-NEXT: v_accvgpr_mov_b32 a12, a0
; GFX942-NEXT: v_accvgpr_mov_b32 a13, a0
; GFX942-NEXT: v_accvgpr_mov_b32 a14, a0
+; GFX942-NEXT: v_mov_b32_e32 v2, 2.0
; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: v_mfma_f32_16x16x1_4b_f32 a[0:15], v1, v2, a[0:15]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.frexp.ll b/llvm/test/CodeGen/AMDGPU/llvm.frexp.ll
index 3a4bf1c81ed58..eed67d9e020d7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.frexp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.frexp.ll
@@ -1939,40 +1939,40 @@ define { <2 x double>, <2 x i32> } @test_frexp_v2f64_v2i32(<2 x double> %a) {
; GFX6-GISEL-LABEL: test_frexp_v2f64_v2i32:
; GFX6-GISEL: ; %bb.0:
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-GISEL-NEXT: s_mov_b32 s4, 0
-; GFX6-GISEL-NEXT: s_mov_b32 s5, 0x7ff00000
+; GFX6-GISEL-NEXT: v_mov_b32_e32 v7, 0
+; GFX6-GISEL-NEXT: v_mov_b32_e32 v8, 0x7ff00000
; GFX6-GISEL-NEXT: v_frexp_mant_f64_e32 v[5:6], v[0:1]
; GFX6-GISEL-NEXT: v_frexp_exp_i32_f64_e32 v4, v[0:1]
-; GFX6-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5]
+; GFX6-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, v[7:8]
+; GFX6-GISEL-NEXT: v_frexp_mant_f64_e32 v[9:10], v[2:3]
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
-; GFX6-GISEL-NEXT: v_frexp_mant_f64_e32 v[6:7], v[2:3]
; GFX6-GISEL-NEXT: v_frexp_exp_i32_f64_e32 v5, v[2:3]
-; GFX6-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[2:3]|, s[4:5]
+; GFX6-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[2:3]|, v[7:8]
; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v5, vcc
-; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc
+; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v10, vcc
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call { <2 x double>, <2 x i32> } @llvm.frexp.v2f64.v2i32(<2 x double> %a)
ret { <2 x double>, <2 x i32> } %result
}
define <2 x double> @test_frexp_v2f64_v2i32_only_use_fract(<2 x double> %a) {
-; GFX6-LABEL: test_frexp_v2f64_v2i32_only_use_fract:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: s_mov_b32 s4, 0
-; GFX6-NEXT: s_mov_b32 s5, 0x7ff00000
-; GFX6-NEXT: v_frexp_mant_f64_e32 v[4:5], v[0:1]
-; GFX6-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5]
-; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX6-NEXT: v_frexp_mant_f64_e32 v[4:5], v[2:3]
-; GFX6-NEXT: v_cmp_lt_f64_e64 vcc, |v[2:3]|, s[4:5]
-; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
-; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; GFX6-NEXT: s_setpc_b64 s[30:31]
+; GFX6-SDAG-LABEL: test_frexp_v2f64_v2i32_only_use_fract:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s4, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s5, 0x7ff00000
+; GFX6-SDAG-NEXT: v_frexp_mant_f64_e32 v[4:5], v[0:1]
+; GFX6-SDAG-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5]
+; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX6-SDAG-NEXT: v_frexp_mant_f64_e32 v[4:5], v[2:3]
+; GFX6-SDAG-NEXT: v_cmp_lt_f64_e64 vcc, |v[2:3]|, s[4:5]
+; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_frexp_v2f64_v2i32_only_use_fract:
; GFX8: ; %bb.0:
@@ -2005,24 +2005,39 @@ define <2 x double> @test_frexp_v2f64_v2i32_only_use_fract(<2 x double> %a) {
; GFX12-NEXT: v_frexp_mant_f64_e32 v[0:1], v[0:1]
; GFX12-NEXT: v_frexp_mant_f64_e32 v[2:3], v[2:3]
; GFX12-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: test_frexp_v2f64_v2i32_only_use_fract:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; GFX6-GISEL-NEXT: v_mov_b32_e32 v5, 0x7ff00000
+; GFX6-GISEL-NEXT: v_frexp_mant_f64_e32 v[6:7], v[0:1]
+; GFX6-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, v[4:5]
+; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX6-GISEL-NEXT: v_frexp_mant_f64_e32 v[6:7], v[2:3]
+; GFX6-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[2:3]|, v[4:5]
+; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
+; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call { <2 x double>, <2 x i32> } @llvm.frexp.v2f64.v2i32(<2 x double> %a)
%result.0 = extractvalue { <2 x double>, <2 x i32> } %result, 0
ret <2 x double> %result.0
}
define <2 x i32> @test_frexp_v2f64_v2i32_only_use_exp(<2 x double> %a) {
-; GFX6-LABEL: test_frexp_v2f64_v2i32_only_use_exp:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: s_mov_b32 s4, 0
-; GFX6-NEXT: s_mov_b32 s5, 0x7ff00000
-; GFX6-NEXT: v_frexp_exp_i32_f64_e32 v4, v[0:1]
-; GFX6-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5]
-; GFX6-NEXT: v_frexp_exp_i32_f64_e32 v1, v[2:3]
-; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
-; GFX6-NEXT: v_cmp_lt_f64_e64 vcc, |v[2:3]|, s[4:5]
-; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX6-NEXT: s_setpc_b64 s[30:31]
+; GFX6-SDAG-LABEL: test_frexp_v2f64_v2i32_only_use_exp:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s4, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s5, 0x7ff00000
+; GFX6-SDAG-NEXT: v_frexp_exp_i32_f64_e32 v4, v[0:1]
+; GFX6-SDAG-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5]
+; GFX6-SDAG-NEXT: v_frexp_exp_i32_f64_e32 v1, v[2:3]
+; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX6-SDAG-NEXT: v_cmp_lt_f64_e64 vcc, |v[2:3]|, s[4:5]
+; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_frexp_v2f64_v2i32_only_use_exp:
; GFX8: ; %bb.0:
@@ -2055,6 +2070,19 @@ define <2 x i32> @test_frexp_v2f64_v2i32_only_use_exp(<2 x double> %a) {
; GFX12-NEXT: v_frexp_exp_i32_f64_e32 v0, v[0:1]
; GFX12-NEXT: v_frexp_exp_i32_f64_e32 v1, v[2:3]
; GFX12-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: test_frexp_v2f64_v2i32_only_use_exp:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; GFX6-GISEL-NEXT: v_mov_b32_e32 v5, 0x7ff00000
+; GFX6-GISEL-NEXT: v_frexp_exp_i32_f64_e32 v6, v[0:1]
+; GFX6-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, v[4:5]
+; GFX6-GISEL-NEXT: v_frexp_exp_i32_f64_e32 v1, v[2:3]
+; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
+; GFX6-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[2:3]|, v[4:5]
+; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call { <2 x double>, <2 x i32> } @llvm.frexp.v2f64.v2i32(<2 x double> %a)
%result.1 = extractvalue { <2 x double>, <2 x i32> } %result, 1
ret <2 x i32> %result.1
@@ -2079,3 +2107,4 @@ attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memo
; GFX11-SDAG: {{.*}}
; GFX12-GISEL: {{.*}}
; GFX12-SDAG: {{.*}}
+; GFX6: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/mad-combine.ll b/llvm/test/CodeGen/AMDGPU/mad-combine.ll
index 41eeeaf51df9f..320d3c77a6d9f 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-combine.ll
@@ -1049,9 +1049,9 @@ define amdgpu_kernel void @aggressive_combine_to_mad_fsub_0_f32(ptr addrspace(1)
; SI-STD: ; %bb.0:
; SI-STD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xb
; SI-STD-NEXT: s_load_dword s6, s[4:5], 0xd
-; SI-STD-NEXT: s_mov_b32 s2, 0
; SI-STD-NEXT: v_mov_b32_e32 v1, 0
; SI-STD-NEXT: s_mov_b32 s3, 0xf000
+; SI-STD-NEXT: s_mov_b32 s2, 0
; SI-STD-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; SI-STD-NEXT: s_waitcnt lgkmcnt(0)
; SI-STD-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
@@ -1097,9 +1097,9 @@ define amdgpu_kernel void @aggressive_combine_to_mad_fsub_0_f32(ptr addrspace(1)
; SI-DENORM-FASTFMAF: ; %bb.0:
; SI-DENORM-FASTFMAF-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xb
; SI-DENORM-FASTFMAF-NEXT: s_load_dword s6, s[4:5], 0xd
-; SI-DENORM-FASTFMAF-NEXT: s_mov_b32 s2, 0
; SI-DENORM-FASTFMAF-NEXT: v_mov_b32_e32 v1, 0
; SI-DENORM-FASTFMAF-NEXT: s_mov_b32 s3, 0xf000
+; SI-DENORM-FASTFMAF-NEXT: s_mov_b32 s2, 0
; SI-DENORM-FASTFMAF-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; SI-DENORM-FASTFMAF-NEXT: s_waitcnt lgkmcnt(0)
; SI-DENORM-FASTFMAF-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
@@ -1145,9 +1145,9 @@ define amdgpu_kernel void @aggressive_combine_to_mad_fsub_0_f32(ptr addrspace(1)
; SI-DENORM-SLOWFMAF: ; %bb.0:
; SI-DENORM-SLOWFMAF-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xb
; SI-DENORM-SLOWFMAF-NEXT: s_load_dword s6, s[4:5], 0xd
-; SI-DENORM-SLOWFMAF-NEXT: s_mov_b32 s2, 0
; SI-DENORM-SLOWFMAF-NEXT: v_mov_b32_e32 v1, 0
; SI-DENORM-SLOWFMAF-NEXT: s_mov_b32 s3, 0xf000
+; SI-DENORM-SLOWFMAF-NEXT: s_mov_b32 s2, 0
; SI-DENORM-SLOWFMAF-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; SI-DENORM-SLOWFMAF-NEXT: s_waitcnt lgkmcnt(0)
; SI-DENORM-SLOWFMAF-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 glc
@@ -1277,9 +1277,9 @@ define amdgpu_kernel void @aggressive_combine_to_mad_fsub_2_f32(ptr addrspace(1)
; SI-STD: ; %bb.0:
; SI-STD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xb
; SI-STD-NEXT: s_load_dword s6, s[4:5], 0xd
-; SI-STD-NEXT: s_mov_b32 s2, 0
; SI-STD-NEXT: v_mov_b32_e32 v1, 0
; SI-STD-NEXT: s_mov_b32 s3, 0xf000
+; SI-STD-NEXT: s_mov_b32 s2, 0
; SI-STD-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; SI-STD-NEXT: s_waitcnt lgkmcnt(0)
; SI-STD-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
@@ -1325,9 +1325,9 @@ define amdgpu_kernel void @aggressive_combine_to_mad_fsub_2_f32(ptr addrspace(1)
; SI-DENORM-FASTFMAF: ; %bb.0:
; SI-DENORM-FASTFMAF-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xb
; SI-DENORM-FASTFMAF-NEXT: s_load_dword s6, s[4:5], 0xd
-; SI-DENORM-FASTFMAF-NEXT: s_mov_b32 s2, 0
; SI-DENORM-FASTFMAF-NEXT: v_mov_b32_e32 v1, 0
; SI-DENORM-FASTFMAF-NEXT: s_mov_b32 s3, 0xf000
+; SI-DENORM-FASTFMAF-NEXT: s_mov_b32 s2, 0
; SI-DENORM-FASTFMAF-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; SI-DENORM-FASTFMAF-NEXT: s_waitcnt lgkmcnt(0)
; SI-DENORM-FASTFMAF-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
@@ -1373,9 +1373,9 @@ define amdgpu_kernel void @aggressive_combine_to_mad_fsub_2_f32(ptr addrspace(1)
; SI-DENORM-SLOWFMAF: ; %bb.0:
; SI-DENORM-SLOWFMAF-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xb
; SI-DENORM-SLOWFMAF-NEXT: s_load_dword s6, s[4:5], 0xd
-; SI-DENORM-SLOWFMAF-NEXT: s_mov_b32 s2, 0
; SI-DENORM-SLOWFMAF-NEXT: v_mov_b32_e32 v1, 0
; SI-DENORM-SLOWFMAF-NEXT: s_mov_b32 s3, 0xf000
+; SI-DENORM-SLOWFMAF-NEXT: s_mov_b32 s2, 0
; SI-DENORM-SLOWFMAF-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; SI-DENORM-SLOWFMAF-NEXT: s_waitcnt lgkmcnt(0)
; SI-DENORM-SLOWFMAF-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 glc
@@ -1457,9 +1457,9 @@ define amdgpu_kernel void @aggressive_combine_to_mad_fsub_3_f32(ptr addrspace(1)
; SI-STD: ; %bb.0:
; SI-STD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xb
; SI-STD-NEXT: s_load_dword s6, s[4:5], 0xd
-; SI-STD-NEXT: s_mov_b32 s2, 0
; SI-STD-NEXT: v_mov_b32_e32 v1, 0
; SI-STD-NEXT: s_mov_b32 s3, 0xf000
+; SI-STD-NEXT: s_mov_b32 s2, 0
; SI-STD-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; SI-STD-NEXT: s_waitcnt lgkmcnt(0)
; SI-STD-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
@@ -1505,9 +1505,9 @@ define amdgpu_kernel void @aggressive_combine_to_mad_fsub_3_f32(ptr addrspace(1)
; SI-DENORM-FASTFMAF: ; %bb.0:
; SI-DENORM-FASTFMAF-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xb
; SI-DENORM-FASTFMAF-NEXT: s_load_dword s6, s[4:5], 0xd
-; SI-DENORM-FASTFMAF-NEXT: s_mov_b32 s2, 0
; SI-DENORM-FASTFMAF-NEXT: v_mov_b32_e32 v1, 0
; SI-DENORM-FASTFMAF-NEXT: s_mov_b32 s3, 0xf000
+; SI-DENORM-FASTFMAF-NEXT: s_mov_b32 s2, 0
; SI-DENORM-FASTFMAF-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; SI-DENORM-FASTFMAF-NEXT: s_waitcnt lgkmcnt(0)
; SI-DENORM-FASTFMAF-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
@@ -1553,9 +1553,9 @@ define amdgpu_kernel void @aggressive_combine_to_mad_fsub_3_f32(ptr addrspace(1)
; SI-DENORM-SLOWFMAF: ; %bb.0:
; SI-DENORM-SLOWFMAF-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xb
; SI-DENORM-SLOWFMAF-NEXT: s_load_dword s6, s[4:5], 0xd
-; SI-DENORM-SLOWFMAF-NEXT: s_mov_b32 s2, 0
; SI-DENORM-SLOWFMAF-NEXT: v_mov_b32_e32 v1, 0
; SI-DENORM-SLOWFMAF-NEXT: s_mov_b32 s3, 0xf000
+; SI-DENORM-SLOWFMAF-NEXT: s_mov_b32 s2, 0
; SI-DENORM-SLOWFMAF-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; SI-DENORM-SLOWFMAF-NEXT: s_waitcnt lgkmcnt(0)
; SI-DENORM-SLOWFMAF-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
diff --git a/llvm/test/CodeGen/AMDGPU/masked-load-vectortypes.ll b/llvm/test/CodeGen/AMDGPU/masked-load-vectortypes.ll
index 3b855a56a5abb..deb97a9812b42 100644
--- a/llvm/test/CodeGen/AMDGPU/masked-load-vectortypes.ll
+++ b/llvm/test/CodeGen/AMDGPU/masked-load-vectortypes.ll
@@ -217,7 +217,6 @@ define <16 x i8> @uniform_masked_load_ptr1_mask_v16i8(ptr addrspace(1) inreg noc
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_and_b32_e32 v0, 1, v0
; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX942-NEXT: v_mov_b32_e32 v15, 0
; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: v_mov_b32_e32 v1, 0
; GFX942-NEXT: v_mov_b32_e32 v2, 0
@@ -233,6 +232,7 @@ define <16 x i8> @uniform_masked_load_ptr1_mask_v16i8(ptr addrspace(1) inreg noc
; GFX942-NEXT: v_mov_b32_e32 v19, 0
; GFX942-NEXT: v_mov_b32_e32 v13, 0
; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v15, 0
; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-NEXT: s_cbranch_execz .LBB8_2
; GFX942-NEXT: ; %bb.1: ; %cond.load
diff --git a/llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll b/llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll
index 1870d1bcb1804..e29da3a6b000f 100644
--- a/llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll
@@ -310,8 +310,8 @@ define amdgpu_kernel void @test_umul24_i8_vgpr(ptr addrspace(1) %out, ptr addrsp
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd
; SI-NEXT: s_mov_b32 s11, 0xf000
-; SI-NEXT: s_mov_b32 s14, 0
; SI-NEXT: v_mov_b32_e32 v4, 0
+; SI-NEXT: s_mov_b32 s14, 0
; SI-NEXT: s_mov_b32 s15, s11
; SI-NEXT: v_mov_b32_e32 v2, v4
; SI-NEXT: s_mov_b64 s[6:7], s[14:15]
diff --git a/llvm/test/CodeGen/AMDGPU/peephole-fold-imm-multi-use.mir b/llvm/test/CodeGen/AMDGPU/peephole-fold-imm-multi-use.mir
new file mode 100644
index 0000000000000..cf515e7d92862
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/peephole-fold-imm-multi-use.mir
@@ -0,0 +1,94 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx942 -run-pass=peephole-opt -o - %s | FileCheck %s
+
+# Breaking mov of 64-bit inline immediate will increase instruction
+# count.
+---
+name: no_break_s_mov_b64_multi_use_copy_inline_imm_extract
+body: |
+ bb.0:
+
+ ; GCN-LABEL: name: no_break_s_mov_b64_multi_use_copy_inline_imm_extract
+ ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY killed [[S_MOV_B64_]].sub0
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY killed [[S_MOV_B64_]].sub1
+ ; GCN-NEXT: SI_RETURN_TO_EPILOG [[COPY]], [[COPY1]]
+ ; CHECK-LABEL: name: no_break_s_mov_b64_multi_use_copy_inline_imm_extract
+ ; CHECK: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY killed [[S_MOV_B64_]].sub0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY killed [[S_MOV_B64_]].sub1
+ ; CHECK-NEXT: SI_RETURN_TO_EPILOG [[COPY]], [[COPY1]]
+ %0:sreg_64 = S_MOV_B64 0
+ %1:sreg_32 = COPY killed %0.sub0
+ %2:sreg_32 = COPY killed %0.sub1
+ SI_RETURN_TO_EPILOG %1, %2
+
+...
+
+---
+name: no_break_v_mov_b64_multi_use_copy_inline_imm_extract
+body: |
+ bb.0:
+
+ ; GCN-LABEL: name: no_break_v_mov_b64_multi_use_copy_inline_imm_extract
+ ; GCN: [[V_MOV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_e64 0, implicit $exec
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed [[V_MOV_B64_e64_]].sub0
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[V_MOV_B64_e64_]].sub1
+ ; GCN-NEXT: SI_RETURN_TO_EPILOG [[COPY]], [[COPY1]]
+ ; CHECK-LABEL: name: no_break_v_mov_b64_multi_use_copy_inline_imm_extract
+ ; CHECK: [[V_MOV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_e64 0, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed [[V_MOV_B64_e64_]].sub0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[V_MOV_B64_e64_]].sub1
+ ; CHECK-NEXT: SI_RETURN_TO_EPILOG [[COPY]], [[COPY1]]
+ %0:vreg_64_align2 = V_MOV_B64_e64 0, implicit $exec
+ %1:vgpr_32 = COPY killed %0.sub0
+ %2:vgpr_32 = COPY killed %0.sub1
+ SI_RETURN_TO_EPILOG %1, %2
+
+...
+
+# The high half extract is an inline immediate in the use context, so
+# this should fold despite multiple uses.
+---
+name: break_s_mov_b64_multi_use_copy_extract_use_is_inline_imm
+body: |
+ bb.0:
+
+ ; GCN-LABEL: name: break_s_mov_b64_multi_use_copy_extract_use_is_inline_imm
+ ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -96
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY killed [[S_MOV_B]].sub0
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY killed [[S_MOV_B]].sub1
+ ; GCN-NEXT: SI_RETURN_TO_EPILOG [[COPY]], [[COPY1]]
+ ; CHECK-LABEL: name: break_s_mov_b64_multi_use_copy_extract_use_is_inline_imm
+ ; CHECK: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -96
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY killed [[S_MOV_B]].sub0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY killed [[S_MOV_B]].sub1
+ ; CHECK-NEXT: SI_RETURN_TO_EPILOG [[COPY]], [[COPY1]]
+ %0:sreg_64 = S_MOV_B64_IMM_PSEUDO -96
+ %1:sreg_32 = COPY killed %0.sub0
+ %2:sreg_32 = COPY killed %0.sub1
+ SI_RETURN_TO_EPILOG %1, %2
+
+...
+
+---
+name: break_v_mov_b64_multi_use_copy_extract_use_is_inline_imm
+body: |
+ bb.0:
+
+ ; GCN-LABEL: name: break_v_mov_b64_multi_use_copy_extract_use_is_inline_imm
+ ; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO -96, implicit $exec
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed [[V_MOV_B]].sub0
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[V_MOV_B]].sub1
+ ; GCN-NEXT: SI_RETURN_TO_EPILOG [[COPY]], [[COPY1]]
+ ; CHECK-LABEL: name: break_v_mov_b64_multi_use_copy_extract_use_is_inline_imm
+ ; CHECK: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO -96, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed [[V_MOV_B]].sub0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[V_MOV_B]].sub1
+ ; CHECK-NEXT: SI_RETURN_TO_EPILOG [[COPY]], [[COPY1]]
+ %0:vreg_64_align2 = V_MOV_B64_PSEUDO -96, implicit $exec
+ %1:vgpr_32 = COPY killed %0.sub0
+ %2:vgpr_32 = COPY killed %0.sub1
+ SI_RETURN_TO_EPILOG %1, %2
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/rem_i128.ll b/llvm/test/CodeGen/AMDGPU/rem_i128.ll
index f0c8fed925673..5d0db8fd55d9e 100644
--- a/llvm/test/CodeGen/AMDGPU/rem_i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/rem_i128.ll
@@ -146,8 +146,8 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-NEXT: v_mov_b32_e32 v18, 0
; GFX9-NEXT: v_addc_co_u32_e32 v31, vcc, -1, v5, vcc
; GFX9-NEXT: v_mov_b32_e32 v13, 0
-; GFX9-NEXT: v_mov_b32_e32 v19, 0
; GFX9-NEXT: s_mov_b64 s[4:5], 0
+; GFX9-NEXT: v_mov_b32_e32 v19, 0
; GFX9-NEXT: v_mov_b32_e32 v9, 0
; GFX9-NEXT: .LBB0_3: ; %udiv-do-while
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -423,31 +423,31 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[10:11], s[8:9]
; GFX9-O0-NEXT: v_ffbh_u32_e64 v7, v7
-; GFX9-O0-NEXT: s_mov_b32 s13, 32
-; GFX9-O0-NEXT: v_add_u32_e64 v7, v7, s13
+; GFX9-O0-NEXT: s_mov_b32 s12, 32
+; GFX9-O0-NEXT: v_add_u32_e64 v7, v7, s12
; GFX9-O0-NEXT: v_ffbh_u32_e64 v8, v8
; GFX9-O0-NEXT: v_min_u32_e64 v7, v7, v8
-; GFX9-O0-NEXT: s_mov_b32 s12, 0
-; GFX9-O0-NEXT: ; implicit-def: $sgpr14
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, s12
+; GFX9-O0-NEXT: s_mov_b32 s13, 0
+; GFX9-O0-NEXT: ; implicit-def: $sgpr13
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, 0
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v10
; GFX9-O0-NEXT: v_mov_b32_e32 v11, v8
; GFX9-O0-NEXT: v_ffbh_u32_e64 v6, v6
-; GFX9-O0-NEXT: v_add_u32_e64 v6, v6, s13
+; GFX9-O0-NEXT: v_add_u32_e64 v6, v6, s12
; GFX9-O0-NEXT: v_ffbh_u32_e64 v9, v9
; GFX9-O0-NEXT: v_min_u32_e64 v12, v6, v9
-; GFX9-O0-NEXT: ; implicit-def: $sgpr14
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, s12
+; GFX9-O0-NEXT: ; implicit-def: $sgpr13
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, 0
; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v13, v6
; GFX9-O0-NEXT: s_mov_b64 s[14:15], 64
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v12
; GFX9-O0-NEXT: s_mov_b32 s16, s14
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v13
-; GFX9-O0-NEXT: s_mov_b32 s18, s15
+; GFX9-O0-NEXT: s_mov_b32 s13, s15
; GFX9-O0-NEXT: v_add_co_u32_e64 v9, s[16:17], v9, s16
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, s18
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, s13
; GFX9-O0-NEXT: v_addc_co_u32_e64 v6, s[16:17], v6, v10, s[16:17]
; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v6
@@ -463,20 +463,20 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[4:5], s[8:9]
; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v0
-; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s13
+; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s12
; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v1
; GFX9-O0-NEXT: v_min_u32_e64 v5, v4, v5
-; GFX9-O0-NEXT: ; implicit-def: $sgpr16
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, s12
+; GFX9-O0-NEXT: ; implicit-def: $sgpr13
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, 0
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v2
-; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s13
+; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s12
; GFX9-O0-NEXT: v_ffbh_u32_e64 v10, v3
; GFX9-O0-NEXT: v_min_u32_e64 v11, v4, v10
-; GFX9-O0-NEXT: ; implicit-def: $sgpr13
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, s12
+; GFX9-O0-NEXT: ; implicit-def: $sgpr12
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, 0
; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v4
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v11
@@ -581,17 +581,17 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_mov_b64 s[4:5], exec
; GFX9-O0-NEXT: v_writelane_b32 v30, s4, 4
; GFX9-O0-NEXT: v_writelane_b32 v30, s5, 5
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
+; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
; GFX9-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-O0-NEXT: s_cbranch_execz .LBB0_3
; GFX9-O0-NEXT: s_branch .LBB0_8
; GFX9-O0-NEXT: .LBB0_1: ; %Flow
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
+; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 6
; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 7
@@ -623,9 +623,9 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_branch .LBB0_5
; GFX9-O0-NEXT: .LBB0_3: ; %Flow2
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
+; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 4
; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 5
@@ -683,9 +683,9 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_branch .LBB0_3
; GFX9-O0-NEXT: .LBB0_5: ; %Flow1
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
+; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 8
; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 9
@@ -714,9 +714,9 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_branch .LBB0_4
; GFX9-O0-NEXT: .LBB0_6: ; %udiv-do-while
; GFX9-O0-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
+; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: v_readlane_b32 s6, v30, 10
; GFX9-O0-NEXT: v_readlane_b32 s7, v30, 11
@@ -908,9 +908,9 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_mov_b64 s[6:7], s[4:5]
; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 10
; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 11
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
+; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
; GFX9-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
@@ -939,9 +939,9 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_cbranch_execnz .LBB0_6
; GFX9-O0-NEXT: s_branch .LBB0_1
; GFX9-O0-NEXT: .LBB0_7: ; %udiv-preheader
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
+; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload
@@ -1041,9 +1041,9 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
; GFX9-O0-NEXT: v_writelane_b32 v30, s4, 10
; GFX9-O0-NEXT: v_writelane_b32 v30, s5, 11
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
+; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
; GFX9-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
@@ -1070,9 +1070,9 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_branch .LBB0_6
; GFX9-O0-NEXT: .LBB0_8: ; %udiv-bb1
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
+; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
@@ -1199,9 +1199,9 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7]
; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 8
; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 9
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
+; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-O0-NEXT: s_cbranch_execz .LBB0_5
; GFX9-O0-NEXT: s_branch .LBB0_7
@@ -1247,8 +1247,8 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9
; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 killed $vgpr16_vgpr17 killed $exec
; GFX9-O0-NEXT: s_mov_b32 s5, 0
-; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v0, s5
+; GFX9-O0-NEXT: ; implicit-def: $sgpr5
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, 0
; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v17, v0
; GFX9-O0-NEXT: v_mov_b32_e32 v0, v17
@@ -1269,17 +1269,17 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mad_u64_u32 v[18:19], s[6:7], v8, v0, 0
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v19
; GFX9-O0-NEXT: v_add3_u32 v8, v8, v9, v14
+; GFX9-O0-NEXT: ; implicit-def: $sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: ; implicit-def: $sgpr7
-; GFX9-O0-NEXT: ; implicit-def: $sgpr7
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, s6
+; GFX9-O0-NEXT: ; implicit-def: $sgpr6
+; GFX9-O0-NEXT: v_mov_b32_e32 v14, s5
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v14
; GFX9-O0-NEXT: v_lshlrev_b64 v[8:9], s4, v[8:9]
; GFX9-O0-NEXT: v_mov_b32_e32 v15, v9
; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 killed $vgpr18_vgpr19 killed $exec
-; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, s5
+; GFX9-O0-NEXT: ; implicit-def: $sgpr5
+; GFX9-O0-NEXT: v_mov_b32_e32 v14, 0
; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v19, v14
; GFX9-O0-NEXT: v_mov_b32_e32 v14, v19
@@ -1299,16 +1299,16 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v17, v8
; GFX9-O0-NEXT: v_mad_u64_u32 v[14:15], s[6:7], v5, v1, 0
; GFX9-O0-NEXT: v_mov_b32_e32 v18, v14
-; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, s5
+; GFX9-O0-NEXT: ; implicit-def: $sgpr5
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, 0
; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v19, v8
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v19
; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15
+; GFX9-O0-NEXT: ; implicit-def: $sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: ; implicit-def: $sgpr7
-; GFX9-O0-NEXT: ; implicit-def: $sgpr7
-; GFX9-O0-NEXT: v_mov_b32_e32 v9, s6
+; GFX9-O0-NEXT: ; implicit-def: $sgpr6
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, s5
; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v15, v9
; GFX9-O0-NEXT: v_lshlrev_b64 v[14:15], s4, v[14:15]
@@ -1321,16 +1321,16 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v21, v8
; GFX9-O0-NEXT: v_mad_u64_u32 v[14:15], s[6:7], v5, v2, 0
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v14
-; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s5
+; GFX9-O0-NEXT: ; implicit-def: $sgpr5
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, 0
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v5
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9
; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15
+; GFX9-O0-NEXT: ; implicit-def: $sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: ; implicit-def: $sgpr7
-; GFX9-O0-NEXT: ; implicit-def: $sgpr7
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, s6
+; GFX9-O0-NEXT: ; implicit-def: $sgpr6
+; GFX9-O0-NEXT: v_mov_b32_e32 v18, s5
; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v15, v18
; GFX9-O0-NEXT: v_lshlrev_b64 v[14:15], s4, v[14:15]
@@ -1343,8 +1343,8 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v23, v5
; GFX9-O0-NEXT: v_mad_u64_u32 v[14:15], s[6:7], v0, v2, 0
; GFX9-O0-NEXT: v_mov_b32_e32 v18, v15
-; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, s5
+; GFX9-O0-NEXT: ; implicit-def: $sgpr5
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, 0
; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v19, v2
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v22
@@ -1357,25 +1357,25 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v2
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v9
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0xffffffff
-; GFX9-O0-NEXT: s_mov_b32 s8, s7
-; GFX9-O0-NEXT: v_and_b32_e64 v2, v2, s8
+; GFX9-O0-NEXT: s_mov_b32 s5, s7
+; GFX9-O0-NEXT: v_and_b32_e64 v2, v2, s5
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
-; GFX9-O0-NEXT: ; kill: def $sgpr6 killed $sgpr6 killed $sgpr6_sgpr7
-; GFX9-O0-NEXT: v_and_b32_e64 v18, v5, s6
+; GFX9-O0-NEXT: s_mov_b32 s5, s6
+; GFX9-O0-NEXT: v_and_b32_e64 v18, v5, s5
; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v19, v2
; GFX9-O0-NEXT: v_mad_u64_u32 v[22:23], s[6:7], v0, v1, 0
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v22
-; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v0, s5
+; GFX9-O0-NEXT: ; implicit-def: $sgpr5
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, 0
; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0
; GFX9-O0-NEXT: v_mov_b32_e32 v0, v2
; GFX9-O0-NEXT: v_mov_b32_e32 v22, v23
+; GFX9-O0-NEXT: ; implicit-def: $sgpr5
+; GFX9-O0-NEXT: ; implicit-def: $sgpr6
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: ; implicit-def: $sgpr7
-; GFX9-O0-NEXT: ; implicit-def: $sgpr7
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s6
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, s5
; GFX9-O0-NEXT: ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v23, v5
; GFX9-O0-NEXT: v_lshlrev_b64 v[22:23], s4, v[22:23]
@@ -1423,8 +1423,8 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_lshlrev_b64 v[0:1], s4, v[0:1]
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v1
; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 killed $vgpr14_vgpr15 killed $exec
-; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, s5
+; GFX9-O0-NEXT: ; implicit-def: $sgpr5
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, 0
; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v15, v2
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v15
@@ -1621,8 +1621,8 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-NEXT: v_mov_b32_e32 v20, 0
; GFX9-NEXT: v_addc_co_u32_e32 v29, vcc, -1, v7, vcc
; GFX9-NEXT: v_mov_b32_e32 v15, 0
-; GFX9-NEXT: v_mov_b32_e32 v21, 0
; GFX9-NEXT: s_mov_b64 s[4:5], 0
+; GFX9-NEXT: v_mov_b32_e32 v21, 0
; GFX9-NEXT: v_mov_b32_e32 v13, 0
; GFX9-NEXT: .LBB1_3: ; %udiv-do-while
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -1799,31 +1799,31 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7]
; GFX9-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v5
-; GFX9-O0-NEXT: s_mov_b32 s9, 32
-; GFX9-O0-NEXT: v_add_u32_e64 v5, v5, s9
+; GFX9-O0-NEXT: s_mov_b32 s8, 32
+; GFX9-O0-NEXT: v_add_u32_e64 v5, v5, s8
; GFX9-O0-NEXT: v_ffbh_u32_e64 v6, v6
; GFX9-O0-NEXT: v_min_u32_e64 v5, v5, v6
-; GFX9-O0-NEXT: s_mov_b32 s8, 0
-; GFX9-O0-NEXT: ; implicit-def: $sgpr10
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, s8
+; GFX9-O0-NEXT: s_mov_b32 s9, 0
+; GFX9-O0-NEXT: ; implicit-def: $sgpr9
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, 0
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v8
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v6
; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v4
-; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s9
+; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s8
; GFX9-O0-NEXT: v_ffbh_u32_e64 v7, v7
; GFX9-O0-NEXT: v_min_u32_e64 v14, v4, v7
-; GFX9-O0-NEXT: ; implicit-def: $sgpr10
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8
+; GFX9-O0-NEXT: ; implicit-def: $sgpr9
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, 0
; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v15, v4
; GFX9-O0-NEXT: s_mov_b64 s[10:11], 64
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v14
; GFX9-O0-NEXT: s_mov_b32 s12, s10
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v15
-; GFX9-O0-NEXT: s_mov_b32 s14, s11
+; GFX9-O0-NEXT: s_mov_b32 s9, s11
; GFX9-O0-NEXT: v_add_co_u32_e64 v7, s[12:13], v7, s12
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, s14
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, s9
; GFX9-O0-NEXT: v_addc_co_u32_e64 v4, s[12:13], v4, v8, s[12:13]
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v4
@@ -1834,25 +1834,25 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7
; GFX9-O0-NEXT: v_cndmask_b32_e64 v8, v5, v6, s[12:13]
-; GFX9-O0-NEXT: ; implicit-def: $sgpr12
-; GFX9-O0-NEXT: ; implicit-def: $sgpr12
+; GFX9-O0-NEXT: ; implicit-def: $sgpr9
+; GFX9-O0-NEXT: ; implicit-def: $sgpr9
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4
; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v0
-; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s9
+; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s8
; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v1
; GFX9-O0-NEXT: v_min_u32_e64 v5, v4, v5
-; GFX9-O0-NEXT: ; implicit-def: $sgpr12
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8
+; GFX9-O0-NEXT: ; implicit-def: $sgpr9
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, 0
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v2
-; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s9
+; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s8
; GFX9-O0-NEXT: v_ffbh_u32_e64 v10, v3
; GFX9-O0-NEXT: v_min_u32_e64 v14, v4, v10
-; GFX9-O0-NEXT: ; implicit-def: $sgpr9
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8
+; GFX9-O0-NEXT: ; implicit-def: $sgpr8
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, 0
; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v15, v4
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v14
@@ -2619,8 +2619,8 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v4
; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 killed $vgpr12_vgpr13 killed $exec
; GFX9-O0-NEXT: s_mov_b32 s5, 0
-; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, s5
+; GFX9-O0-NEXT: ; implicit-def: $sgpr5
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, 0
; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v13, v2
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v13
@@ -2641,17 +2641,17 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mad_u64_u32 v[14:15], s[6:7], v2, v4, 0
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v15
; GFX9-O0-NEXT: v_add3_u32 v2, v2, v3, v10
+; GFX9-O0-NEXT: ; implicit-def: $sgpr5
+; GFX9-O0-NEXT: ; implicit-def: $sgpr6
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: ; implicit-def: $sgpr7
-; GFX9-O0-NEXT: ; implicit-def: $sgpr7
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, s6
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, s5
; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v10
; GFX9-O0-NEXT: v_lshlrev_b64 v[2:3], s4, v[2:3]
; GFX9-O0-NEXT: v_mov_b32_e32 v11, v3
; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 killed $vgpr14_vgpr15 killed $exec
-; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, s5
+; GFX9-O0-NEXT: ; implicit-def: $sgpr5
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, 0
; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v15, v10
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v15
@@ -2671,16 +2671,16 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v13, v2
; GFX9-O0-NEXT: v_mad_u64_u32 v[14:15], s[6:7], v7, v5, 0
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v14
-; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, s5
+; GFX9-O0-NEXT: ; implicit-def: $sgpr5
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, 0
; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v10
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v3
; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15
+; GFX9-O0-NEXT: ; implicit-def: $sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: ; implicit-def: $sgpr7
-; GFX9-O0-NEXT: ; implicit-def: $sgpr7
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, s6
+; GFX9-O0-NEXT: ; implicit-def: $sgpr6
+; GFX9-O0-NEXT: v_mov_b32_e32 v11, s5
; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v15, v11
; GFX9-O0-NEXT: v_lshlrev_b64 v[14:15], s4, v[14:15]
@@ -2693,16 +2693,16 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v10
; GFX9-O0-NEXT: v_mad_u64_u32 v[14:15], s[6:7], v7, v6, 0
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v14
-; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, s5
+; GFX9-O0-NEXT: ; implicit-def: $sgpr5
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, 0
; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v11, v7
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v11
; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15
+; GFX9-O0-NEXT: ; implicit-def: $sgpr5
+; GFX9-O0-NEXT: ; implicit-def: $sgpr6
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: ; implicit-def: $sgpr7
-; GFX9-O0-NEXT: ; implicit-def: $sgpr7
-; GFX9-O0-NEXT: v_mov_b32_e32 v16, s6
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, s5
; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v15, v16
; GFX9-O0-NEXT: v_lshlrev_b64 v[14:15], s4, v[14:15]
@@ -2715,8 +2715,8 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v19, v7
; GFX9-O0-NEXT: v_mad_u64_u32 v[10:11], s[6:7], v4, v6, 0
; GFX9-O0-NEXT: v_mov_b32_e32 v16, v11
-; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, s5
+; GFX9-O0-NEXT: ; implicit-def: $sgpr5
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, 0
; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v17, v6
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v18
@@ -2729,25 +2729,25 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v14
; GFX9-O0-NEXT: v_mov_b32_e32 v14, v7
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0xffffffff
-; GFX9-O0-NEXT: s_mov_b32 s8, s7
-; GFX9-O0-NEXT: v_and_b32_e64 v14, v14, s8
+; GFX9-O0-NEXT: s_mov_b32 s5, s7
+; GFX9-O0-NEXT: v_and_b32_e64 v14, v14, s5
; GFX9-O0-NEXT: v_mov_b32_e32 v15, v6
-; GFX9-O0-NEXT: ; kill: def $sgpr6 killed $sgpr6 killed $sgpr6_sgpr7
-; GFX9-O0-NEXT: v_and_b32_e64 v16, v15, s6
+; GFX9-O0-NEXT: s_mov_b32 s5, s6
+; GFX9-O0-NEXT: v_and_b32_e64 v16, v15, s5
; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v17, v14
; GFX9-O0-NEXT: v_mad_u64_u32 v[14:15], s[6:7], v4, v5, 0
; GFX9-O0-NEXT: v_mov_b32_e32 v18, v14
-; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, s5
+; GFX9-O0-NEXT: ; implicit-def: $sgpr5
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, 0
; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v19, v4
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v19
; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15
+; GFX9-O0-NEXT: ; implicit-def: $sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: ; implicit-def: $sgpr7
-; GFX9-O0-NEXT: ; implicit-def: $sgpr7
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s6
+; GFX9-O0-NEXT: ; implicit-def: $sgpr6
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, s5
; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v15, v5
; GFX9-O0-NEXT: v_lshlrev_b64 v[14:15], s4, v[14:15]
@@ -2795,8 +2795,8 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_lshlrev_b64 v[5:6], s4, v[4:5]
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 killed $vgpr10_vgpr11 killed $exec
-; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, s5
+; GFX9-O0-NEXT: ; implicit-def: $sgpr5
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, 0
; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v11, v4
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v11
diff --git a/llvm/test/CodeGen/AMDGPU/roundeven.ll b/llvm/test/CodeGen/AMDGPU/roundeven.ll
index 59a1fe041bf90..cf3edc0b4ac96 100644
--- a/llvm/test/CodeGen/AMDGPU/roundeven.ll
+++ b/llvm/test/CodeGen/AMDGPU/roundeven.ll
@@ -1258,17 +1258,17 @@ define <2 x double> @v_roundeven_v2f64(<2 x double> %x) {
; GFX6-NEXT: v_mov_b32_e32 v4, 0
; GFX6-NEXT: v_or_b32_e32 v5, 0x43300000, v5
; GFX6-NEXT: v_add_f64 v[6:7], v[0:1], v[4:5]
-; GFX6-NEXT: s_mov_b32 s4, -1
-; GFX6-NEXT: s_mov_b32 s5, 0x432fffff
+; GFX6-NEXT: v_mov_b32_e32 v8, -1
+; GFX6-NEXT: v_mov_b32_e32 v9, 0x432fffff
; GFX6-NEXT: v_add_f64 v[5:6], v[6:7], -v[4:5]
-; GFX6-NEXT: v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
+; GFX6-NEXT: v_cmp_gt_f64_e64 vcc, |v[0:1]|, v[8:9]
; GFX6-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc
; GFX6-NEXT: v_and_b32_e32 v5, 0x80000000, v3
; GFX6-NEXT: v_or_b32_e32 v5, 0x43300000, v5
-; GFX6-NEXT: v_add_f64 v[7:8], v[2:3], v[4:5]
+; GFX6-NEXT: v_add_f64 v[10:11], v[2:3], v[4:5]
; GFX6-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
-; GFX6-NEXT: v_add_f64 v[4:5], v[7:8], -v[4:5]
-; GFX6-NEXT: v_cmp_gt_f64_e64 vcc, |v[2:3]|, s[4:5]
+; GFX6-NEXT: v_add_f64 v[4:5], v[10:11], -v[4:5]
+; GFX6-NEXT: v_cmp_gt_f64_e64 vcc, |v[2:3]|, v[8:9]
; GFX6-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
; GFX6-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
index 4aac193d6aeab..e34fdd9ae6902 100644
--- a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
@@ -1660,33 +1660,31 @@ define <2 x double> @v_rsq_v2f64(<2 x double> %x) {
; SI-GISEL-LABEL: v_rsq_v2f64:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: s_mov_b32 s4, 0
-; SI-GISEL-NEXT: s_brev_b32 s5, 8
-; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v10, s4
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
-; SI-GISEL-NEXT: v_mov_b32_e32 v11, s5
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
-; SI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[10:11]
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; SI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
+; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
+; SI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[0:1]
; SI-GISEL-NEXT: v_mov_b32_e32 v14, 0xffffff80
; SI-GISEL-NEXT: v_mov_b32_e32 v15, 0x260
-; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5]
; SI-GISEL-NEXT: v_mov_b32_e32 v18, 0x3ff00000
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5]
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
-; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v6
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v14, vcc
+; SI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], 0.5
+; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[0:1], v[6:7]
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[0:1]
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7]
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v10, 8, v12
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v10
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[8:9], v[4:5]
; SI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v14, vcc
; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v8
; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v15
; SI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], 0.5
@@ -1807,11 +1805,9 @@ define <2 x double> @v_rsq_v2f64(<2 x double> %x) {
; VI-GISEL-LABEL: v_rsq_v2f64:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: s_mov_b32 s4, 0
-; VI-GISEL-NEXT: s_brev_b32 s5, 8
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, s4
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5
-; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; VI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
+; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
; VI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
@@ -1842,15 +1838,15 @@ define <2 x double> @v_rsq_v2f64(<2 x double> %x) {
; VI-GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
; VI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
; VI-GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
-; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; VI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
+; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; VI-GISEL-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
; VI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
; VI-GISEL-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; VI-GISEL-NEXT: v_div_scale_f64 v[4:5], s[6:7], v[0:1], v[0:1], 1.0
; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
+; VI-GISEL-NEXT: v_div_scale_f64 v[4:5], s[6:7], v[0:1], v[0:1], 1.0
; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0
; VI-GISEL-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
@@ -1960,33 +1956,31 @@ define <2 x double> @v_neg_rsq_v2f64(<2 x double> %x) {
; SI-GISEL-LABEL: v_neg_rsq_v2f64:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: s_mov_b32 s4, 0
-; SI-GISEL-NEXT: s_brev_b32 s5, 8
-; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v10, s4
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
-; SI-GISEL-NEXT: v_mov_b32_e32 v11, s5
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
-; SI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[10:11]
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; SI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
+; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
+; SI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[0:1]
; SI-GISEL-NEXT: v_mov_b32_e32 v14, 0xffffff80
; SI-GISEL-NEXT: v_mov_b32_e32 v15, 0x260
-; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5]
; SI-GISEL-NEXT: v_mov_b32_e32 v18, 0xbff00000
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5]
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
-; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v6
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v14, vcc
+; SI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], 0.5
+; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[0:1], v[6:7]
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[0:1]
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7]
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v10, 8, v12
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v10
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[8:9], v[4:5]
; SI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v14, vcc
; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v8
; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v15
; SI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], 0.5
@@ -2107,11 +2101,9 @@ define <2 x double> @v_neg_rsq_v2f64(<2 x double> %x) {
; VI-GISEL-LABEL: v_neg_rsq_v2f64:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: s_mov_b32 s4, 0
-; VI-GISEL-NEXT: s_brev_b32 s5, 8
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, s4
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5
-; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; VI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
+; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
; VI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
@@ -2142,15 +2134,15 @@ define <2 x double> @v_neg_rsq_v2f64(<2 x double> %x) {
; VI-GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
; VI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
; VI-GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
-; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; VI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
+; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; VI-GISEL-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
; VI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
; VI-GISEL-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; VI-GISEL-NEXT: v_div_scale_f64 v[4:5], s[6:7], v[0:1], v[0:1], -1.0
; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
+; VI-GISEL-NEXT: v_div_scale_f64 v[4:5], s[6:7], v[0:1], v[0:1], -1.0
; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], -1.0
; VI-GISEL-NEXT: v_div_scale_f64 v[16:17], s[4:5], -1.0, v[2:3], -1.0
@@ -2229,38 +2221,36 @@ define <2 x double> @v_neg_rsq_v2f64_poisonelt(<2 x double> %x) {
; SI-GISEL-LABEL: v_neg_rsq_v2f64_poisonelt:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: s_mov_b32 s4, 0
-; SI-GISEL-NEXT: s_brev_b32 s5, 8
-; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v10, s4
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
-; SI-GISEL-NEXT: v_mov_b32_e32 v11, s5
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
-; SI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[10:11]
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; SI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
+; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
+; SI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[0:1]
+; SI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], 0.5
+; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[0:1], v[6:7]
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[0:1]
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7]
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v12
+; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v6
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[10:11], v[2:3]
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[8:9], v[4:5]
; SI-GISEL-NEXT: v_mov_b32_e32 v12, 0xffffff80
; SI-GISEL-NEXT: v_cndmask_b32_e32 v13, 0, v12, vcc
-; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5]
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v8, 8, v8
-; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v8
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1]
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[10:11], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v13
; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[10:11], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[8:9], v[2:3], v[10:11]
-; SI-GISEL-NEXT: v_mov_b32_e32 v13, 0x260
+; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v13
; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[6:7], v[8:9], 0.5
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v13
+; SI-GISEL-NEXT: v_mov_b32_e32 v13, 0x260
; SI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
+; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v13
; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
@@ -2349,11 +2339,9 @@ define <2 x double> @v_neg_rsq_v2f64_poisonelt(<2 x double> %x) {
; VI-GISEL-LABEL: v_neg_rsq_v2f64_poisonelt:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: s_mov_b32 s4, 0
-; VI-GISEL-NEXT: s_brev_b32 s5, 8
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, s4
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5
-; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; VI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
+; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
; VI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
@@ -2384,15 +2372,15 @@ define <2 x double> @v_neg_rsq_v2f64_poisonelt(<2 x double> %x) {
; VI-GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
; VI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
; VI-GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
-; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; VI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
+; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; VI-GISEL-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
; VI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
; VI-GISEL-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; VI-GISEL-NEXT: v_div_scale_f64 v[4:5], s[6:7], v[0:1], v[0:1], -1.0
; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
+; VI-GISEL-NEXT: v_div_scale_f64 v[4:5], s[6:7], v[0:1], v[0:1], -1.0
; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], s[4:5]
; VI-GISEL-NEXT: v_div_scale_f64 v[16:17], s[4:5], s[4:5], v[2:3], s[4:5]
@@ -2503,38 +2491,36 @@ define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) {
; SI-GISEL-LABEL: v_neg_pos_rsq_v2f64:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: s_mov_b32 s4, 0
-; SI-GISEL-NEXT: s_brev_b32 s5, 8
-; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v10, s4
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
-; SI-GISEL-NEXT: v_mov_b32_e32 v11, s5
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
-; SI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[10:11]
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; SI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
+; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
+; SI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[0:1]
+; SI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], 0.5
+; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[0:1], v[6:7]
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[0:1]
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7]
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v12
+; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v6
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[10:11], v[2:3]
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[8:9], v[4:5]
; SI-GISEL-NEXT: v_mov_b32_e32 v12, 0xffffff80
; SI-GISEL-NEXT: v_cndmask_b32_e32 v13, 0, v12, vcc
-; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5]
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v8, 8, v8
-; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v8
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1]
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[10:11], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v13
; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[10:11], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[8:9], v[2:3], v[10:11]
-; SI-GISEL-NEXT: v_mov_b32_e32 v13, 0x260
+; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v13
; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[6:7], v[8:9], 0.5
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v13
+; SI-GISEL-NEXT: v_mov_b32_e32 v13, 0x260
; SI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
+; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v13
; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
@@ -2652,11 +2638,9 @@ define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) {
; VI-GISEL-LABEL: v_neg_pos_rsq_v2f64:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: s_mov_b32 s4, 0
-; VI-GISEL-NEXT: s_brev_b32 s5, 8
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, s4
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5
-; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; VI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
+; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
; VI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
@@ -2687,15 +2671,15 @@ define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) {
; VI-GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
; VI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
; VI-GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
-; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; VI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
+; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; VI-GISEL-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
; VI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
; VI-GISEL-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
-; VI-GISEL-NEXT: v_div_scale_f64 v[4:5], s[6:7], v[0:1], v[0:1], -1.0
; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
+; VI-GISEL-NEXT: v_div_scale_f64 v[4:5], s[6:7], v[0:1], v[0:1], -1.0
; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0
; VI-GISEL-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
@@ -4317,48 +4301,46 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
; SI-GISEL-LABEL: v_rsq_v2f64__afn_nnan_ninf:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: s_mov_b32 s4, 0
-; SI-GISEL-NEXT: s_brev_b32 s5, 8
-; SI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v10, s4
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
-; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
-; SI-GISEL-NEXT: v_mov_b32_e32 v11, s5
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
-; SI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[10:11]
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; SI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
+; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
+; SI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v6
+; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[0:1]
+; SI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], 0.5
+; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[0:1], v[6:7]
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; SI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
+; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[6:7], v[6:7], v[0:1]
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7]
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 8, v12
+; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v6
+; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; SI-GISEL-NEXT: v_rsq_f64_e32 v[10:11], v[2:3]
+; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[8:9], v[4:5]
; SI-GISEL-NEXT: v_mov_b32_e32 v12, 0xffffff80
; SI-GISEL-NEXT: v_cndmask_b32_e32 v13, 0, v12, vcc
-; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[4:5], 0.5
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[6:7], v[4:5], 0.5
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5]
-; SI-GISEL-NEXT: v_lshlrev_b32_e32 v8, 8, v8
-; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v8
-; SI-GISEL-NEXT: v_fma_f64 v[8:9], -v[4:5], v[4:5], v[0:1]
-; SI-GISEL-NEXT: v_rsq_f64_e32 v[10:11], v[2:3]
-; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v13
; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[10:11], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[8:9], v[2:3], v[10:11]
-; SI-GISEL-NEXT: v_mov_b32_e32 v13, 0x260
+; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v13
; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[6:7], v[8:9], 0.5
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v13
+; SI-GISEL-NEXT: v_mov_b32_e32 v13, 0x260
; SI-GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
+; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v13
; SI-GISEL-NEXT: v_fma_f64 v[8:9], v[10:11], v[6:7], v[8:9]
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[8:9], v[8:9], v[2:3]
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v13
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[10:11], v[6:7], v[8:9]
; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v12, s[4:5]
; SI-GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v6
+; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v13
; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[0:1]
@@ -4440,11 +4422,9 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
; VI-GISEL-LABEL: v_rsq_v2f64__afn_nnan_ninf:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: s_mov_b32 s4, 0
-; VI-GISEL-NEXT: s_brev_b32 s5, 8
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, s4
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5
-; VI-GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; VI-GISEL-NEXT: v_bfrev_b32_e32 v5, 8
+; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[4:5]
; VI-GISEL-NEXT: v_cmp_lt_f64_e64 s[4:5], v[2:3], v[4:5]
; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
@@ -5612,15 +5592,15 @@ define double @v_div_const_contract_sqrt_f64(double %x) {
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; SI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; SI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
+; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xffffff80
; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; SI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x260
+; SI-GISEL-NEXT: v_mov_b32_e32 v11, 0x260
; SI-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; SI-GISEL-NEXT: s_mov_b32 s6, 0
-; SI-GISEL-NEXT: s_mov_b32 s7, 0x40700000
-; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0x40700000
+; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0
+; SI-GISEL-NEXT: v_mov_b32_e32 v9, 0x40700000
+; SI-GISEL-NEXT: v_mov_b32_e32 v12, 0x40700000
; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; SI-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
@@ -5630,25 +5610,25 @@ define double @v_div_const_contract_sqrt_f64(double %x) {
; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v10, vcc
; SI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
+; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v11
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; SI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], s[6:7]
-; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], s[6:7], v[0:1], s[6:7]
+; SI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], v[8:9]
+; SI-GISEL-NEXT: v_div_scale_f64 v[10:11], s[4:5], v[8:9], v[0:1], v[8:9]
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v11, v12
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5]
-; SI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
+; SI-GISEL-NEXT: v_mul_f64 v[6:7], v[10:11], v[4:5]
+; SI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[10:11]
; SI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
-; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], s[6:7]
+; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], v[8:9]
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_div_const_contract_sqrt_f64:
@@ -5698,8 +5678,6 @@ define double @v_div_const_contract_sqrt_f64(double %x) {
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0
; VI-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
; VI-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; VI-GISEL-NEXT: s_mov_b32 s4, 0
-; VI-GISEL-NEXT: s_mov_b32 s5, 0x40700000
; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; VI-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
@@ -5718,19 +5696,21 @@ define double @v_div_const_contract_sqrt_f64(double %x) {
; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; VI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; VI-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x40700000
; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; VI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[6:7], v[0:1], v[0:1], s[4:5]
-; VI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
-; VI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_div_scale_f64 v[6:7], vcc, s[4:5], v[0:1], s[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
-; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5]
-; VI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
-; VI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
-; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], s[4:5]
+; VI-GISEL-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], v[4:5]
+; VI-GISEL-NEXT: v_rcp_f64_e32 v[6:7], v[2:3]
+; VI-GISEL-NEXT: v_fma_f64 v[8:9], -v[2:3], v[6:7], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
+; VI-GISEL-NEXT: v_div_scale_f64 v[8:9], vcc, v[4:5], v[0:1], v[4:5]
+; VI-GISEL-NEXT: v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
+; VI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
+; VI-GISEL-NEXT: v_mul_f64 v[10:11], v[8:9], v[6:7]
+; VI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[10:11], v[8:9]
+; VI-GISEL-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[6:7], v[10:11]
+; VI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], v[4:5]
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
%sqrt = call contract double @llvm.sqrt.f64(double %x)
%rsq = fdiv contract double 256.0, %sqrt
diff --git a/llvm/test/CodeGen/AMDGPU/sdiv64.ll b/llvm/test/CodeGen/AMDGPU/sdiv64.ll
index 4addf42b27984..5e76c7d7c734f 100644
--- a/llvm/test/CodeGen/AMDGPU/sdiv64.ll
+++ b/llvm/test/CodeGen/AMDGPU/sdiv64.ll
@@ -1747,8 +1747,8 @@ define i64 @v_test_sdiv_pow2_k_den_i64(i64 %x) {
; GCN-IR-NEXT: v_addc_u32_e64 v5, s[4:5], 0, -1, vcc
; GCN-IR-NEXT: s_mov_b64 s[10:11], 0
; GCN-IR-NEXT: v_mov_b32_e32 v9, 0
-; GCN-IR-NEXT: s_movk_i32 s12, 0x7fff
; GCN-IR-NEXT: v_mov_b32_e32 v3, 0
+; GCN-IR-NEXT: s_movk_i32 s12, 0x7fff
; GCN-IR-NEXT: .LBB13_3: ; %udiv-do-while
; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-IR-NEXT: v_lshl_b64 v[6:7], v[6:7], 1
diff --git a/llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll b/llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll
index 6be41fb8889b0..59a884c829312 100644
--- a/llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll
@@ -125,8 +125,8 @@ define amdgpu_kernel void @v_uextract_bit_32_i64(ptr addrspace(1) %out, ptr addr
; GCN-LABEL: v_uextract_bit_32_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
@@ -470,8 +470,8 @@ define amdgpu_kernel void @v_uextract_bit_33_i64_trunc_i32(ptr addrspace(1) %out
; GCN-LABEL: v_uextract_bit_33_i64_trunc_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
@@ -627,8 +627,8 @@ define amdgpu_kernel void @v_uextract_bit_33_36_use_upper_half_shift_i64(ptr add
; GCN-LABEL: v_uextract_bit_33_36_use_upper_half_shift_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd
-; GCN-NEXT: s_mov_b32 s2, 0
; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_mov_b32 s2, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_mov_b64 s[6:7], s[2:3]
diff --git a/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll b/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
index 101787abf8ea7..76bf9176143ff 100644
--- a/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
@@ -401,11 +401,11 @@ define void @v_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) {
; GCN-LABEL: v_select_sint_to_fp_i1_vals_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mov_b32_e32 v4, 0xbff00000
+; GCN-NEXT: v_mov_b32_e32 v3, 0xbff00000
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; GCN-NEXT: v_mov_b32_e32 v3, 0
-; GCN-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GCN-NEXT: flat_store_dwordx2 v[0:1], v[3:4]
+; GCN-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
+; GCN-NEXT: v_mov_b32_e32 v2, 0
+; GCN-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
;
@@ -414,10 +414,10 @@ define void @v_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) {
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_mov_b32_e32 v3, 0xbff00000
; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, 0
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc
-; GFX942-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
+; GFX942-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
+; GFX942-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %in, 0
@@ -482,11 +482,11 @@ define void @v_select_sint_to_fp_i1_vals_i64(ptr addrspace(1) %out, i32 %in) {
; GCN-LABEL: v_select_sint_to_fp_i1_vals_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mov_b32_e32 v4, 0xbff00000
+; GCN-NEXT: v_mov_b32_e32 v3, 0xbff00000
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; GCN-NEXT: v_mov_b32_e32 v3, 0
-; GCN-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GCN-NEXT: flat_store_dwordx2 v[0:1], v[3:4]
+; GCN-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
+; GCN-NEXT: v_mov_b32_e32 v2, 0
+; GCN-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
;
@@ -495,10 +495,10 @@ define void @v_select_sint_to_fp_i1_vals_i64(ptr addrspace(1) %out, i32 %in) {
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_mov_b32_e32 v3, 0xbff00000
; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, 0
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc
-; GFX942-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
+; GFX942-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
+; GFX942-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %in, 0
@@ -512,11 +512,11 @@ define void @v_swap_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in
; GCN-LABEL: v_swap_select_sint_to_fp_i1_vals_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mov_b32_e32 v4, 0xbff00000
+; GCN-NEXT: v_mov_b32_e32 v3, 0xbff00000
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; GCN-NEXT: v_mov_b32_e32 v3, 0
-; GCN-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc
-; GCN-NEXT: flat_store_dwordx2 v[0:1], v[3:4]
+; GCN-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
+; GCN-NEXT: v_mov_b32_e32 v2, 0
+; GCN-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
;
@@ -525,10 +525,10 @@ define void @v_swap_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_mov_b32_e32 v3, 0xbff00000
; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, 0
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_cndmask_b32_e64 v5, v3, 0, vcc
-; GFX942-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
+; GFX942-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
+; GFX942-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %in, 0
diff --git a/llvm/test/CodeGen/AMDGPU/spill-agpr.ll b/llvm/test/CodeGen/AMDGPU/spill-agpr.ll
index eb0d5465cacd9..2040e2b26cb15 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-agpr.ll
@@ -191,72 +191,72 @@ define amdgpu_kernel void @max_10_vgprs_used_9a() #1 {
define amdgpu_kernel void @max_32regs_mfma32(ptr addrspace(1) %arg) #3 {
; GFX908-LABEL: max_32regs_mfma32:
; GFX908: ; %bb.0: ; %bb
-; GFX908-NEXT: v_mov_b32_e32 v2, 0x40400000
-; GFX908-NEXT: v_mov_b32_e32 v3, 0x40c00000
-; GFX908-NEXT: v_mov_b32_e32 v4, 0x40e00000
-; GFX908-NEXT: v_accvgpr_write_b32 a2, v2
-; GFX908-NEXT: v_mov_b32_e32 v2, 0x40a00000
-; GFX908-NEXT: v_accvgpr_write_b32 a5, v3
-; GFX908-NEXT: v_accvgpr_write_b32 a6, v4
-; GFX908-NEXT: v_accvgpr_write_b32 a4, v2
-; GFX908-NEXT: v_mov_b32_e32 v2, 0x41000000
-; GFX908-NEXT: v_mov_b32_e32 v3, 0x41100000
-; GFX908-NEXT: v_mov_b32_e32 v4, 0x41200000
-; GFX908-NEXT: v_accvgpr_write_b32 a7, v2
-; GFX908-NEXT: v_accvgpr_write_b32 a8, v3
-; GFX908-NEXT: v_accvgpr_write_b32 a9, v4
-; GFX908-NEXT: v_mov_b32_e32 v2, 0x41300000
-; GFX908-NEXT: v_mov_b32_e32 v3, 0x41400000
-; GFX908-NEXT: v_mov_b32_e32 v4, 0x41500000
-; GFX908-NEXT: v_accvgpr_write_b32 a10, v2
-; GFX908-NEXT: v_accvgpr_write_b32 a11, v3
-; GFX908-NEXT: v_accvgpr_write_b32 a12, v4
-; GFX908-NEXT: v_mov_b32_e32 v2, 0x41600000
-; GFX908-NEXT: v_mov_b32_e32 v3, 0x41700000
-; GFX908-NEXT: v_mov_b32_e32 v4, 0x41800000
-; GFX908-NEXT: v_accvgpr_write_b32 a13, v2
-; GFX908-NEXT: v_accvgpr_write_b32 a14, v3
-; GFX908-NEXT: v_accvgpr_write_b32 a15, v4
-; GFX908-NEXT: v_mov_b32_e32 v2, 0x41880000
-; GFX908-NEXT: v_mov_b32_e32 v3, 0x41900000
-; GFX908-NEXT: v_mov_b32_e32 v4, 0x41980000
-; GFX908-NEXT: v_accvgpr_write_b32 a16, v2
-; GFX908-NEXT: v_accvgpr_write_b32 a17, v3
-; GFX908-NEXT: v_accvgpr_write_b32 a18, v4
-; GFX908-NEXT: v_mov_b32_e32 v2, 0x41a00000
-; GFX908-NEXT: v_mov_b32_e32 v3, 0x41a80000
-; GFX908-NEXT: v_mov_b32_e32 v4, 0x41b00000
-; GFX908-NEXT: v_accvgpr_write_b32 a19, v2
-; GFX908-NEXT: v_accvgpr_write_b32 a20, v3
-; GFX908-NEXT: v_accvgpr_write_b32 a21, v4
-; GFX908-NEXT: v_mov_b32_e32 v2, 0x41b80000
-; GFX908-NEXT: v_mov_b32_e32 v3, 0x41c00000
-; GFX908-NEXT: v_mov_b32_e32 v4, 0x41c80000
-; GFX908-NEXT: v_accvgpr_write_b32 a22, v2
-; GFX908-NEXT: v_accvgpr_write_b32 a23, v3
-; GFX908-NEXT: v_accvgpr_write_b32 a24, v4
-; GFX908-NEXT: v_mov_b32_e32 v2, 0x41d00000
-; GFX908-NEXT: v_mov_b32_e32 v3, 0x41d80000
-; GFX908-NEXT: v_mov_b32_e32 v4, 0x41e00000
-; GFX908-NEXT: v_mov_b32_e32 v1, 1.0
-; GFX908-NEXT: v_accvgpr_write_b32 a25, v2
-; GFX908-NEXT: v_accvgpr_write_b32 a26, v3
-; GFX908-NEXT: v_accvgpr_write_b32 a27, v4
-; GFX908-NEXT: v_mov_b32_e32 v2, 0x41e80000
-; GFX908-NEXT: v_mov_b32_e32 v3, 0x41f00000
-; GFX908-NEXT: v_mov_b32_e32 v4, 0x41f80000
+; GFX908-NEXT: v_mov_b32_e32 v1, 0x40400000
+; GFX908-NEXT: v_mov_b32_e32 v2, 0x40c00000
+; GFX908-NEXT: v_mov_b32_e32 v3, 0x40e00000
+; GFX908-NEXT: v_accvgpr_write_b32 a2, v1
+; GFX908-NEXT: v_mov_b32_e32 v1, 0x40a00000
+; GFX908-NEXT: v_accvgpr_write_b32 a5, v2
+; GFX908-NEXT: v_accvgpr_write_b32 a6, v3
+; GFX908-NEXT: v_accvgpr_write_b32 a4, v1
+; GFX908-NEXT: v_mov_b32_e32 v1, 0x41000000
+; GFX908-NEXT: v_mov_b32_e32 v2, 0x41100000
+; GFX908-NEXT: v_mov_b32_e32 v3, 0x41200000
+; GFX908-NEXT: v_accvgpr_write_b32 a7, v1
+; GFX908-NEXT: v_accvgpr_write_b32 a8, v2
+; GFX908-NEXT: v_accvgpr_write_b32 a9, v3
+; GFX908-NEXT: v_mov_b32_e32 v1, 0x41300000
+; GFX908-NEXT: v_mov_b32_e32 v2, 0x41400000
+; GFX908-NEXT: v_mov_b32_e32 v3, 0x41500000
+; GFX908-NEXT: v_accvgpr_write_b32 a10, v1
+; GFX908-NEXT: v_accvgpr_write_b32 a11, v2
+; GFX908-NEXT: v_accvgpr_write_b32 a12, v3
+; GFX908-NEXT: v_mov_b32_e32 v1, 0x41600000
+; GFX908-NEXT: v_mov_b32_e32 v2, 0x41700000
+; GFX908-NEXT: v_mov_b32_e32 v3, 0x41800000
+; GFX908-NEXT: v_accvgpr_write_b32 a13, v1
+; GFX908-NEXT: v_accvgpr_write_b32 a14, v2
+; GFX908-NEXT: v_accvgpr_write_b32 a15, v3
+; GFX908-NEXT: v_mov_b32_e32 v1, 0x41880000
+; GFX908-NEXT: v_mov_b32_e32 v2, 0x41900000
+; GFX908-NEXT: v_mov_b32_e32 v3, 0x41980000
+; GFX908-NEXT: v_accvgpr_write_b32 a16, v1
+; GFX908-NEXT: v_accvgpr_write_b32 a17, v2
+; GFX908-NEXT: v_accvgpr_write_b32 a18, v3
+; GFX908-NEXT: v_mov_b32_e32 v1, 0x41a00000
+; GFX908-NEXT: v_mov_b32_e32 v2, 0x41a80000
+; GFX908-NEXT: v_mov_b32_e32 v3, 0x41b00000
+; GFX908-NEXT: v_accvgpr_write_b32 a19, v1
+; GFX908-NEXT: v_accvgpr_write_b32 a20, v2
+; GFX908-NEXT: v_accvgpr_write_b32 a21, v3
+; GFX908-NEXT: v_mov_b32_e32 v1, 0x41b80000
+; GFX908-NEXT: v_mov_b32_e32 v2, 0x41c00000
+; GFX908-NEXT: v_mov_b32_e32 v3, 0x41c80000
+; GFX908-NEXT: v_accvgpr_write_b32 a22, v1
+; GFX908-NEXT: v_accvgpr_write_b32 a23, v2
+; GFX908-NEXT: v_accvgpr_write_b32 a24, v3
+; GFX908-NEXT: v_mov_b32_e32 v1, 0x41d00000
+; GFX908-NEXT: v_mov_b32_e32 v2, 0x41d80000
+; GFX908-NEXT: v_mov_b32_e32 v3, 0x41e00000
+; GFX908-NEXT: v_accvgpr_write_b32 a25, v1
+; GFX908-NEXT: v_accvgpr_write_b32 a26, v2
+; GFX908-NEXT: v_accvgpr_write_b32 a27, v3
+; GFX908-NEXT: v_mov_b32_e32 v1, 0x41e80000
+; GFX908-NEXT: v_mov_b32_e32 v2, 0x41f00000
+; GFX908-NEXT: v_mov_b32_e32 v3, 0x41f80000
+; GFX908-NEXT: v_mov_b32_e32 v4, 1.0
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ;;#ASMEND
; GFX908-NEXT: v_accvgpr_read_b32 v5, a0
; GFX908-NEXT: v_accvgpr_write_b32 a0, 1.0
; GFX908-NEXT: v_accvgpr_write_b32 a1, 2.0
; GFX908-NEXT: v_accvgpr_write_b32 a3, 4.0
-; GFX908-NEXT: v_accvgpr_write_b32 a28, v2
-; GFX908-NEXT: v_accvgpr_write_b32 a29, v3
-; GFX908-NEXT: v_accvgpr_write_b32 a30, v4
+; GFX908-NEXT: v_accvgpr_write_b32 a28, v1
+; GFX908-NEXT: v_accvgpr_write_b32 a29, v2
+; GFX908-NEXT: v_accvgpr_write_b32 a30, v3
; GFX908-NEXT: v_accvgpr_write_b32 a31, 2.0
; GFX908-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
-; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v1, a[0:31]
+; GFX908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v4, v4, a[0:31]
; GFX908-NEXT: v_mov_b32_e32 v0, 0
; GFX908-NEXT: s_nop 7
; GFX908-NEXT: s_nop 5
@@ -272,73 +272,73 @@ define amdgpu_kernel void @max_32regs_mfma32(ptr addrspace(1) %arg) #3 {
;
; GFX90A-LABEL: max_32regs_mfma32:
; GFX90A: ; %bb.0: ; %bb
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0x40400000
-; GFX90A-NEXT: v_accvgpr_write_b32 a2, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0x40a00000
-; GFX90A-NEXT: v_accvgpr_write_b32 a4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0x40c00000
-; GFX90A-NEXT: v_accvgpr_write_b32 a5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0x40e00000
-; GFX90A-NEXT: v_accvgpr_write_b32 a6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0x41000000
-; GFX90A-NEXT: v_accvgpr_write_b32 a7, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0x41100000
-; GFX90A-NEXT: v_accvgpr_write_b32 a8, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0x41200000
-; GFX90A-NEXT: v_accvgpr_write_b32 a9, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0x41300000
-; GFX90A-NEXT: v_accvgpr_write_b32 a10, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0x41400000
-; GFX90A-NEXT: v_accvgpr_write_b32 a11, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0x41500000
-; GFX90A-NEXT: v_accvgpr_write_b32 a12, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0x41600000
-; GFX90A-NEXT: v_accvgpr_write_b32 a13, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0x41700000
-; GFX90A-NEXT: v_accvgpr_write_b32 a14, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0x41800000
-; GFX90A-NEXT: v_accvgpr_write_b32 a15, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0x41880000
-; GFX90A-NEXT: v_accvgpr_write_b32 a16, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0x41900000
-; GFX90A-NEXT: v_accvgpr_write_b32 a17, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0x41980000
-; GFX90A-NEXT: v_accvgpr_write_b32 a18, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0x41a00000
-; GFX90A-NEXT: v_accvgpr_write_b32 a19, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0x41a80000
-; GFX90A-NEXT: v_accvgpr_write_b32 a20, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0x41b00000
-; GFX90A-NEXT: v_accvgpr_write_b32 a21, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0x41b80000
-; GFX90A-NEXT: v_accvgpr_write_b32 a22, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0x41c00000
-; GFX90A-NEXT: v_accvgpr_write_b32 a23, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0x41c80000
-; GFX90A-NEXT: v_accvgpr_write_b32 a24, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0x41d00000
-; GFX90A-NEXT: v_accvgpr_write_b32 a25, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0x41d80000
-; GFX90A-NEXT: v_mov_b32_e32 v1, 1.0
-; GFX90A-NEXT: v_accvgpr_write_b32 a26, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0x41e00000
-; GFX90A-NEXT: v_accvgpr_write_b32 a27, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0x41e80000
-; GFX90A-NEXT: v_accvgpr_write_b32 a28, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0x41f00000
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0x40400000
+; GFX90A-NEXT: v_accvgpr_write_b32 a2, v1
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0x40a00000
+; GFX90A-NEXT: v_accvgpr_write_b32 a4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0x40c00000
+; GFX90A-NEXT: v_accvgpr_write_b32 a5, v1
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0x40e00000
+; GFX90A-NEXT: v_accvgpr_write_b32 a6, v1
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0x41000000
+; GFX90A-NEXT: v_accvgpr_write_b32 a7, v1
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0x41100000
+; GFX90A-NEXT: v_accvgpr_write_b32 a8, v1
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0x41200000
+; GFX90A-NEXT: v_accvgpr_write_b32 a9, v1
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0x41300000
+; GFX90A-NEXT: v_accvgpr_write_b32 a10, v1
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0x41400000
+; GFX90A-NEXT: v_accvgpr_write_b32 a11, v1
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0x41500000
+; GFX90A-NEXT: v_accvgpr_write_b32 a12, v1
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0x41600000
+; GFX90A-NEXT: v_accvgpr_write_b32 a13, v1
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0x41700000
+; GFX90A-NEXT: v_accvgpr_write_b32 a14, v1
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0x41800000
+; GFX90A-NEXT: v_accvgpr_write_b32 a15, v1
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0x41880000
+; GFX90A-NEXT: v_accvgpr_write_b32 a16, v1
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0x41900000
+; GFX90A-NEXT: v_accvgpr_write_b32 a17, v1
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0x41980000
+; GFX90A-NEXT: v_accvgpr_write_b32 a18, v1
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0x41a00000
+; GFX90A-NEXT: v_accvgpr_write_b32 a19, v1
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0x41a80000
+; GFX90A-NEXT: v_accvgpr_write_b32 a20, v1
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0x41b00000
+; GFX90A-NEXT: v_accvgpr_write_b32 a21, v1
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0x41b80000
+; GFX90A-NEXT: v_accvgpr_write_b32 a22, v1
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0x41c00000
+; GFX90A-NEXT: v_accvgpr_write_b32 a23, v1
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0x41c80000
+; GFX90A-NEXT: v_accvgpr_write_b32 a24, v1
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0x41d00000
+; GFX90A-NEXT: v_accvgpr_write_b32 a25, v1
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0x41d80000
+; GFX90A-NEXT: v_accvgpr_write_b32 a26, v1
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0x41e00000
+; GFX90A-NEXT: v_mov_b32_e32 v2, 1.0
+; GFX90A-NEXT: v_accvgpr_write_b32 a27, v1
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0x41e80000
+; GFX90A-NEXT: v_accvgpr_write_b32 a28, v1
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0x41f00000
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_accvgpr_write_b32 a1, 2.0
-; GFX90A-NEXT: v_accvgpr_write_b32 a29, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, 0x41f80000
+; GFX90A-NEXT: v_accvgpr_write_b32 a29, v1
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0x41f80000
; GFX90A-NEXT: v_accvgpr_read_b32 v3, a0
; GFX90A-NEXT: v_accvgpr_write_b32 a0, 1.0
; GFX90A-NEXT: v_accvgpr_write_b32 a3, 4.0
-; GFX90A-NEXT: v_accvgpr_write_b32 a30, v2
+; GFX90A-NEXT: v_accvgpr_write_b32 a30, v1
; GFX90A-NEXT: v_accvgpr_mov_b32 a31, a1
; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX90A-NEXT: v_mov_b32_e32 v0, 0
-; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v1, a[0:31]
+; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v2, v2, a[0:31]
; GFX90A-NEXT: s_nop 7
; GFX90A-NEXT: s_nop 7
; GFX90A-NEXT: s_nop 2
diff --git a/llvm/test/CodeGen/AMDGPU/srem64.ll b/llvm/test/CodeGen/AMDGPU/srem64.ll
index e64e3def98c26..c7b690fbd4a21 100644
--- a/llvm/test/CodeGen/AMDGPU/srem64.ll
+++ b/llvm/test/CodeGen/AMDGPU/srem64.ll
@@ -1868,8 +1868,8 @@ define i64 @v_test_srem_pow2_k_den_i64(i64 %x) {
; GCN-IR-NEXT: v_addc_u32_e64 v7, s[4:5], 0, -1, vcc
; GCN-IR-NEXT: s_mov_b64 s[10:11], 0
; GCN-IR-NEXT: v_mov_b32_e32 v11, 0
-; GCN-IR-NEXT: s_movk_i32 s12, 0x7fff
; GCN-IR-NEXT: v_mov_b32_e32 v5, 0
+; GCN-IR-NEXT: s_movk_i32 s12, 0x7fff
; GCN-IR-NEXT: .LBB13_3: ; %udiv-do-while
; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-IR-NEXT: v_lshl_b64 v[8:9], v[8:9], 1
diff --git a/llvm/test/CodeGen/AMDGPU/srl.ll b/llvm/test/CodeGen/AMDGPU/srl.ll
index c05f341f9e910..571c0f04c06ca 100644
--- a/llvm/test/CodeGen/AMDGPU/srl.ll
+++ b/llvm/test/CodeGen/AMDGPU/srl.ll
@@ -412,8 +412,8 @@ define amdgpu_kernel void @v_lshr_32_i64(ptr addrspace(1) %out, ptr addrspace(1)
; SI-LABEL: v_lshr_32_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; SI-NEXT: s_mov_b32 s6, 0
; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, 0
; SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; SI-NEXT: v_mov_b32_e32 v1, 0
; SI-NEXT: s_waitcnt lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll
index a69ee2e1a8b5c..199ab49fa19d5 100644
--- a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll
@@ -63,7 +63,7 @@ define amdgpu_ps void @foo() #0 {
; GCN-NEXT: s_branch .LBB1_5
; GCN-NEXT: .LBB1_4:
; GCN-NEXT: v_mov_b32_e32 v1, 0
-; GCN-NEXT: v_mov_b32_e32 v0, v1
+; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: .LBB1_5: ; %bb14
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_mul_f32_e32 v0, 0x41280000, v0
diff --git a/llvm/test/CodeGen/AMDGPU/udiv64.ll b/llvm/test/CodeGen/AMDGPU/udiv64.ll
index bc9a3f2389e7e..bf1f6980fe25a 100644
--- a/llvm/test/CodeGen/AMDGPU/udiv64.ll
+++ b/llvm/test/CodeGen/AMDGPU/udiv64.ll
@@ -1186,8 +1186,8 @@ define i64 @v_test_udiv_pow2_k_den_i64(i64 %x) {
; GCN-IR-NEXT: v_addc_u32_e64 v1, s[4:5], 0, -1, vcc
; GCN-IR-NEXT: s_mov_b64 s[10:11], 0
; GCN-IR-NEXT: v_mov_b32_e32 v9, 0
-; GCN-IR-NEXT: s_movk_i32 s12, 0x7fff
; GCN-IR-NEXT: v_mov_b32_e32 v5, 0
+; GCN-IR-NEXT: s_movk_i32 s12, 0x7fff
; GCN-IR-NEXT: .LBB10_3: ; %udiv-do-while
; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-IR-NEXT: v_lshl_b64 v[6:7], v[6:7], 1
diff --git a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
index 983acfc2c0699..b31cc36a5f7c6 100644
--- a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
@@ -660,11 +660,11 @@ define void @v_select_uint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) {
; GCN-LABEL: v_select_uint_to_fp_i1_vals_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mov_b32_e32 v4, 0x3ff00000
+; GCN-NEXT: v_mov_b32_e32 v3, 0x3ff00000
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; GCN-NEXT: v_mov_b32_e32 v3, 0
-; GCN-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GCN-NEXT: flat_store_dwordx2 v[0:1], v[3:4]
+; GCN-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
+; GCN-NEXT: v_mov_b32_e32 v2, 0
+; GCN-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
;
@@ -673,10 +673,10 @@ define void @v_select_uint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) {
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_mov_b32_e32 v3, 0x3ff00000
; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, 0
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc
-; GFX942-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
+; GFX942-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
+; GFX942-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %in, 0
@@ -741,11 +741,11 @@ define void @v_select_uint_to_fp_i1_vals_i64(ptr addrspace(1) %out, i32 %in) {
; GCN-LABEL: v_select_uint_to_fp_i1_vals_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mov_b32_e32 v4, 0x3ff00000
+; GCN-NEXT: v_mov_b32_e32 v3, 0x3ff00000
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; GCN-NEXT: v_mov_b32_e32 v3, 0
-; GCN-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GCN-NEXT: flat_store_dwordx2 v[0:1], v[3:4]
+; GCN-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
+; GCN-NEXT: v_mov_b32_e32 v2, 0
+; GCN-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
;
@@ -754,10 +754,10 @@ define void @v_select_uint_to_fp_i1_vals_i64(ptr addrspace(1) %out, i32 %in) {
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_mov_b32_e32 v3, 0x3ff00000
; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, 0
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc
-; GFX942-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
+; GFX942-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
+; GFX942-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %in, 0
@@ -823,11 +823,11 @@ define void @v_swap_select_uint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in
; GCN-LABEL: v_swap_select_uint_to_fp_i1_vals_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mov_b32_e32 v4, 0x3ff00000
+; GCN-NEXT: v_mov_b32_e32 v3, 0x3ff00000
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; GCN-NEXT: v_mov_b32_e32 v3, 0
-; GCN-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc
-; GCN-NEXT: flat_store_dwordx2 v[0:1], v[3:4]
+; GCN-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
+; GCN-NEXT: v_mov_b32_e32 v2, 0
+; GCN-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
;
@@ -836,10 +836,10 @@ define void @v_swap_select_uint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_mov_b32_e32 v3, 0x3ff00000
; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, 0
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_cndmask_b32_e64 v5, v3, 0, vcc
-; GFX942-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
+; GFX942-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
+; GFX942-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %in, 0
diff --git a/llvm/test/CodeGen/AMDGPU/undef-handling-crash-in-ra.ll b/llvm/test/CodeGen/AMDGPU/undef-handling-crash-in-ra.ll
index b3166fa3f4548..3e40f9a5b2b12 100644
--- a/llvm/test/CodeGen/AMDGPU/undef-handling-crash-in-ra.ll
+++ b/llvm/test/CodeGen/AMDGPU/undef-handling-crash-in-ra.ll
@@ -8,8 +8,8 @@ define amdgpu_kernel void @foo(ptr addrspace(5) %ptr5, ptr %p0, double %v0, <4 x
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_add_u32 flat_scratch_lo, s12, s17
; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
-; CHECK-NEXT: v_pk_mov_b32 v[44:45], 0, 0
-; CHECK-NEXT: flat_load_dword v42, v[44:45]
+; CHECK-NEXT: v_pk_mov_b32 v[46:47], 0, 0
+; CHECK-NEXT: flat_load_dword v42, v[46:47]
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5]
; CHECK-NEXT: s_load_dwordx4 s[4:7], s[8:9], 0x8
@@ -26,17 +26,17 @@ define amdgpu_kernel void @foo(ptr addrspace(5) %ptr5, ptr %p0, double %v0, <4 x
; CHECK-NEXT: s_cselect_b32 s8, s64, 0
; CHECK-NEXT: s_add_u32 s50, s34, 48
; CHECK-NEXT: s_addc_u32 s51, s35, 0
-; CHECK-NEXT: v_pk_mov_b32 v[56:57], s[4:5], s[4:5] op_sel:[0,1]
+; CHECK-NEXT: v_pk_mov_b32 v[58:59], s[4:5], s[4:5] op_sel:[0,1]
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, G at gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, G at gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[54:55], s[4:5], 0x0
; CHECK-NEXT: s_mov_b32 s6, 0
-; CHECK-NEXT: v_mov_b32_e32 v47, s7
+; CHECK-NEXT: v_mov_b32_e32 v57, s7
; CHECK-NEXT: s_mov_b32 s7, s6
; CHECK-NEXT: s_mov_b32 s53, s14
-; CHECK-NEXT: v_mov_b32_e32 v46, s8
-; CHECK-NEXT: v_pk_mov_b32 v[58:59], s[6:7], s[6:7] op_sel:[0,1]
+; CHECK-NEXT: v_mov_b32_e32 v56, s8
+; CHECK-NEXT: v_pk_mov_b32 v[60:61], s[6:7], s[6:7] op_sel:[0,1]
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[8:9], s[50:51]
@@ -49,13 +49,13 @@ define amdgpu_kernel void @foo(ptr addrspace(5) %ptr5, ptr %p0, double %v0, <4 x
; CHECK-NEXT: s_mov_b32 s52, s15
; CHECK-NEXT: s_mov_b64 s[36:37], s[10:11]
; CHECK-NEXT: v_mov_b32_e32 v40, v0
-; CHECK-NEXT: flat_store_dwordx2 v[56:57], v[58:59]
+; CHECK-NEXT: flat_store_dwordx2 v[58:59], v[60:61]
; CHECK-NEXT: ; kill: def $sgpr15 killed $sgpr15
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[54:55]
-; CHECK-NEXT: flat_load_dwordx2 v[60:61], v[56:57]
-; CHECK-NEXT: v_mov_b32_e32 v62, 0
-; CHECK-NEXT: v_mov_b32_e32 v63, 0x3ff00000
+; CHECK-NEXT: flat_load_dwordx2 v[62:63], v[58:59]
+; CHECK-NEXT: v_mov_b32_e32 v44, 0
+; CHECK-NEXT: v_mov_b32_e32 v45, 0x3ff00000
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[8:9], s[50:51]
@@ -64,30 +64,28 @@ define amdgpu_kernel void @foo(ptr addrspace(5) %ptr5, ptr %p0, double %v0, <4 x
; CHECK-NEXT: s_mov_b32 s13, s52
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: v_mov_b32_e32 v31, v40
-; CHECK-NEXT: flat_store_dwordx2 v[44:45], v[62:63]
-; CHECK-NEXT: flat_store_dwordx2 v[56:57], v[58:59]
+; CHECK-NEXT: flat_store_dwordx2 v[46:47], v[44:45]
+; CHECK-NEXT: flat_store_dwordx2 v[58:59], v[60:61]
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ; kill: def $sgpr15 killed $sgpr15
; CHECK-NEXT: s_swappc_b64 s[30:31], s[54:55]
-; CHECK-NEXT: flat_load_dwordx2 v[0:1], v[46:47] glc
+; CHECK-NEXT: flat_load_dwordx2 v[0:1], v[56:57] glc
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v0, s64
; CHECK-NEXT: v_cmp_lt_i32_e32 vcc, 0, v42
-; CHECK-NEXT: flat_store_dwordx2 v[56:57], v[60:61]
+; CHECK-NEXT: flat_store_dwordx2 v[58:59], v[62:63]
; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: flat_store_dwordx2 v[56:57], a[32:33]
+; CHECK-NEXT: flat_store_dwordx2 v[58:59], a[32:33]
; CHECK-NEXT: buffer_store_dword a33, v0, s[0:3], 0 offen offset:4
-; CHECK-NEXT: buffer_store_dword v62, v0, s[0:3], 0 offen
-; CHECK-NEXT: ; implicit-def: $vgpr4
+; CHECK-NEXT: buffer_store_dword v44, v0, s[0:3], 0 offen
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
; CHECK-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; CHECK-NEXT: s_cbranch_execz .LBB0_4
; CHECK-NEXT: ; %bb.1: ; %LeafBlock5
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v42
-; CHECK-NEXT: v_mov_b32_e32 v4, 0
; CHECK-NEXT: s_and_saveexec_b64 s[6:7], vcc
; CHECK-NEXT: ; %bb.2: ; %sw.bb17.i.i.i.i
-; CHECK-NEXT: v_mov_b32_e32 v4, 1
+; CHECK-NEXT: v_mov_b32_e32 v44, 1
; CHECK-NEXT: ; %bb.3: ; %Flow
; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
; CHECK-NEXT: .LBB0_4: ; %Flow8
@@ -105,10 +103,10 @@ define amdgpu_kernel void @foo(ptr addrspace(5) %ptr5, ptr %p0, double %v0, <4 x
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: ; %bb.7: ; %Flow7
; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
-; CHECK-NEXT: v_mov_b32_e32 v4, 0
+; CHECK-NEXT: v_mov_b32_e32 v44, 0
; CHECK-NEXT: .LBB0_8: ; %bb.1
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v44
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
; CHECK-NEXT: s_cbranch_execz .LBB0_10
; CHECK-NEXT: ; %bb.9: ; %sw.bb.i.i.i.i.i
diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll b/llvm/test/CodeGen/AMDGPU/urem64.ll
index 464dad83f47c9..c4d928185d8f4 100644
--- a/llvm/test/CodeGen/AMDGPU/urem64.ll
+++ b/llvm/test/CodeGen/AMDGPU/urem64.ll
@@ -664,96 +664,94 @@ define amdgpu_kernel void @s_test_urem23_64_v2i64(ptr addrspace(1) %out, <2 x i6
; GCN-LABEL: s_test_urem23_64_v2i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0xd
-; GCN-NEXT: v_mov_b32_e32 v1, 0
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_lshr_b32 s6, s13, 1
; GCN-NEXT: v_cvt_f32_u32_e32 v0, s6
-; GCN-NEXT: s_lshr_b32 s0, s15, 9
-; GCN-NEXT: v_cvt_f32_u32_e32 v2, s0
-; GCN-NEXT: s_lshr_b32 s7, s11, 9
+; GCN-NEXT: s_lshr_b32 s4, s15, 9
+; GCN-NEXT: v_cvt_f32_u32_e32 v1, s4
+; GCN-NEXT: s_lshr_b32 s5, s11, 9
; GCN-NEXT: v_rcp_iflag_f32_e32 v0, v0
-; GCN-NEXT: v_cvt_f32_u32_e32 v3, s7
-; GCN-NEXT: v_rcp_iflag_f32_e32 v4, v2
-; GCN-NEXT: s_sub_i32 s1, 0, s6
+; GCN-NEXT: v_cvt_f32_u32_e32 v2, s5
+; GCN-NEXT: v_rcp_iflag_f32_e32 v3, v1
+; GCN-NEXT: s_sub_i32 s8, 0, s6
; GCN-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GCN-NEXT: v_mul_f32_e32 v4, v3, v4
-; GCN-NEXT: v_trunc_f32_e32 v4, v4
-; GCN-NEXT: v_mad_f32 v3, -v4, v2, v3
-; GCN-NEXT: v_mul_lo_u32 v5, s1, v0
-; GCN-NEXT: v_cvt_u32_f32_e32 v4, v4
-; GCN-NEXT: v_cmp_ge_f32_e64 vcc, |v3|, v2
-; GCN-NEXT: s_lshr_b32 s8, s9, 1
-; GCN-NEXT: v_mul_hi_u32 v5, v0, v5
-; GCN-NEXT: v_addc_u32_e32 v2, vcc, 0, v4, vcc
-; GCN-NEXT: v_mul_lo_u32 v2, v2, s0
-; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v5
-; GCN-NEXT: v_mul_hi_u32 v0, s8, v0
-; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; GCN-NEXT: v_sub_i32_e32 v2, vcc, s7, v2
+; GCN-NEXT: v_mul_f32_e32 v3, v2, v3
+; GCN-NEXT: v_trunc_f32_e32 v3, v3
+; GCN-NEXT: v_mad_f32 v2, -v3, v1, v2
+; GCN-NEXT: v_mul_lo_u32 v4, s8, v0
+; GCN-NEXT: v_cvt_u32_f32_e32 v3, v3
+; GCN-NEXT: v_cmp_ge_f32_e64 vcc, |v2|, v1
+; GCN-NEXT: s_lshr_b32 s7, s9, 1
+; GCN-NEXT: v_mul_hi_u32 v4, v0, v4
+; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GCN-NEXT: v_mul_lo_u32 v1, v1, s4
+; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v4
+; GCN-NEXT: v_mul_hi_u32 v0, s7, v0
+; GCN-NEXT: v_sub_i32_e32 v1, vcc, s5, v1
+; GCN-NEXT: v_and_b32_e32 v2, 0x7fffff, v1
; GCN-NEXT: v_readfirstlane_b32 s4, v0
; GCN-NEXT: s_mul_i32 s4, s4, s6
-; GCN-NEXT: s_sub_i32 s4, s8, s4
+; GCN-NEXT: s_sub_i32 s4, s7, s4
; GCN-NEXT: s_sub_i32 s5, s4, s6
; GCN-NEXT: s_cmp_ge_u32 s4, s6
; GCN-NEXT: s_cselect_b32 s4, s5, s4
; GCN-NEXT: s_sub_i32 s5, s4, s6
; GCN-NEXT: s_cmp_ge_u32 s4, s6
; GCN-NEXT: s_cselect_b32 s4, s5, s4
-; GCN-NEXT: v_and_b32_e32 v2, 0x7fffff, v2
+; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: v_mov_b32_e32 v3, v1
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GCN-NEXT: s_endpgm
;
; GCN-IR-LABEL: s_test_urem23_64_v2i64:
; GCN-IR: ; %bb.0:
; GCN-IR-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0xd
-; GCN-IR-NEXT: v_mov_b32_e32 v1, 0
+; GCN-IR-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; GCN-IR-NEXT: s_mov_b32 s3, 0xf000
; GCN-IR-NEXT: s_mov_b32 s2, -1
; GCN-IR-NEXT: s_waitcnt lgkmcnt(0)
; GCN-IR-NEXT: s_lshr_b32 s6, s13, 1
; GCN-IR-NEXT: v_cvt_f32_u32_e32 v0, s6
-; GCN-IR-NEXT: s_lshr_b32 s0, s15, 9
-; GCN-IR-NEXT: v_cvt_f32_u32_e32 v2, s0
-; GCN-IR-NEXT: s_lshr_b32 s7, s11, 9
+; GCN-IR-NEXT: s_lshr_b32 s4, s15, 9
+; GCN-IR-NEXT: v_cvt_f32_u32_e32 v1, s4
+; GCN-IR-NEXT: s_lshr_b32 s5, s11, 9
; GCN-IR-NEXT: v_rcp_iflag_f32_e32 v0, v0
-; GCN-IR-NEXT: v_cvt_f32_u32_e32 v3, s7
-; GCN-IR-NEXT: v_rcp_iflag_f32_e32 v4, v2
-; GCN-IR-NEXT: s_sub_i32 s1, 0, s6
+; GCN-IR-NEXT: v_cvt_f32_u32_e32 v2, s5
+; GCN-IR-NEXT: v_rcp_iflag_f32_e32 v3, v1
+; GCN-IR-NEXT: s_sub_i32 s8, 0, s6
; GCN-IR-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GCN-IR-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GCN-IR-NEXT: v_mul_f32_e32 v4, v3, v4
-; GCN-IR-NEXT: v_trunc_f32_e32 v4, v4
-; GCN-IR-NEXT: v_mad_f32 v3, -v4, v2, v3
-; GCN-IR-NEXT: v_mul_lo_u32 v5, s1, v0
-; GCN-IR-NEXT: v_cvt_u32_f32_e32 v4, v4
-; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v3|, v2
-; GCN-IR-NEXT: s_lshr_b32 s8, s9, 1
-; GCN-IR-NEXT: v_mul_hi_u32 v5, v0, v5
-; GCN-IR-NEXT: v_addc_u32_e32 v2, vcc, 0, v4, vcc
-; GCN-IR-NEXT: v_mul_lo_u32 v2, v2, s0
-; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v5
-; GCN-IR-NEXT: v_mul_hi_u32 v0, s8, v0
-; GCN-IR-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; GCN-IR-NEXT: v_sub_i32_e32 v2, vcc, s7, v2
+; GCN-IR-NEXT: v_mul_f32_e32 v3, v2, v3
+; GCN-IR-NEXT: v_trunc_f32_e32 v3, v3
+; GCN-IR-NEXT: v_mad_f32 v2, -v3, v1, v2
+; GCN-IR-NEXT: v_mul_lo_u32 v4, s8, v0
+; GCN-IR-NEXT: v_cvt_u32_f32_e32 v3, v3
+; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v2|, v1
+; GCN-IR-NEXT: s_lshr_b32 s7, s9, 1
+; GCN-IR-NEXT: v_mul_hi_u32 v4, v0, v4
+; GCN-IR-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GCN-IR-NEXT: v_mul_lo_u32 v1, v1, s4
+; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v4
+; GCN-IR-NEXT: v_mul_hi_u32 v0, s7, v0
+; GCN-IR-NEXT: v_sub_i32_e32 v1, vcc, s5, v1
+; GCN-IR-NEXT: v_and_b32_e32 v2, 0x7fffff, v1
; GCN-IR-NEXT: v_readfirstlane_b32 s4, v0
; GCN-IR-NEXT: s_mul_i32 s4, s4, s6
-; GCN-IR-NEXT: s_sub_i32 s4, s8, s4
+; GCN-IR-NEXT: s_sub_i32 s4, s7, s4
; GCN-IR-NEXT: s_sub_i32 s5, s4, s6
; GCN-IR-NEXT: s_cmp_ge_u32 s4, s6
; GCN-IR-NEXT: s_cselect_b32 s4, s5, s4
; GCN-IR-NEXT: s_sub_i32 s5, s4, s6
; GCN-IR-NEXT: s_cmp_ge_u32 s4, s6
; GCN-IR-NEXT: s_cselect_b32 s4, s5, s4
-; GCN-IR-NEXT: v_and_b32_e32 v2, 0x7fffff, v2
+; GCN-IR-NEXT: v_mov_b32_e32 v1, 0
; GCN-IR-NEXT: v_mov_b32_e32 v0, s4
; GCN-IR-NEXT: v_mov_b32_e32 v3, v1
-; GCN-IR-NEXT: s_waitcnt lgkmcnt(0)
; GCN-IR-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GCN-IR-NEXT: s_endpgm
%1 = lshr <2 x i64> %x, <i64 33, i64 41>
@@ -1293,8 +1291,8 @@ define i64 @v_test_urem_pow2_k_den_i64(i64 %x) {
; GCN-IR-NEXT: v_addc_u32_e64 v7, s[4:5], 0, -1, vcc
; GCN-IR-NEXT: s_mov_b64 s[10:11], 0
; GCN-IR-NEXT: v_mov_b32_e32 v11, 0
-; GCN-IR-NEXT: s_movk_i32 s12, 0x7fff
; GCN-IR-NEXT: v_mov_b32_e32 v5, 0
+; GCN-IR-NEXT: s_movk_i32 s12, 0x7fff
; GCN-IR-NEXT: .LBB9_3: ; %udiv-do-while
; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-IR-NEXT: v_lshl_b64 v[8:9], v[8:9], 1
diff --git a/llvm/test/CodeGen/AMDGPU/v_cndmask.ll b/llvm/test/CodeGen/AMDGPU/v_cndmask.ll
index b314cf2e1d9cc..53525c93f5b89 100644
--- a/llvm/test/CodeGen/AMDGPU/v_cndmask.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_cndmask.ll
@@ -1545,9 +1545,9 @@ define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i1(ptr addrspace(1) %ou
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x9
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xd
-; SI-NEXT: s_mov_b32 s6, 0
; SI-NEXT: v_mov_b32_e32 v1, 0
; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, 0
; SI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
; SI-NEXT: v_mov_b32_e32 v3, v1
; SI-NEXT: s_mov_b64 s[2:3], s[6:7]
diff --git a/llvm/test/CodeGen/AMDGPU/valu-i1.ll b/llvm/test/CodeGen/AMDGPU/valu-i1.ll
index 0f368ffd33b9d..d254b7effbfc6 100644
--- a/llvm/test/CodeGen/AMDGPU/valu-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/valu-i1.ll
@@ -299,9 +299,9 @@ define amdgpu_kernel void @multi_vcond_loop(ptr addrspace(1) noalias nocapture %
; SI-LABEL: multi_vcond_loop:
; SI: ; %bb.0: ; %bb
; SI-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xf
-; SI-NEXT: s_mov_b32 s10, 0
; SI-NEXT: v_mov_b32_e32 v7, 0
; SI-NEXT: s_mov_b32 s11, 0xf000
+; SI-NEXT: s_mov_b32 s10, 0
; SI-NEXT: v_lshlrev_b32_e32 v6, 2, v0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_load_dword v0, v[6:7], s[8:11], 0 addr64
More information about the llvm-branch-commits
mailing list