[llvm] d86a7d6 - GlobalISel: Add constant fold combine for zext/sext/anyext
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 24 05:10:17 PDT 2023
Author: Matt Arsenault
Date: 2023-08-24T08:10:01-04:00
New Revision: d86a7d631c32341fd86fa5ecd247957cdb2c58d1
URL: https://github.com/llvm/llvm-project/commit/d86a7d631c32341fd86fa5ecd247957cdb2c58d1
DIFF: https://github.com/llvm/llvm-project/commit/d86a7d631c32341fd86fa5ecd247957cdb2c58d1.diff
LOG: GlobalISel: Add constant fold combine for zext/sext/anyext
Could use more work for vectors.
https://reviews.llvm.org/D156534
Added:
Modified:
llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
llvm/include/llvm/CodeGen/GlobalISel/Utils.h
llvm/include/llvm/Target/GlobalISel/Combine.td
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
llvm/lib/CodeGen/GlobalISel/Utils.cpp
llvm/lib/Target/AArch64/AArch64Combine.td
llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir
llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-conflict.mir
llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-crash.mir
llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-binop-same-val.mir
llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir
llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-bzero.mir
llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-concat-vectors.mir
llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-copy-prop-disabled.mir
llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-prop-extends-phi.mir
llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-ptradd-chain.mir
llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir
llvm/test/CodeGen/AArch64/GlobalISel/uaddo-8-16-bits.mir
llvm/test/CodeGen/AArch64/bool-ext-inc.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-illegal-types.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-ashr.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-lshr.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-shl.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 6e331d4a68baa1..19c058dacd3ba3 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -632,7 +632,10 @@ class CombinerHelper {
bool matchReassocCommBinOp(MachineInstr &MI, BuildFnTy &MatchInfo);
/// Do constant folding when opportunities are exposed after MIR building.
- bool matchConstantFold(MachineInstr &MI, APInt &MatchInfo);
+ bool matchConstantFoldCastOp(MachineInstr &MI, APInt &MatchInfo);
+
+ /// Do constant folding when opportunities are exposed after MIR building.
+ bool matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo);
/// \returns true if it is possible to narrow the width of a scalar binop
/// feeding a G_AND instruction \p MI.
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index 5be67eb013b112..ffb6e53a0363f9 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -277,6 +277,10 @@ SmallVector<APInt> ConstantFoldVectorBinop(unsigned Opcode, const Register Op1,
const Register Op2,
const MachineRegisterInfo &MRI);
+std::optional<APInt> ConstantFoldCastOp(unsigned Opcode, LLT DstTy,
+ const Register Op0,
+ const MachineRegisterInfo &MRI);
+
std::optional<APInt> ConstantFoldExtOp(unsigned Opcode, const Register Op1,
uint64_t Imm,
const MachineRegisterInfo &MRI);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 5cee4f7e15c5e1..2858f3f177f6f5 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -905,10 +905,16 @@ def reassoc_comm_binops : GICombineRule<
def reassocs : GICombineGroup<[reassoc_ptradd, reassoc_comm_binops]>;
// Constant fold operations.
-def constant_fold : GICombineRule<
+def constant_fold_binop : GICombineRule<
(defs root:$d, apint_matchinfo:$matchinfo),
(match (wip_match_opcode G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR, G_SHL, G_LSHR, G_ASHR):$d,
- [{ return Helper.matchConstantFold(*${d}, ${matchinfo}); }]),
+ [{ return Helper.matchConstantFoldBinOp(*${d}, ${matchinfo}); }]),
+ (apply [{ Helper.replaceInstWithConstant(*${d}, ${matchinfo}); }])>;
+
+def constant_fold_cast_op : GICombineRule<
+ (defs root:$d, apint_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_ZEXT, G_SEXT, G_ANYEXT):$d,
+ [{ return Helper.matchConstantFoldCastOp(*${d}, ${matchinfo}); }]),
(apply [{ Helper.replaceInstWithConstant(*${d}, ${matchinfo}); }])>;
def mulo_by_2: GICombineRule<
@@ -1170,7 +1176,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
const_combines, xor_of_and_with_same_reg, ptr_add_with_zero,
shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine,
div_rem_to_divrem, funnel_shift_combines, commute_shift,
- form_bitfield_extract, constant_fold, fabs_fneg_fold,
+ form_bitfield_extract, constant_fold_binop, constant_fold_cast_op, fabs_fneg_fold,
intdiv_combines, mulh_combines, redundant_neg_operands,
and_or_disjoint_mask, fma_combines, fold_binop_into_select,
sub_add_reg, select_to_minmax, redundant_binop_in_equality,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index c24ddef4ee8236..5faa168a7a35ec 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -4502,7 +4502,19 @@ bool CombinerHelper::matchReassocCommBinOp(MachineInstr &MI,
return false;
}
-bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
+bool CombinerHelper::matchConstantFoldCastOp(MachineInstr &MI, APInt &MatchInfo) {
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ Register SrcOp = MI.getOperand(1).getReg();
+
+ if (auto MaybeCst = ConstantFoldCastOp(MI.getOpcode(), DstTy, SrcOp, MRI)) {
+ MatchInfo = *MaybeCst;
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo) {
Register Op1 = MI.getOperand(1).getReg();
Register Op2 = MI.getOperand(2).getReg();
auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 862cc60fb1e95a..acc7b8098d1f0d 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -780,6 +780,29 @@ std::optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode,
return std::nullopt;
}
+std::optional<APInt> llvm::ConstantFoldCastOp(unsigned Opcode, LLT DstTy,
+ const Register Op0,
+ const MachineRegisterInfo &MRI) {
+ std::optional<APInt> Val = getIConstantVRegVal(Op0, MRI);
+ if (!Val)
+ return Val;
+
+ const unsigned DstSize = DstTy.getScalarSizeInBits();
+
+ switch (Opcode) {
+ case TargetOpcode::G_SEXT:
+ return Val->sext(DstSize);
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_ANYEXT:
+ // TODO: DAG considers target preference when constant folding any_extend.
+ return Val->zext(DstSize);
+ default:
+ break;
+ }
+
+ llvm_unreachable("unexpected cast opcode to constant fold");
+}
+
std::optional<APFloat>
llvm::ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy, Register Src,
const MachineRegisterInfo &MRI) {
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 96fd2865050434..58493b98316531 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -227,7 +227,7 @@ def AArch64PostLegalizerCombiner
form_bitfield_extract, rotate_out_of_range,
icmp_to_true_false_known_bits, merge_unmerge,
select_combines, fold_merge_to_zext,
- constant_fold, identity_combines,
+ constant_fold_binop, identity_combines,
ptr_add_immed_chain, overlapping_and,
split_store_zero_128, undef_combines,
select_to_minmax]> {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
index 1ec020b7896753..aa5e54992f486f 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
@@ -2729,24 +2729,23 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) {
; CHECK-NOLSE-O1-LABEL: cmpxchg_i8:
; CHECK-NOLSE-O1: ; %bb.0:
; CHECK-NOLSE-O1-NEXT: mov x8, x0
-; CHECK-NOLSE-O1-NEXT: mov w9, w1
-; CHECK-NOLSE-O1-NEXT: mov w1, wzr
; CHECK-NOLSE-O1-NEXT: ; kill: def $w2 killed $w2 def $x2
; CHECK-NOLSE-O1-NEXT: LBB47_1: ; %cmpxchg.start
; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT: ldxrb w0, [x8]
-; CHECK-NOLSE-O1-NEXT: and w10, w0, #0xff
-; CHECK-NOLSE-O1-NEXT: cmp w10, w9, uxtb
+; CHECK-NOLSE-O1-NEXT: and w9, w0, #0xff
+; CHECK-NOLSE-O1-NEXT: cmp w9, w1, uxtb
; CHECK-NOLSE-O1-NEXT: b.ne LBB47_4
; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.trystore
; CHECK-NOLSE-O1-NEXT: ; in Loop: Header=BB47_1 Depth=1
-; CHECK-NOLSE-O1-NEXT: stxrb w10, w2, [x8]
-; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB47_1
+; CHECK-NOLSE-O1-NEXT: stxrb w9, w2, [x8]
+; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB47_1
; CHECK-NOLSE-O1-NEXT: ; %bb.3:
; CHECK-NOLSE-O1-NEXT: mov w1, #1 ; =0x1
; CHECK-NOLSE-O1-NEXT: ; kill: def $w0 killed $w0 killed $x0
; CHECK-NOLSE-O1-NEXT: ret
; CHECK-NOLSE-O1-NEXT: LBB47_4: ; %cmpxchg.nostore
+; CHECK-NOLSE-O1-NEXT: mov w1, wzr
; CHECK-NOLSE-O1-NEXT: clrex
; CHECK-NOLSE-O1-NEXT: ; kill: def $w0 killed $w0 killed $x0
; CHECK-NOLSE-O1-NEXT: ret
@@ -2796,24 +2795,23 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
; CHECK-NOLSE-O1-LABEL: cmpxchg_i16:
; CHECK-NOLSE-O1: ; %bb.0:
; CHECK-NOLSE-O1-NEXT: mov x8, x0
-; CHECK-NOLSE-O1-NEXT: mov w9, w1
-; CHECK-NOLSE-O1-NEXT: mov w1, wzr
; CHECK-NOLSE-O1-NEXT: ; kill: def $w2 killed $w2 def $x2
; CHECK-NOLSE-O1-NEXT: LBB48_1: ; %cmpxchg.start
; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT: ldxrh w0, [x8]
-; CHECK-NOLSE-O1-NEXT: and w10, w0, #0xffff
-; CHECK-NOLSE-O1-NEXT: cmp w10, w9, uxth
+; CHECK-NOLSE-O1-NEXT: and w9, w0, #0xffff
+; CHECK-NOLSE-O1-NEXT: cmp w9, w1, uxth
; CHECK-NOLSE-O1-NEXT: b.ne LBB48_4
; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.trystore
; CHECK-NOLSE-O1-NEXT: ; in Loop: Header=BB48_1 Depth=1
-; CHECK-NOLSE-O1-NEXT: stxrh w10, w2, [x8]
-; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB48_1
+; CHECK-NOLSE-O1-NEXT: stxrh w9, w2, [x8]
+; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB48_1
; CHECK-NOLSE-O1-NEXT: ; %bb.3:
; CHECK-NOLSE-O1-NEXT: mov w1, #1 ; =0x1
; CHECK-NOLSE-O1-NEXT: ; kill: def $w0 killed $w0 killed $x0
; CHECK-NOLSE-O1-NEXT: ret
; CHECK-NOLSE-O1-NEXT: LBB48_4: ; %cmpxchg.nostore
+; CHECK-NOLSE-O1-NEXT: mov w1, wzr
; CHECK-NOLSE-O1-NEXT: clrex
; CHECK-NOLSE-O1-NEXT: ; kill: def $w0 killed $w0 killed $x0
; CHECK-NOLSE-O1-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
index 54e7d5ee9c13b7..4c07081404c889 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
@@ -1251,25 +1251,23 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) {
; CHECK-NEXT: liveins: $w1, $w2, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $x8 = ORRXrs $xzr, $x0, 0
- ; CHECK-NEXT: $w9 = ORRWrs $wzr, $w1, 0
; CHECK-NEXT: renamable $w2 = KILL $w2, implicit-def $x2
- ; CHECK-NEXT: $w1 = ORRWrs $wzr, $wzr, 0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.cmpxchg.start:
; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.4(0x04000000)
- ; CHECK-NEXT: liveins: $w1, $w9, $x2, $x8
+ ; CHECK-NEXT: liveins: $w1, $x2, $x8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w0 = LDXRB renamable $x8, implicit-def $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
- ; CHECK-NEXT: renamable $w10 = ANDWri renamable $w0, 7, pcsections !0
- ; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w10, renamable $w9, 0, implicit-def $nzcv, pcsections !0
+ ; CHECK-NEXT: renamable $w9 = ANDWri renamable $w0, 7, pcsections !0
+ ; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 0, implicit-def $nzcv, pcsections !0
; CHECK-NEXT: Bcc 1, %bb.4, implicit killed $nzcv, pcsections !0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.cmpxchg.trystore:
; CHECK-NEXT: successors: %bb.3(0x04000000), %bb.1(0x7c000000)
- ; CHECK-NEXT: liveins: $w1, $w9, $x0, $x2, $x8
+ ; CHECK-NEXT: liveins: $w1, $x0, $x2, $x8
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: early-clobber renamable $w10 = STXRB renamable $w2, renamable $x8, pcsections !0 :: (volatile store (s8) into %ir.ptr)
- ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1
+ ; CHECK-NEXT: early-clobber renamable $w9 = STXRB renamable $w2, renamable $x8, pcsections !0 :: (volatile store (s8) into %ir.ptr)
+ ; CHECK-NEXT: CBNZW killed renamable $w9, %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: liveins: $x0
@@ -1279,8 +1277,9 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) {
; CHECK-NEXT: RET undef $lr, implicit $w0, implicit $w1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4.cmpxchg.nostore:
- ; CHECK-NEXT: liveins: $w1, $x0
+ ; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $w1 = ORRWrs $wzr, $wzr, 0
; CHECK-NEXT: CLREX 15, pcsections !0
; CHECK-NEXT: $w0 = KILL renamable $w0, implicit killed $x0
; CHECK-NEXT: RET undef $lr, implicit $w0, implicit $w1
@@ -1295,25 +1294,23 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
; CHECK-NEXT: liveins: $w1, $w2, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $x8 = ORRXrs $xzr, $x0, 0
- ; CHECK-NEXT: $w9 = ORRWrs $wzr, $w1, 0
; CHECK-NEXT: renamable $w2 = KILL $w2, implicit-def $x2
- ; CHECK-NEXT: $w1 = ORRWrs $wzr, $wzr, 0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.cmpxchg.start:
; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.4(0x04000000)
- ; CHECK-NEXT: liveins: $w1, $w9, $x2, $x8
+ ; CHECK-NEXT: liveins: $w1, $x2, $x8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w0 = LDXRH renamable $x8, implicit-def $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
- ; CHECK-NEXT: renamable $w10 = ANDWri renamable $w0, 15, pcsections !0
- ; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w10, renamable $w9, 8, implicit-def $nzcv, pcsections !0
+ ; CHECK-NEXT: renamable $w9 = ANDWri renamable $w0, 15, pcsections !0
+ ; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 8, implicit-def $nzcv, pcsections !0
; CHECK-NEXT: Bcc 1, %bb.4, implicit killed $nzcv, pcsections !0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.cmpxchg.trystore:
; CHECK-NEXT: successors: %bb.3(0x04000000), %bb.1(0x7c000000)
- ; CHECK-NEXT: liveins: $w1, $w9, $x0, $x2, $x8
+ ; CHECK-NEXT: liveins: $w1, $x0, $x2, $x8
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: early-clobber renamable $w10 = STXRH renamable $w2, renamable $x8, pcsections !0 :: (volatile store (s16) into %ir.ptr)
- ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1
+ ; CHECK-NEXT: early-clobber renamable $w9 = STXRH renamable $w2, renamable $x8, pcsections !0 :: (volatile store (s16) into %ir.ptr)
+ ; CHECK-NEXT: CBNZW killed renamable $w9, %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: liveins: $x0
@@ -1323,8 +1320,9 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
; CHECK-NEXT: RET undef $lr, implicit $w0, implicit $w1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4.cmpxchg.nostore:
- ; CHECK-NEXT: liveins: $w1, $x0
+ ; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $w1 = ORRWrs $wzr, $wzr, 0
; CHECK-NEXT: CLREX 15, pcsections !0
; CHECK-NEXT: $w0 = KILL renamable $w0, implicit killed $x0
; CHECK-NEXT: RET undef $lr, implicit $w0, implicit $w1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir
index 01a4f65a9c6c14..e2874bc28e1eea 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir
@@ -10,9 +10,9 @@ body: |
bb.1:
; CHECK-LABEL: name: test_combine_unmerge_merge
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK: $w0 = COPY [[DEF]](s32)
- ; CHECK: $w1 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $w0 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $w1 = COPY [[DEF1]](s32)
%0:_(s32) = G_IMPLICIT_DEF
%1:_(s32) = G_IMPLICIT_DEF
%2:_(s64) = G_MERGE_VALUES %0(s32), %1(s32)
@@ -30,11 +30,11 @@ body: |
bb.1:
; CHECK-LABEL: name: test_combine_unmerge_merge_3ops
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK: $w0 = COPY [[DEF]](s32)
- ; CHECK: $w1 = COPY [[DEF1]](s32)
- ; CHECK: $w2 = COPY [[DEF2]](s32)
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $w0 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $w1 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: $w2 = COPY [[DEF2]](s32)
%0:_(s32) = G_IMPLICIT_DEF
%1:_(s32) = G_IMPLICIT_DEF
%5:_(s32) = G_IMPLICIT_DEF
@@ -54,9 +54,9 @@ body: |
bb.1:
; CHECK-LABEL: name: test_combine_unmerge_build_vector
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK: $w0 = COPY [[DEF]](s32)
- ; CHECK: $w1 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $w0 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $w1 = COPY [[DEF1]](s32)
%0:_(s32) = G_IMPLICIT_DEF
%1:_(s32) = G_IMPLICIT_DEF
%2:_(<2 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32)
@@ -74,11 +74,11 @@ body: |
bb.1:
; CHECK-LABEL: name: test_combine_unmerge_buildvector_3ops
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK: $w0 = COPY [[DEF]](s32)
- ; CHECK: $w1 = COPY [[DEF1]](s32)
- ; CHECK: $w2 = COPY [[DEF2]](s32)
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $w0 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $w1 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: $w2 = COPY [[DEF2]](s32)
%0:_(s32) = G_IMPLICIT_DEF
%1:_(s32) = G_IMPLICIT_DEF
%5:_(s32) = G_IMPLICIT_DEF
@@ -98,9 +98,9 @@ body: |
bb.1:
; CHECK-LABEL: name: test_combine_unmerge_concat_vectors
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $w0
- ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $w1
- ; CHECK: $w0 = COPY [[COPY]](<2 x s16>)
- ; CHECK: $w1 = COPY [[COPY1]](<2 x s16>)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $w1
+ ; CHECK-NEXT: $w0 = COPY [[COPY]](<2 x s16>)
+ ; CHECK-NEXT: $w1 = COPY [[COPY1]](<2 x s16>)
%0:_(<2 x s16>) = COPY $w0
%1:_(<2 x s16>) = COPY $w1
%2:_(<4 x s16>) = G_CONCAT_VECTORS %0(<2 x s16>), %1(<2 x s16>)
@@ -118,9 +118,9 @@ body: |
bb.1:
; CHECK-LABEL: name: test_combine_unmerge_bitcast_merge
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK: $w0 = COPY [[DEF]](s32)
- ; CHECK: $w1 = COPY [[DEF1]](s32)
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $w0 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $w1 = COPY [[DEF1]](s32)
%0:_(s32) = G_IMPLICIT_DEF
%1:_(s32) = G_IMPLICIT_DEF
%2:_(s64) = G_MERGE_VALUES %0(s32), %1(s32)
@@ -139,13 +139,13 @@ body: |
bb.1:
; CHECK-LABEL: name: test_combine_unmerge_merge_incompatible_types
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[DEF]](s32), [[DEF1]](s32)
- ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[MV]](s64)
- ; CHECK: $h0 = COPY [[UV]](s16)
- ; CHECK: $h1 = COPY [[UV1]](s16)
- ; CHECK: $h2 = COPY [[UV2]](s16)
- ; CHECK: $h3 = COPY [[UV3]](s16)
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[DEF]](s32), [[DEF1]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[MV]](s64)
+ ; CHECK-NEXT: $h0 = COPY [[UV]](s16)
+ ; CHECK-NEXT: $h1 = COPY [[UV1]](s16)
+ ; CHECK-NEXT: $h2 = COPY [[UV2]](s16)
+ ; CHECK-NEXT: $h3 = COPY [[UV3]](s16)
%0:_(s32) = G_IMPLICIT_DEF
%1:_(s32) = G_IMPLICIT_DEF
%2:_(s64) = G_MERGE_VALUES %0(s32), %1(s32)
@@ -167,11 +167,11 @@ body: |
bb.1:
; CHECK-LABEL: name: test_combine_unmerge_merge_incompatible_types_but_same_size
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $w0
- ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $w1
- ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
- ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
- ; CHECK: $w0 = COPY [[BITCAST]](s32)
- ; CHECK: $w1 = COPY [[BITCAST1]](s32)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $w1
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; CHECK-NEXT: $w0 = COPY [[BITCAST]](s32)
+ ; CHECK-NEXT: $w1 = COPY [[BITCAST1]](s32)
%0:_(<2 x s16>) = COPY $w0
%1:_(<2 x s16>) = COPY $w1
%2:_(<4 x s16>) = G_CONCAT_VECTORS %0(<2 x s16>), %1(<2 x s16>)
@@ -194,37 +194,37 @@ body: |
bb.1:
; CHECK-LABEL: name: test_combine_unmerge_cst
; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 16
- ; CHECK: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 15
- ; CHECK: [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 14
- ; CHECK: [[C3:%[0-9]+]]:_(s8) = G_CONSTANT i8 13
- ; CHECK: [[C4:%[0-9]+]]:_(s8) = G_CONSTANT i8 12
- ; CHECK: [[C5:%[0-9]+]]:_(s8) = G_CONSTANT i8 11
- ; CHECK: [[C6:%[0-9]+]]:_(s8) = G_CONSTANT i8 10
- ; CHECK: [[C7:%[0-9]+]]:_(s8) = G_CONSTANT i8 9
- ; CHECK: [[C8:%[0-9]+]]:_(s8) = G_CONSTANT i8 8
- ; CHECK: [[C9:%[0-9]+]]:_(s8) = G_CONSTANT i8 7
- ; CHECK: [[C10:%[0-9]+]]:_(s8) = G_CONSTANT i8 6
- ; CHECK: [[C11:%[0-9]+]]:_(s8) = G_CONSTANT i8 5
- ; CHECK: [[C12:%[0-9]+]]:_(s8) = G_CONSTANT i8 4
- ; CHECK: [[C13:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
- ; CHECK: [[C14:%[0-9]+]]:_(s8) = G_CONSTANT i8 2
- ; CHECK: [[C15:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
- ; CHECK: $b0 = COPY [[C]](s8)
- ; CHECK: $b1 = COPY [[C1]](s8)
- ; CHECK: $b2 = COPY [[C2]](s8)
- ; CHECK: $b3 = COPY [[C3]](s8)
- ; CHECK: $b4 = COPY [[C4]](s8)
- ; CHECK: $b5 = COPY [[C5]](s8)
- ; CHECK: $b6 = COPY [[C6]](s8)
- ; CHECK: $b7 = COPY [[C7]](s8)
- ; CHECK: $b8 = COPY [[C8]](s8)
- ; CHECK: $b9 = COPY [[C9]](s8)
- ; CHECK: $b10 = COPY [[C10]](s8)
- ; CHECK: $b11 = COPY [[C11]](s8)
- ; CHECK: $b12 = COPY [[C12]](s8)
- ; CHECK: $b13 = COPY [[C13]](s8)
- ; CHECK: $b14 = COPY [[C14]](s8)
- ; CHECK: $b15 = COPY [[C15]](s8)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 15
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 14
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s8) = G_CONSTANT i8 13
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s8) = G_CONSTANT i8 12
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s8) = G_CONSTANT i8 11
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s8) = G_CONSTANT i8 10
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s8) = G_CONSTANT i8 9
+ ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s8) = G_CONSTANT i8 8
+ ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s8) = G_CONSTANT i8 7
+ ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s8) = G_CONSTANT i8 6
+ ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s8) = G_CONSTANT i8 5
+ ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s8) = G_CONSTANT i8 4
+ ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
+ ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s8) = G_CONSTANT i8 2
+ ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
+ ; CHECK-NEXT: $b0 = COPY [[C]](s8)
+ ; CHECK-NEXT: $b1 = COPY [[C1]](s8)
+ ; CHECK-NEXT: $b2 = COPY [[C2]](s8)
+ ; CHECK-NEXT: $b3 = COPY [[C3]](s8)
+ ; CHECK-NEXT: $b4 = COPY [[C4]](s8)
+ ; CHECK-NEXT: $b5 = COPY [[C5]](s8)
+ ; CHECK-NEXT: $b6 = COPY [[C6]](s8)
+ ; CHECK-NEXT: $b7 = COPY [[C7]](s8)
+ ; CHECK-NEXT: $b8 = COPY [[C8]](s8)
+ ; CHECK-NEXT: $b9 = COPY [[C9]](s8)
+ ; CHECK-NEXT: $b10 = COPY [[C10]](s8)
+ ; CHECK-NEXT: $b11 = COPY [[C11]](s8)
+ ; CHECK-NEXT: $b12 = COPY [[C12]](s8)
+ ; CHECK-NEXT: $b13 = COPY [[C13]](s8)
+ ; CHECK-NEXT: $b14 = COPY [[C14]](s8)
+ ; CHECK-NEXT: $b15 = COPY [[C15]](s8)
%0:_(s128) = G_CONSTANT i128 1339673755198158349044581307228491536
%1:_(s8),%2:_(s8),%3:_(s8),%4:_(s8),%5:_(s8),%6:_(s8),%7:_(s8),%8:_(s8),%9:_(s8),%10:_(s8),%11:_(s8),%12:_(s8),%13:_(s8),%14:_(s8),%15:_(s8),%16:_(s8) = G_UNMERGE_VALUES %0(s128)
$b0 = COPY %1(s8)
@@ -252,15 +252,12 @@ name: test_combine_unmerge_cst_36bit
body: |
bb.1:
; CHECK-LABEL: name: test_combine_unmerge_cst_36bit
- ; CHECK: [[C:%[0-9]+]]:_(s13) = G_CONSTANT i13 1
- ; CHECK: [[C1:%[0-9]+]]:_(s13) = G_CONSTANT i13 2
- ; CHECK: [[C2:%[0-9]+]]:_(s13) = G_CONSTANT i13 3
- ; CHECK: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[C]](s13)
- ; CHECK: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[C1]](s13)
- ; CHECK: [[ZEXT2:%[0-9]+]]:_(s16) = G_ZEXT [[C2]](s13)
- ; CHECK: $h0 = COPY [[ZEXT]](s16)
- ; CHECK: $h1 = COPY [[ZEXT1]](s16)
- ; CHECK: $h2 = COPY [[ZEXT2]](s16)
+ ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 3
+ ; CHECK-NEXT: $h0 = COPY [[C]](s16)
+ ; CHECK-NEXT: $h1 = COPY [[C1]](s16)
+ ; CHECK-NEXT: $h2 = COPY [[C2]](s16)
%0:_(s39) = G_CONSTANT i39 201342977
%1:_(s13),%2:_(s13),%3:_(s13) = G_UNMERGE_VALUES %0(s39)
%4:_(s16) = G_ZEXT %1(s13)
@@ -278,13 +275,13 @@ body: |
bb.1:
; CHECK-LABEL: name: test_combine_unmerge_fpcst
; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
- ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
- ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 3
- ; CHECK: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
- ; CHECK: $h0 = COPY [[C]](s16)
- ; CHECK: $h1 = COPY [[C1]](s16)
- ; CHECK: $h2 = COPY [[C2]](s16)
- ; CHECK: $h3 = COPY [[C3]](s16)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 3
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
+ ; CHECK-NEXT: $h0 = COPY [[C]](s16)
+ ; CHECK-NEXT: $h1 = COPY [[C1]](s16)
+ ; CHECK-NEXT: $h2 = COPY [[C2]](s16)
+ ; CHECK-NEXT: $h3 = COPY [[C3]](s16)
%0:_(s64) = G_FCONSTANT double 0x0004000300020001
%1:_(s16),%2:_(s16),%3:_(s16),%4:_(s16) = G_UNMERGE_VALUES %0(s64)
$h0 = COPY %1(s16)
@@ -300,8 +297,8 @@ body: |
bb.1:
; CHECK-LABEL: name: test_combine_unmerge_dead_to_trunc
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s64)
- ; CHECK: $h0 = COPY [[TRUNC]](s16)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: $h0 = COPY [[TRUNC]](s16)
%0:_(s64) = COPY $x0
%1:_(s16),%2:_(s16),%3:_(s16),%4:_(s16) = G_UNMERGE_VALUES %0(s64)
$h0 = COPY %1(s16)
@@ -314,8 +311,8 @@ body: |
bb.1:
; CHECK-LABEL: name: test_dont_combine_unmerge_dead_to_trunc
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
- ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](s64)
- ; CHECK: $h0 = COPY [[UV2]](s16)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](s64)
+ ; CHECK-NEXT: $h0 = COPY [[UV2]](s16)
%0:_(s64) = COPY $x0
%1:_(s16),%2:_(s16),%3:_(s16),%4:_(s16) = G_UNMERGE_VALUES %0(s64)
$h0 = COPY %3(s16)
@@ -329,10 +326,10 @@ body: |
bb.1:
; CHECK-LABEL: name: test_combine_unmerge_dead_to_trunc_vec_in_n_out
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $x0
- ; CHECK: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[COPY]](<4 x s16>)
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[BITCAST]](s64)
- ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[TRUNC]](s32)
- ; CHECK: $w0 = COPY [[BITCAST1]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[COPY]](<4 x s16>)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[BITCAST]](s64)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[TRUNC]](s32)
+ ; CHECK-NEXT: $w0 = COPY [[BITCAST1]](<2 x s16>)
%0:_(<4 x s16>) = COPY $x0
%1:_(<2 x s16>),%2:_(<2 x s16>) = G_UNMERGE_VALUES %0(<4 x s16>)
$w0 = COPY %1(<2 x s16>)
@@ -346,9 +343,9 @@ body: |
bb.1:
; CHECK-LABEL: name: test_combine_unmerge_dead_to_trunc_vec_in
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $x0
- ; CHECK: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[COPY]](<2 x s32>)
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s64)
- ; CHECK: $h0 = COPY [[TRUNC]](s16)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[COPY]](<2 x s32>)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s64)
+ ; CHECK-NEXT: $h0 = COPY [[TRUNC]](s16)
%0:_(<2 x s32>) = COPY $x0
%1:_(s16),%2:_(s16),%3:_(s16),%4:_(s16) = G_UNMERGE_VALUES %0(<2 x s32>)
$h0 = COPY %1(s16)
@@ -364,9 +361,9 @@ body: |
bb.1:
; CHECK-LABEL: name: test_combine_unmerge_zext_to_zext_same_size
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
- ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK: $w0 = COPY [[COPY]](s32)
- ; CHECK: $w1 = COPY [[C]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
+ ; CHECK-NEXT: $w1 = COPY [[C]](s32)
%0:_(s32) = COPY $w0
%3:_(s64) = G_ZEXT %0(s32)
%1:_(s32),%2:_(s32) = G_UNMERGE_VALUES %3(s64)
@@ -383,12 +380,12 @@ body: |
bb.1:
; CHECK-LABEL: name: test_combine_unmerge_zext_to_zext
; CHECK: [[COPY:%[0-9]+]]:_(s8) = COPY $b0
- ; CHECK: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[COPY]](s8)
- ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
- ; CHECK: $h0 = COPY [[ZEXT]](s16)
- ; CHECK: $h1 = COPY [[C]](s16)
- ; CHECK: $h2 = COPY [[C]](s16)
- ; CHECK: $h3 = COPY [[C]](s16)
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[COPY]](s8)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+ ; CHECK-NEXT: $h0 = COPY [[ZEXT]](s16)
+ ; CHECK-NEXT: $h1 = COPY [[C]](s16)
+ ; CHECK-NEXT: $h2 = COPY [[C]](s16)
+ ; CHECK-NEXT: $h3 = COPY [[C]](s16)
%0:_(s8) = COPY $b0
%3:_(s64) = G_ZEXT %0(s8)
%1:_(s16),%2:_(s16),%4:_(s16),%5:_(s16) = G_UNMERGE_VALUES %3(s64)
@@ -407,12 +404,12 @@ body: |
bb.1:
; CHECK-LABEL: name: test_dont_combine_unmerge_zext_to_zext_src_bigger
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
- ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32)
- ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[ZEXT]](s64)
- ; CHECK: $h0 = COPY [[UV]](s16)
- ; CHECK: $h1 = COPY [[UV1]](s16)
- ; CHECK: $h2 = COPY [[UV2]](s16)
- ; CHECK: $h3 = COPY [[UV3]](s16)
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[ZEXT]](s64)
+ ; CHECK-NEXT: $h0 = COPY [[UV]](s16)
+ ; CHECK-NEXT: $h1 = COPY [[UV1]](s16)
+ ; CHECK-NEXT: $h2 = COPY [[UV2]](s16)
+ ; CHECK-NEXT: $h3 = COPY [[UV3]](s16)
%0:_(s32) = COPY $w0
%3:_(s64) = G_ZEXT %0(s32)
%1:_(s16),%2:_(s16),%4:_(s16),%5:_(s16) = G_UNMERGE_VALUES %3(s64)
@@ -430,10 +427,10 @@ body: |
bb.1:
; CHECK-LABEL: name: test_dont_combine_unmerge_zext_to_zext_src_vector
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $w0
- ; CHECK: [[ZEXT:%[0-9]+]]:_(<2 x s32>) = G_ZEXT [[COPY]](<2 x s16>)
- ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](<2 x s32>)
- ; CHECK: $w0 = COPY [[UV]](s32)
- ; CHECK: $w1 = COPY [[UV1]](s32)
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(<2 x s32>) = G_ZEXT [[COPY]](<2 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](<2 x s32>)
+ ; CHECK-NEXT: $w0 = COPY [[UV]](s32)
+ ; CHECK-NEXT: $w1 = COPY [[UV1]](s32)
%0:_(<2 x s16>) = COPY $w0
%3:_(<2 x s32>) = G_ZEXT %0(<2 x s16>)
%1:_(s32),%2:_(s32) = G_UNMERGE_VALUES %3(<2 x s32>)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-conflict.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-conflict.mir
index abccb185024941..2e879c7e1622aa 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-conflict.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-conflict.mir
@@ -8,14 +8,16 @@ tracksRegLiveness: true
body: |
bb.1:
; CHECK-LABEL: name: test
- ; CHECK: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[C]](s1)
- ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[C]](s1)
- ; CHECK-NEXT: [[UDIVREM:%[0-9]+]]:_(s32), [[UDIVREM1:%[0-9]+]]:_ = G_UDIVREM [[SEXT]], [[SEXT1]]
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UDIVREM1]](s32)
- ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UDIVREM]](s32)
- ; CHECK-NEXT: [[SEXT2:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s8)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[ZEXT]], [[SEXT2]]
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483647
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+ ; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[C]], [[C1]]
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UMULH]], [[C2]](s32)
+ ; CHECK-NEXT: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[C]], [[C]]
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UREM]](s32)
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LSHR]](s32)
+ ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s8)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[ZEXT]], [[SEXT]]
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[OR]](s64)
; CHECK-NEXT: $w0 = COPY [[TRUNC1]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
@@ -47,14 +49,12 @@ tracksRegLiveness: true
body: |
bb.1:
; CHECK-LABEL: name: test_inverted_div_rem
- ; CHECK: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[C]](s1)
- ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[C]](s1)
- ; CHECK-NEXT: [[UDIVREM:%[0-9]+]]:_(s32), [[UDIVREM1:%[0-9]+]]:_ = G_UDIVREM [[SEXT]], [[SEXT1]]
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; CHECK-NEXT: [[UDIVREM:%[0-9]+]]:_(s32), [[UDIVREM1:%[0-9]+]]:_ = G_UDIVREM [[C]], [[C]]
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UDIVREM]](s32)
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UDIVREM1]](s32)
- ; CHECK-NEXT: [[SEXT2:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s8)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[ZEXT]], [[SEXT2]]
+ ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s8)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[ZEXT]], [[SEXT]]
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[OR]](s64)
; CHECK-NEXT: $w0 = COPY [[TRUNC1]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-crash.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-crash.mir
index 6d9d695ba68909..ca403f85156113 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-crash.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-crash.mir
@@ -17,21 +17,16 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; CHECK-NEXT: G_BRCOND [[DEF]](s1), %bb.2
; CHECK-NEXT: G_BR %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C2]](s32)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[C3]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[C1]]
; CHECK-NEXT: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[FREEZE]], [[C]]
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[C1]](s64)
- ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[SHL]], [[UDIV]]
- ; CHECK-NEXT: G_STORE [[ADD]](s64), [[COPY]](p0) :: (store (s64))
+ ; CHECK-NEXT: G_STORE [[UDIV]](s64), [[COPY]](p0) :: (store (s64))
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
bb.1:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-binop-same-val.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-binop-same-val.mir
index 3c70c41719839a..a3d5294a99c5d0 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-binop-same-val.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-binop-same-val.mir
@@ -30,9 +30,10 @@ body: |
; CHECK-LABEL: name: and_same
; CHECK: liveins: $x0
- ; CHECK: %copy:_(s64) = COPY $x0
- ; CHECK: $x0 = COPY %copy(s64)
- ; CHECK: RET_ReallyLR implicit $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(s64) = COPY $x0
+ ; CHECK-NEXT: $x0 = COPY %copy(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
%copy:_(s64) = COPY $x0
%and:_(s64) = G_AND %copy, %copy
$x0 = COPY %and(s64)
@@ -50,11 +51,12 @@ body: |
; CHECK-LABEL: name: and_same2
; CHECK: liveins: $x0, $x1
- ; CHECK: %copy1:_(s64) = COPY $x0
- ; CHECK: %copy2:_(s64) = COPY $x1
- ; CHECK: %or:_(s64) = G_OR %copy1, %copy2
- ; CHECK: $x0 = COPY %or(s64)
- ; CHECK: RET_ReallyLR implicit $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy1:_(s64) = COPY $x0
+ ; CHECK-NEXT: %copy2:_(s64) = COPY $x1
+ ; CHECK-NEXT: %or:_(s64) = G_OR %copy1, %copy2
+ ; CHECK-NEXT: $x0 = COPY %or(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
%copy1:_(s64) = COPY $x0
%copy2:_(s64) = COPY $x1
%or:_(s64) = G_OR %copy1, %copy2
@@ -76,14 +78,15 @@ body: |
; CHECK-LABEL: name: or_and_not_same
; CHECK: liveins: $x0, $x1, $x2
- ; CHECK: %copy1:_(s64) = COPY $x0
- ; CHECK: %copy2:_(s64) = COPY $x1
- ; CHECK: %copy3:_(s64) = COPY $x2
- ; CHECK: %or1:_(s64) = G_OR %copy1, %copy2
- ; CHECK: %or2:_(s64) = G_OR %copy1, %copy3
- ; CHECK: %and:_(s64) = G_AND %or1, %or2
- ; CHECK: $x0 = COPY %and(s64)
- ; CHECK: RET_ReallyLR implicit $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy1:_(s64) = COPY $x0
+ ; CHECK-NEXT: %copy2:_(s64) = COPY $x1
+ ; CHECK-NEXT: %copy3:_(s64) = COPY $x2
+ ; CHECK-NEXT: %or1:_(s64) = G_OR %copy1, %copy2
+ ; CHECK-NEXT: %or2:_(s64) = G_OR %copy1, %copy3
+ ; CHECK-NEXT: %and:_(s64) = G_AND %or1, %or2
+ ; CHECK-NEXT: $x0 = COPY %and(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
%copy1:_(s64) = COPY $x0
%copy2:_(s64) = COPY $x1
%copy3:_(s64) = COPY $x2
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir
index 111f866c47746e..fd49f7ffc15eb8 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir
@@ -38,30 +38,36 @@ tracksRegLiveness: true
body: |
; CHECK-LABEL: name: foo
; CHECK: bb.0.entry:
- ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; CHECK: liveins: $w0, $w1
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
- ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY]](s32), [[C]]
- ; CHECK: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[C2]]
- ; CHECK: G_BRCOND [[XOR]](s1), %bb.2
- ; CHECK: G_BR %bb.1
- ; CHECK: bb.1.if.then:
- ; CHECK: successors: %bb.3(0x80000000)
- ; CHECK: [[ADD:%[0-9]+]]:_(s32) = nsw G_ADD [[COPY1]], [[COPY]]
- ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = nsw G_ADD [[ADD]], [[COPY1]]
- ; CHECK: G_BR %bb.3
- ; CHECK: bb.2.if.end:
- ; CHECK: successors: %bb.3(0x80000000)
- ; CHECK: [[MUL:%[0-9]+]]:_(s32) = nsw G_MUL [[COPY1]], [[COPY1]]
- ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = nuw nsw G_ADD [[MUL]], [[C1]]
- ; CHECK: bb.3.return:
- ; CHECK: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.1, [[ADD2]](s32), %bb.2
- ; CHECK: $w0 = COPY [[PHI]](s32)
- ; CHECK: RET_ReallyLR implicit $w0
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY]](s32), [[C]]
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[C2]]
+ ; CHECK-NEXT: G_BRCOND [[XOR]](s1), %bb.2
+ ; CHECK-NEXT: G_BR %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.if.then:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = nsw G_ADD [[COPY1]], [[COPY]]
+ ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = nsw G_ADD [[ADD]], [[COPY1]]
+ ; CHECK-NEXT: G_BR %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2.if.end:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = nsw G_MUL [[COPY1]], [[COPY1]]
+ ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = nuw nsw G_ADD [[MUL]], [[C1]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3.return:
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.1, [[ADD2]](s32), %bb.2
+ ; CHECK-NEXT: $w0 = COPY [[PHI]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
bb.1.entry:
liveins: $w0, $w1
@@ -94,18 +100,19 @@ tracksRegLiveness: true
body: |
; CHECK-LABEL: name: dont_combine_same_block
; CHECK: bb.0:
- ; CHECK: successors: %bb.1(0x80000000)
- ; CHECK: liveins: $w0, $w1
- ; CHECK: %cond:_(s1) = G_IMPLICIT_DEF
- ; CHECK: G_BRCOND %cond(s1), %bb.1
- ; CHECK: G_BR %bb.1
- ; CHECK: bb.1:
- ; CHECK: RET_ReallyLR
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cond:_(s1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_BRCOND %cond(s1), %bb.1
+ ; CHECK-NEXT: G_BR %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: RET_ReallyLR
bb.0:
liveins: $w0, $w1
%cond:_(s1) = G_IMPLICIT_DEF
- ; The G_BRCOND and G_BR have the same target here. Don't change anything.
G_BRCOND %cond(s1), %bb.1
G_BR %bb.1
bb.1:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-bzero.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-bzero.mir
index a70a5220b4e545..a152b3fdf11197 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-bzero.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-bzero.mir
@@ -22,17 +22,20 @@ body: |
; DARWIN-LABEL: name: bzero_unknown_width
; DARWIN: liveins: $x0, $x1
- ; DARWIN: %ptr:_(p0) = COPY $x0
- ; DARWIN: %width:_(s64) = COPY $x1
- ; DARWIN: G_BZERO %ptr(p0), %width(s64), 0 :: (store (s32))
- ; DARWIN: RET_ReallyLR
+ ; DARWIN-NEXT: {{ $}}
+ ; DARWIN-NEXT: %ptr:_(p0) = COPY $x0
+ ; DARWIN-NEXT: %width:_(s64) = COPY $x1
+ ; DARWIN-NEXT: G_BZERO %ptr(p0), %width(s64), 0 :: (store (s32))
+ ; DARWIN-NEXT: RET_ReallyLR
+ ;
; UNKNOWN-LABEL: name: bzero_unknown_width
; UNKNOWN: liveins: $x0, $x1
- ; UNKNOWN: %ptr:_(p0) = COPY $x0
- ; UNKNOWN: %zero:_(s8) = G_CONSTANT i8 0
- ; UNKNOWN: %width:_(s64) = COPY $x1
- ; UNKNOWN: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store (s32))
- ; UNKNOWN: RET_ReallyLR
+ ; UNKNOWN-NEXT: {{ $}}
+ ; UNKNOWN-NEXT: %ptr:_(p0) = COPY $x0
+ ; UNKNOWN-NEXT: %zero:_(s8) = G_CONSTANT i8 0
+ ; UNKNOWN-NEXT: %width:_(s64) = COPY $x1
+ ; UNKNOWN-NEXT: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store (s32))
+ ; UNKNOWN-NEXT: RET_ReallyLR
%ptr:_(p0) = COPY $x0
%zero:_(s8) = G_CONSTANT i8 0
%width:_(s64) = COPY $x1
@@ -47,17 +50,20 @@ body: |
liveins: $x0, $x1
; DARWIN-LABEL: name: bzero_tail_unknown_width
; DARWIN: liveins: $x0, $x1
- ; DARWIN: %ptr:_(p0) = COPY $x0
- ; DARWIN: %width:_(s64) = COPY $x1
- ; DARWIN: G_BZERO %ptr(p0), %width(s64), 1 :: (store (s32))
- ; DARWIN: RET_ReallyLR
+ ; DARWIN-NEXT: {{ $}}
+ ; DARWIN-NEXT: %ptr:_(p0) = COPY $x0
+ ; DARWIN-NEXT: %width:_(s64) = COPY $x1
+ ; DARWIN-NEXT: G_BZERO %ptr(p0), %width(s64), 1 :: (store (s32))
+ ; DARWIN-NEXT: RET_ReallyLR
+ ;
; UNKNOWN-LABEL: name: bzero_tail_unknown_width
; UNKNOWN: liveins: $x0, $x1
- ; UNKNOWN: %ptr:_(p0) = COPY $x0
- ; UNKNOWN: %zero:_(s8) = G_CONSTANT i8 0
- ; UNKNOWN: %width:_(s64) = COPY $x1
- ; UNKNOWN: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 1 :: (store (s32))
- ; UNKNOWN: RET_ReallyLR
+ ; UNKNOWN-NEXT: {{ $}}
+ ; UNKNOWN-NEXT: %ptr:_(p0) = COPY $x0
+ ; UNKNOWN-NEXT: %zero:_(s8) = G_CONSTANT i8 0
+ ; UNKNOWN-NEXT: %width:_(s64) = COPY $x1
+ ; UNKNOWN-NEXT: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 1 :: (store (s32))
+ ; UNKNOWN-NEXT: RET_ReallyLR
%ptr:_(p0) = COPY $x0
%zero:_(s8) = G_CONSTANT i8 0
%width:_(s64) = COPY $x1
@@ -74,17 +80,20 @@ body: |
; DARWIN-LABEL: name: bzero_constant_width
; DARWIN: liveins: $x0, $x1
- ; DARWIN: %ptr:_(p0) = COPY $x0
- ; DARWIN: %width:_(s64) = G_CONSTANT i64 1024
- ; DARWIN: G_BZERO %ptr(p0), %width(s64), 0 :: (store (s32))
- ; DARWIN: RET_ReallyLR
+ ; DARWIN-NEXT: {{ $}}
+ ; DARWIN-NEXT: %ptr:_(p0) = COPY $x0
+ ; DARWIN-NEXT: %width:_(s64) = G_CONSTANT i64 1024
+ ; DARWIN-NEXT: G_BZERO %ptr(p0), %width(s64), 0 :: (store (s32))
+ ; DARWIN-NEXT: RET_ReallyLR
+ ;
; UNKNOWN-LABEL: name: bzero_constant_width
; UNKNOWN: liveins: $x0, $x1
- ; UNKNOWN: %ptr:_(p0) = COPY $x0
- ; UNKNOWN: %zero:_(s8) = G_CONSTANT i8 0
- ; UNKNOWN: %width:_(s64) = G_CONSTANT i64 1024
- ; UNKNOWN: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store (s32))
- ; UNKNOWN: RET_ReallyLR
+ ; UNKNOWN-NEXT: {{ $}}
+ ; UNKNOWN-NEXT: %ptr:_(p0) = COPY $x0
+ ; UNKNOWN-NEXT: %zero:_(s8) = G_CONSTANT i8 0
+ ; UNKNOWN-NEXT: %width:_(s64) = G_CONSTANT i64 1024
+ ; UNKNOWN-NEXT: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store (s32))
+ ; UNKNOWN-NEXT: RET_ReallyLR
%ptr:_(p0) = COPY $x0
%zero:_(s8) = G_CONSTANT i8 0
%width:_(s64) = G_CONSTANT i64 1024
@@ -101,17 +110,20 @@ body: |
; DARWIN-LABEL: name: bzero_constant_width_minsize
; DARWIN: liveins: $x0, $x1
- ; DARWIN: %ptr:_(p0) = COPY $x0
- ; DARWIN: %width:_(s64) = G_CONSTANT i64 256
- ; DARWIN: G_BZERO %ptr(p0), %width(s64), 0 :: (store (s32))
- ; DARWIN: RET_ReallyLR
+ ; DARWIN-NEXT: {{ $}}
+ ; DARWIN-NEXT: %ptr:_(p0) = COPY $x0
+ ; DARWIN-NEXT: %width:_(s64) = G_CONSTANT i64 256
+ ; DARWIN-NEXT: G_BZERO %ptr(p0), %width(s64), 0 :: (store (s32))
+ ; DARWIN-NEXT: RET_ReallyLR
+ ;
; UNKNOWN-LABEL: name: bzero_constant_width_minsize
; UNKNOWN: liveins: $x0, $x1
- ; UNKNOWN: %ptr:_(p0) = COPY $x0
- ; UNKNOWN: %zero:_(s8) = G_CONSTANT i8 0
- ; UNKNOWN: %width:_(s64) = G_CONSTANT i64 256
- ; UNKNOWN: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store (s32))
- ; UNKNOWN: RET_ReallyLR
+ ; UNKNOWN-NEXT: {{ $}}
+ ; UNKNOWN-NEXT: %ptr:_(p0) = COPY $x0
+ ; UNKNOWN-NEXT: %zero:_(s8) = G_CONSTANT i8 0
+ ; UNKNOWN-NEXT: %width:_(s64) = G_CONSTANT i64 256
+ ; UNKNOWN-NEXT: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store (s32))
+ ; UNKNOWN-NEXT: RET_ReallyLR
%ptr:_(p0) = COPY $x0
%zero:_(s8) = G_CONSTANT i8 0
%width:_(s64) = G_CONSTANT i64 256
@@ -128,18 +140,21 @@ body: |
; DARWIN-LABEL: name: not_zero
; DARWIN: liveins: $x0, $x1
- ; DARWIN: %ptr:_(p0) = COPY $x0
- ; DARWIN: %not_zero:_(s8) = G_CONSTANT i8 1
- ; DARWIN: %width:_(s64) = G_CONSTANT i64 256
- ; DARWIN: G_MEMSET %ptr(p0), %not_zero(s8), %width(s64), 0 :: (store (s32))
- ; DARWIN: RET_ReallyLR
+ ; DARWIN-NEXT: {{ $}}
+ ; DARWIN-NEXT: %ptr:_(p0) = COPY $x0
+ ; DARWIN-NEXT: %not_zero:_(s8) = G_CONSTANT i8 1
+ ; DARWIN-NEXT: %width:_(s64) = G_CONSTANT i64 256
+ ; DARWIN-NEXT: G_MEMSET %ptr(p0), %not_zero(s8), %width(s64), 0 :: (store (s32))
+ ; DARWIN-NEXT: RET_ReallyLR
+ ;
; UNKNOWN-LABEL: name: not_zero
; UNKNOWN: liveins: $x0, $x1
- ; UNKNOWN: %ptr:_(p0) = COPY $x0
- ; UNKNOWN: %not_zero:_(s8) = G_CONSTANT i8 1
- ; UNKNOWN: %width:_(s64) = G_CONSTANT i64 256
- ; UNKNOWN: G_MEMSET %ptr(p0), %not_zero(s8), %width(s64), 0 :: (store (s32))
- ; UNKNOWN: RET_ReallyLR
+ ; UNKNOWN-NEXT: {{ $}}
+ ; UNKNOWN-NEXT: %ptr:_(p0) = COPY $x0
+ ; UNKNOWN-NEXT: %not_zero:_(s8) = G_CONSTANT i8 1
+ ; UNKNOWN-NEXT: %width:_(s64) = G_CONSTANT i64 256
+ ; UNKNOWN-NEXT: G_MEMSET %ptr(p0), %not_zero(s8), %width(s64), 0 :: (store (s32))
+ ; UNKNOWN-NEXT: RET_ReallyLR
%ptr:_(p0) = COPY $x0
%not_zero:_(s8) = G_CONSTANT i8 1
%width:_(s64) = G_CONSTANT i64 256
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-concat-vectors.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-concat-vectors.mir
index a6f231f8185335..30cb7b724059e2 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-concat-vectors.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-concat-vectors.mir
@@ -12,12 +12,13 @@ body: |
; CHECK-LABEL: name: concat_to_build_vector
; CHECK: liveins: $x0, $x1, $x2, $x3
- ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
- ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
- ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64), [[COPY3]](s64)
- ; CHECK: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x s64>)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64), [[COPY3]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x s64>)
%0:_(s64) = COPY $x0
%1:_(s64) = COPY $x1
%2:_(s64) = COPY $x2
@@ -37,12 +38,13 @@ body: |
; CHECK-LABEL: name: concat_to_build_vector_ptr
; CHECK: liveins: $x0, $x1, $x2, $x3
- ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
- ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
- ; CHECK: [[COPY3:%[0-9]+]]:_(p0) = COPY $x3
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x p0>) = G_BUILD_VECTOR [[COPY]](p0), [[COPY1]](p0), [[COPY2]](p0), [[COPY3]](p0)
- ; CHECK: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x p0>)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY $x3
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x p0>) = G_BUILD_VECTOR [[COPY]](p0), [[COPY1]](p0), [[COPY2]](p0), [[COPY3]](p0)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x p0>)
%0:_(p0) = COPY $x0
%1:_(p0) = COPY $x1
%2:_(p0) = COPY $x2
@@ -60,7 +62,7 @@ body: |
bb.1:
; CHECK-LABEL: name: concat_to_undef
; CHECK: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CHECK: RET_ReallyLR implicit [[DEF]](<4 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[DEF]](<4 x s64>)
%4:_(<2 x s64>) = G_IMPLICIT_DEF
%5:_(<2 x s64>) = G_IMPLICIT_DEF
%6:_(<4 x s64>) = G_CONCAT_VECTORS %4(<2 x s64>), %5
@@ -78,11 +80,12 @@ body: |
; CHECK-LABEL: name: concat_to_build_vector_with_undef
; CHECK: liveins: $x0, $x1
- ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
- ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64), [[DEF]](s64), [[DEF]](s64)
- ; CHECK: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x s64>)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64), [[DEF]](s64), [[DEF]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x s64>)
%0:_(s64) = COPY $x0
%1:_(s64) = COPY $x1
%4:_(<2 x s64>) = G_BUILD_VECTOR %0(s64), %1
@@ -100,11 +103,12 @@ body: |
; CHECK-LABEL: name: concat_to_build_vector_with_undef_ptr
; CHECK: liveins: $x0, $x1
- ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
- ; CHECK: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x p0>) = G_BUILD_VECTOR [[COPY]](p0), [[COPY1]](p0), [[DEF]](p0), [[DEF]](p0)
- ; CHECK: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x p0>)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x p0>) = G_BUILD_VECTOR [[COPY]](p0), [[COPY1]](p0), [[DEF]](p0), [[DEF]](p0)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x p0>)
%0:_(p0) = COPY $x0
%1:_(p0) = COPY $x1
%4:_(<2 x p0>) = G_BUILD_VECTOR %0(p0), %1
@@ -123,10 +127,11 @@ body: |
; CHECK-LABEL: name: concat_to_build_vector_negative_test
; CHECK: liveins: $q0
- ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
- ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
- ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[COPY]](<2 x s64>), [[DEF]](<2 x s64>)
- ; CHECK: RET_ReallyLR implicit [[CONCAT_VECTORS]](<4 x s64>)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[COPY]](<2 x s64>), [[DEF]](<2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[CONCAT_VECTORS]](<4 x s64>)
%4:_(<2 x s64>) = COPY $q0
%5:_(<2 x s64>) = G_IMPLICIT_DEF
%6:_(<4 x s64>) = G_CONCAT_VECTORS %4(<2 x s64>), %5
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-copy-prop-disabled.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-copy-prop-disabled.mir
index e511282eb7b7ca..05eb831681e079 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-copy-prop-disabled.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-copy-prop-disabled.mir
@@ -29,13 +29,18 @@ body: |
bb.0.entry:
liveins: $x0
; ENABLED-LABEL: name: test_copy
- ; ENABLED: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; ENABLED: $x0 = COPY [[COPY]](p0)
+ ; ENABLED: liveins: $x0
+ ; ENABLED-NEXT: {{ $}}
+ ; ENABLED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; ENABLED-NEXT: $x0 = COPY [[COPY]](p0)
+ ;
; DISABLED-LABEL: name: test_copy
- ; DISABLED: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; DISABLED: [[COPY1:%[0-9]+]]:_(p0) = COPY [[COPY]](p0)
- ; DISABLED: [[COPY2:%[0-9]+]]:_(p0) = COPY [[COPY1]](p0)
- ; DISABLED: $x0 = COPY [[COPY2]](p0)
+ ; DISABLED: liveins: $x0
+ ; DISABLED-NEXT: {{ $}}
+ ; DISABLED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; DISABLED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY [[COPY]](p0)
+ ; DISABLED-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[COPY1]](p0)
+ ; DISABLED-NEXT: $x0 = COPY [[COPY2]](p0)
%0:_(p0) = COPY $x0
%1:_(p0) = COPY %0
%2:_(p0) = COPY %1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-prop-extends-phi.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-prop-extends-phi.mir
index 4719bb320d4cd4..35fa1007864d9b 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-prop-extends-phi.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-prop-extends-phi.mir
@@ -8,26 +8,30 @@ tracksRegLiveness: true
body: |
; CHECK-LABEL: name: sext_icst_through_phi
; CHECK: bb.0.entry:
- ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; CHECK: liveins: $w0, $w1
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
- ; CHECK: %one:_(s32) = G_CONSTANT i32 2
- ; CHECK: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one
- ; CHECK: G_BRCOND %cmp(s1), %bb.2
- ; CHECK: G_BR %bb.1
- ; CHECK: bb.1:
- ; CHECK: successors: %bb.3(0x80000000)
- ; CHECK: %cst32_4:_(s32) = G_CONSTANT i32 4
- ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT %cst32_4(s32)
- ; CHECK: G_BR %bb.3
- ; CHECK: bb.2:
- ; CHECK: successors: %bb.3(0x80000000)
- ; CHECK: %cst32_10:_(s32) = G_CONSTANT i32 10
- ; CHECK: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT %cst32_10(s32)
- ; CHECK: bb.3:
- ; CHECK: %ext:_(s64) = G_PHI [[SEXT]](s64), %bb.1, [[SEXT1]](s64), %bb.2
- ; CHECK: $x0 = COPY %ext(s64)
- ; CHECK: RET_ReallyLR implicit $x0
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: %one:_(s32) = G_CONSTANT i32 2
+ ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one
+ ; CHECK-NEXT: G_BRCOND %cmp(s1), %bb.2
+ ; CHECK-NEXT: G_BR %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; CHECK-NEXT: G_BR %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: %ext:_(s64) = G_PHI [[C]](s64), %bb.1, [[C1]](s64), %bb.2
+ ; CHECK-NEXT: $x0 = COPY %ext(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
bb.1.entry:
liveins: $w0, $w1
@@ -61,26 +65,30 @@ tracksRegLiveness: true
body: |
; CHECK-LABEL: name: zext_icst_through_phi
; CHECK: bb.0.entry:
- ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; CHECK: liveins: $w0, $w1
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
- ; CHECK: %one:_(s32) = G_CONSTANT i32 2
- ; CHECK: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one
- ; CHECK: G_BRCOND %cmp(s1), %bb.2
- ; CHECK: G_BR %bb.1
- ; CHECK: bb.1:
- ; CHECK: successors: %bb.3(0x80000000)
- ; CHECK: %cst32_4:_(s32) = G_CONSTANT i32 4
- ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT %cst32_4(s32)
- ; CHECK: G_BR %bb.3
- ; CHECK: bb.2:
- ; CHECK: successors: %bb.3(0x80000000)
- ; CHECK: %cst32_10:_(s32) = G_CONSTANT i32 10
- ; CHECK: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT %cst32_10(s32)
- ; CHECK: bb.3:
- ; CHECK: %ext:_(s64) = G_PHI [[ZEXT]](s64), %bb.1, [[ZEXT1]](s64), %bb.2
- ; CHECK: $x0 = COPY %ext(s64)
- ; CHECK: RET_ReallyLR implicit $x0
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: %one:_(s32) = G_CONSTANT i32 2
+ ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one
+ ; CHECK-NEXT: G_BRCOND %cmp(s1), %bb.2
+ ; CHECK-NEXT: G_BR %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; CHECK-NEXT: G_BR %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: %ext:_(s64) = G_PHI [[C]](s64), %bb.1, [[C1]](s64), %bb.2
+ ; CHECK-NEXT: $x0 = COPY %ext(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
bb.1.entry:
liveins: $w0, $w1
@@ -114,24 +122,30 @@ tracksRegLiveness: true
body: |
; CHECK-LABEL: name: sext_load_through_phi_vector
; CHECK: bb.0.entry:
- ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; CHECK: liveins: $x0, $q0, $q1
- ; CHECK: %ptr:_(p0) = COPY $x0
- ; CHECK: %cmp:_(s1) = G_IMPLICIT_DEF
- ; CHECK: G_BRCOND %cmp(s1), %bb.2
- ; CHECK: G_BR %bb.1
- ; CHECK: bb.1:
- ; CHECK: successors: %bb.3(0x80000000)
- ; CHECK: %ld1:_(<4 x s32>) = G_LOAD %ptr(p0) :: (load (<4 x s32>))
- ; CHECK: G_BR %bb.3
- ; CHECK: bb.2:
- ; CHECK: successors: %bb.3(0x80000000)
- ; CHECK: %ld2:_(<4 x s32>) = G_LOAD %ptr(p0) :: (load (<4 x s32>))
- ; CHECK: bb.3:
- ; CHECK: %phi:_(<4 x s32>) = G_PHI %ld1(<4 x s32>), %bb.1, %ld2(<4 x s32>), %bb.2
- ; CHECK: %ext:_(<4 x s64>) = G_SEXT %phi(<4 x s32>)
- ; CHECK: G_STORE %ext(<4 x s64>), %ptr(p0) :: (store (<4 x s64>))
- ; CHECK: RET_ReallyLR
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $x0, $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %ptr:_(p0) = COPY $x0
+ ; CHECK-NEXT: %cmp:_(s1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_BRCOND %cmp(s1), %bb.2
+ ; CHECK-NEXT: G_BR %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %ld1:_(<4 x s32>) = G_LOAD %ptr(p0) :: (load (<4 x s32>))
+ ; CHECK-NEXT: G_BR %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %ld2:_(<4 x s32>) = G_LOAD %ptr(p0) :: (load (<4 x s32>))
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: %phi:_(<4 x s32>) = G_PHI %ld1(<4 x s32>), %bb.1, %ld2(<4 x s32>), %bb.2
+ ; CHECK-NEXT: %ext:_(<4 x s64>) = G_SEXT %phi(<4 x s32>)
+ ; CHECK-NEXT: G_STORE %ext(<4 x s64>), %ptr(p0) :: (store (<4 x s64>))
+ ; CHECK-NEXT: RET_ReallyLR
bb.1.entry:
liveins: $x0, $q0, $q1
@@ -166,27 +180,33 @@ tracksRegLiveness: true
body: |
; CHECK-LABEL: name: sext_icst_through_phi_used_by_ptradd
; CHECK: bb.0.entry:
- ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; CHECK: liveins: $w0, $w1, $x2
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
- ; CHECK: %base:_(p0) = COPY $x2
- ; CHECK: %one:_(s32) = G_CONSTANT i32 2
- ; CHECK: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one
- ; CHECK: G_BRCOND %cmp(s1), %bb.2
- ; CHECK: G_BR %bb.1
- ; CHECK: bb.1:
- ; CHECK: successors: %bb.3(0x80000000)
- ; CHECK: %cst32_4:_(s32) = G_CONSTANT i32 4
- ; CHECK: G_BR %bb.3
- ; CHECK: bb.2:
- ; CHECK: successors: %bb.3(0x80000000)
- ; CHECK: %cst32_10:_(s32) = G_CONSTANT i32 10
- ; CHECK: bb.3:
- ; CHECK: %phi:_(s32) = G_PHI %cst32_4(s32), %bb.1, %cst32_10(s32), %bb.2
- ; CHECK: %ext:_(s64) = G_SEXT %phi(s32)
- ; CHECK: %ptr:_(p0) = G_PTR_ADD %base, %ext(s64)
- ; CHECK: $x0 = COPY %ptr(p0)
- ; CHECK: RET_ReallyLR implicit $x0
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $w0, $w1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: %base:_(p0) = COPY $x2
+ ; CHECK-NEXT: %one:_(s32) = G_CONSTANT i32 2
+ ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one
+ ; CHECK-NEXT: G_BRCOND %cmp(s1), %bb.2
+ ; CHECK-NEXT: G_BR %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cst32_4:_(s32) = G_CONSTANT i32 4
+ ; CHECK-NEXT: G_BR %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cst32_10:_(s32) = G_CONSTANT i32 10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: %phi:_(s32) = G_PHI %cst32_4(s32), %bb.1, %cst32_10(s32), %bb.2
+ ; CHECK-NEXT: %ext:_(s64) = G_SEXT %phi(s32)
+ ; CHECK-NEXT: %ptr:_(p0) = G_PTR_ADD %base, %ext(s64)
+ ; CHECK-NEXT: $x0 = COPY %ptr(p0)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
bb.1.entry:
liveins: $w0, $w1, $x2
@@ -223,29 +243,33 @@ tracksRegLiveness: true
body: |
; CHECK-LABEL: name: sext_icst_through_phi_used_by_ptradd_multiuse
; CHECK: bb.0.entry:
- ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; CHECK: liveins: $w0, $w1, $x2
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
- ; CHECK: %base:_(p0) = COPY $x2
- ; CHECK: %one:_(s32) = G_CONSTANT i32 2
- ; CHECK: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one
- ; CHECK: G_BRCOND %cmp(s1), %bb.2
- ; CHECK: G_BR %bb.1
- ; CHECK: bb.1:
- ; CHECK: successors: %bb.3(0x80000000)
- ; CHECK: %cst32_4:_(s32) = G_CONSTANT i32 4
- ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT %cst32_4(s32)
- ; CHECK: G_BR %bb.3
- ; CHECK: bb.2:
- ; CHECK: successors: %bb.3(0x80000000)
- ; CHECK: %cst32_10:_(s32) = G_CONSTANT i32 10
- ; CHECK: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT %cst32_10(s32)
- ; CHECK: bb.3:
- ; CHECK: %ext:_(s64) = G_PHI [[SEXT]](s64), %bb.1, [[SEXT1]](s64), %bb.2
- ; CHECK: %ptr:_(p0) = G_PTR_ADD %base, %ext(s64)
- ; CHECK: $x0 = COPY %ptr(p0)
- ; CHECK: $x1 = COPY %ext(s64)
- ; CHECK: RET_ReallyLR implicit $x0
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $w0, $w1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: %base:_(p0) = COPY $x2
+ ; CHECK-NEXT: %one:_(s32) = G_CONSTANT i32 2
+ ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one
+ ; CHECK-NEXT: G_BRCOND %cmp(s1), %bb.2
+ ; CHECK-NEXT: G_BR %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; CHECK-NEXT: G_BR %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: %ext:_(s64) = G_PHI [[C]](s64), %bb.1, [[C1]](s64), %bb.2
+ ; CHECK-NEXT: %ptr:_(p0) = G_PTR_ADD %base, %ext(s64)
+ ; CHECK-NEXT: $x0 = COPY %ptr(p0)
+ ; CHECK-NEXT: $x1 = COPY %ext(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
bb.1.entry:
liveins: $w0, $w1, $x2
@@ -283,31 +307,39 @@ tracksRegLiveness: true
body: |
; CHECK-LABEL: name: zext_icst_through_phi_too_many_incoming
; CHECK: bb.0.entry:
- ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; CHECK: liveins: $w0, $w1
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
- ; CHECK: %one:_(s32) = G_CONSTANT i32 2
- ; CHECK: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one
- ; CHECK: G_BRCOND %cmp(s1), %bb.2
- ; CHECK: G_BR %bb.1
- ; CHECK: bb.1:
- ; CHECK: successors: %bb.3(0x40000000), %bb.4(0x40000000)
- ; CHECK: %cst32_4:_(s32) = G_CONSTANT i32 4
- ; CHECK: %cond:_(s1) = G_IMPLICIT_DEF
- ; CHECK: G_BRCOND %cond(s1), %bb.3
- ; CHECK: G_BR %bb.4
- ; CHECK: bb.2:
- ; CHECK: successors: %bb.4(0x80000000)
- ; CHECK: %cst32_10:_(s32) = G_CONSTANT i32 10
- ; CHECK: G_BR %bb.4
- ; CHECK: bb.3:
- ; CHECK: successors: %bb.4(0x80000000)
- ; CHECK: %cst32_42:_(s32) = G_CONSTANT i32 42
- ; CHECK: bb.4:
- ; CHECK: %phi:_(s32) = G_PHI %cst32_4(s32), %bb.1, %cst32_10(s32), %bb.2, %cst32_42(s32), %bb.3
- ; CHECK: %ext:_(s64) = G_ZEXT %phi(s32)
- ; CHECK: $x0 = COPY %ext(s64)
- ; CHECK: RET_ReallyLR implicit $x0
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: %one:_(s32) = G_CONSTANT i32 2
+ ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one
+ ; CHECK-NEXT: G_BRCOND %cmp(s1), %bb.2
+ ; CHECK-NEXT: G_BR %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cst32_4:_(s32) = G_CONSTANT i32 4
+ ; CHECK-NEXT: %cond:_(s1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_BRCOND %cond(s1), %bb.3
+ ; CHECK-NEXT: G_BR %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cst32_10:_(s32) = G_CONSTANT i32 10
+ ; CHECK-NEXT: G_BR %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cst32_42:_(s32) = G_CONSTANT i32 42
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: %phi:_(s32) = G_PHI %cst32_4(s32), %bb.1, %cst32_10(s32), %bb.2, %cst32_42(s32), %bb.3
+ ; CHECK-NEXT: %ext:_(s64) = G_ZEXT %phi(s32)
+ ; CHECK-NEXT: $x0 = COPY %ext(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
bb.1.entry:
liveins: $w0, $w1
@@ -347,26 +379,32 @@ tracksRegLiveness: true
body: |
; CHECK-LABEL: name: sext_add_through_phi
; CHECK: bb.0.entry:
- ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; CHECK: liveins: $w0, $w1
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
- ; CHECK: %one:_(s32) = G_CONSTANT i32 2
- ; CHECK: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one
- ; CHECK: G_BRCOND %cmp(s1), %bb.2
- ; CHECK: G_BR %bb.1
- ; CHECK: bb.1:
- ; CHECK: successors: %bb.3(0x80000000)
- ; CHECK: %add:_(s32) = G_ADD [[COPY]], [[COPY1]]
- ; CHECK: G_BR %bb.3
- ; CHECK: bb.2:
- ; CHECK: successors: %bb.3(0x80000000)
- ; CHECK: %cst32_10:_(s32) = G_CONSTANT i32 10
- ; CHECK: bb.3:
- ; CHECK: %phi:_(s32) = G_PHI %add(s32), %bb.1, %cst32_10(s32), %bb.2
- ; CHECK: %ext:_(s64) = G_SEXT %phi(s32)
- ; CHECK: $x0 = COPY %ext(s64)
- ; CHECK: RET_ReallyLR implicit $x0
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: %one:_(s32) = G_CONSTANT i32 2
+ ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one
+ ; CHECK-NEXT: G_BRCOND %cmp(s1), %bb.2
+ ; CHECK-NEXT: G_BR %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %add:_(s32) = G_ADD [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: G_BR %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cst32_10:_(s32) = G_CONSTANT i32 10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: %phi:_(s32) = G_PHI %add(s32), %bb.1, %cst32_10(s32), %bb.2
+ ; CHECK-NEXT: %ext:_(s64) = G_SEXT %phi(s32)
+ ; CHECK-NEXT: $x0 = COPY %ext(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
bb.1.entry:
liveins: $w0, $w1
@@ -400,27 +438,32 @@ tracksRegLiveness: true
body: |
; CHECK-LABEL: name: anyext_add_through_phi
; CHECK: bb.0.entry:
- ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; CHECK: liveins: $w0, $w1
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
- ; CHECK: %one:_(s32) = G_CONSTANT i32 2
- ; CHECK: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one
- ; CHECK: G_BRCOND %cmp(s1), %bb.2
- ; CHECK: G_BR %bb.1
- ; CHECK: bb.1:
- ; CHECK: successors: %bb.3(0x80000000)
- ; CHECK: %add:_(s32) = G_ADD [[COPY]], [[COPY1]]
- ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %add(s32)
- ; CHECK: G_BR %bb.3
- ; CHECK: bb.2:
- ; CHECK: successors: %bb.3(0x80000000)
- ; CHECK: %cst32_10:_(s32) = G_CONSTANT i32 10
- ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT %cst32_10(s32)
- ; CHECK: bb.3:
- ; CHECK: %ext:_(s64) = G_PHI [[ANYEXT]](s64), %bb.1, [[ANYEXT1]](s64), %bb.2
- ; CHECK: $x0 = COPY %ext(s64)
- ; CHECK: RET_ReallyLR implicit $x0
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: %one:_(s32) = G_CONSTANT i32 2
+ ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one
+ ; CHECK-NEXT: G_BRCOND %cmp(s1), %bb.2
+ ; CHECK-NEXT: G_BR %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %add:_(s32) = G_ADD [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %add(s32)
+ ; CHECK-NEXT: G_BR %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: %ext:_(s64) = G_PHI [[ANYEXT]](s64), %bb.1, [[C]](s64), %bb.2
+ ; CHECK-NEXT: $x0 = COPY %ext(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
bb.1.entry:
liveins: $w0, $w1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-ptradd-chain.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-ptradd-chain.mir
index 2f06b24770939c..8d847d80c4e840 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-ptradd-chain.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-ptradd-chain.mir
@@ -118,8 +118,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[COPY1]](s64)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[COPY1]](s64)
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C]](s64)
; CHECK-NEXT: %ld:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (load (s64))
; CHECK-NEXT: $x0 = COPY %ld(s64)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir
index 85538dc1990da4..c75a4cd5ca30b0 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir
@@ -308,11 +308,8 @@ body: |
; CHECK-LABEL: name: look_through_zext
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %zero:_(s8) = G_CONSTANT i8 0
- ; CHECK-NEXT: %zext_zero:_(s64) = G_ZEXT %zero(s8)
- ; CHECK-NEXT: %c:_(s64) = G_CONSTANT i64 72340172838076673
- ; CHECK-NEXT: %mul:_(s64) = G_MUL %zext_zero, %c
- ; CHECK-NEXT: $x0 = COPY %mul(s64)
+ ; CHECK-NEXT: %zext_zero:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: $x0 = COPY %zext_zero(s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%zero:_(s8) = G_CONSTANT i8 0
%zext_zero:_(s64) = G_ZEXT %zero(s8)
@@ -384,7 +381,9 @@ body: |
bb.1:
liveins: $q0
; CHECK-LABEL: name: lshr_of_vec_zero
- ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
; CHECK-NEXT: $q0 = COPY [[COPY]](<8 x s16>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(<8 x s16>) = COPY $q0
@@ -400,7 +399,9 @@ body: |
bb.1:
liveins: $q0
; CHECK-LABEL: name: ptradd_of_vec_zero
- ; CHECK: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $q0
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $q0
; CHECK-NEXT: $q0 = COPY [[COPY]](<2 x p0>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(<2 x p0>) = COPY $q0
@@ -419,7 +420,9 @@ body: |
liveins: $x0
; CHECK-LABEL: name: i128_or_cst
- ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load (s128))
; CHECK-NEXT: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 9223372036854775808
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s128) = G_OR [[LOAD]], [[C]]
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/uaddo-8-16-bits.mir b/llvm/test/CodeGen/AArch64/GlobalISel/uaddo-8-16-bits.mir
index 95f9d962780ff1..0ab11b3ac558f8 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/uaddo-8-16-bits.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/uaddo-8-16-bits.mir
@@ -310,7 +310,7 @@ body: |
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]]
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s32), [[C1]]
- ; CHECK-NEXT: DBG_VALUE {{%[0-9]+}}:_(s16)
+ ; CHECK-NEXT: DBG_VALUE %6:_(s16)
; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.2
; CHECK-NEXT: G_BR %bb.1
; CHECK-NEXT: {{ $}}
@@ -438,23 +438,22 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK-NEXT: [[ASSERT_ZEXT1:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY1]], 16
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT1]](s32)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 10
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s16), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[TRUNC]], [[TRUNC1]]
- ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDO]](s16)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDO]](s16)
; CHECK-NEXT: G_BRCOND [[UADDO1]](s1), %bb.2
; CHECK-NEXT: G_BR %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[ANYEXT]](s32), %bb.2, [[ANYEXT1]](s32), %bb.0
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.2, [[ANYEXT]](s32), %bb.0
; CHECK-NEXT: $w0 = COPY [[PHI]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDO]](s16)
- ; CHECK-NEXT: $w0 = COPY [[ANYEXT2]](s32)
+ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDO]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT1]](s32)
; CHECK-NEXT: G_BR %bb.1
bb.1:
liveins: $w0, $w1
diff --git a/llvm/test/CodeGen/AArch64/bool-ext-inc.ll b/llvm/test/CodeGen/AArch64/bool-ext-inc.ll
index 850ce200f5ad9b..8e6a70986c302f 100644
--- a/llvm/test/CodeGen/AArch64/bool-ext-inc.ll
+++ b/llvm/test/CodeGen/AArch64/bool-ext-inc.ll
@@ -117,8 +117,7 @@ define i32 @caller_signext_i1() {
; GISEL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; GISEL-NEXT: .cfi_def_cfa_offset 16
; GISEL-NEXT: .cfi_offset w30, -16
-; GISEL-NEXT: mov w8, #1 // =0x1
-; GISEL-NEXT: sbfx w0, w8, #0, #1
+; GISEL-NEXT: mov w0, #-1 // =0xffffffff
; GISEL-NEXT: bl callee_signext_i1
; GISEL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; GISEL-NEXT: ret
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-illegal-types.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-illegal-types.mir
index 01fafd76b9116f..5bea4a7db6ca7b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-illegal-types.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-illegal-types.mir
@@ -82,11 +82,9 @@ body: |
; CHECK-LABEL: name: test_lshr_i44
; CHECK: liveins: $sgpr0, $sgpr1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s44) = G_CONSTANT i44 0
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s44)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64)
- ; CHECK-NEXT: $sgpr0 = COPY [[UV]](s32)
- ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: $sgpr0 = COPY [[C]](s32)
+ ; CHECK-NEXT: $sgpr1 = COPY [[C]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
%1:_(s32) = COPY $sgpr0
%2:_(s32) = COPY $sgpr1
@@ -148,11 +146,9 @@ body: |
; CHECK-LABEL: name: test_shl_i44
; CHECK: liveins: $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s44) = G_CONSTANT i44 0
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s44)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64)
- ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32)
+ ; CHECK-NEXT: $vgpr1 = COPY [[C]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%1:_(s32) = COPY $vgpr0
%2:_(s32) = COPY $vgpr1
@@ -222,10 +218,10 @@ body: |
; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s44) = G_SSHLSAT [[TRUNC]], [[C]](s44)
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[SSHLSAT]](s44)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
- ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
- ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
- ; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
+ ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
%1:_(s32) = COPY $sgpr0
%2:_(s32) = COPY $sgpr1
@@ -261,10 +257,10 @@ body: |
; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s55) = G_SSHLSAT [[TRUNC]], [[C]](s55)
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[SSHLSAT]](s55)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64)
- ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
- ; CHECK-NEXT: $vgpr0 = COPY [[INT]](s32)
- ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
- ; CHECK-NEXT: $vgpr1 = COPY [[INT1]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
+ ; CHECK-NEXT: $vgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%1:_(s32) = COPY $vgpr0
%2:_(s32) = COPY $vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir
index 2851e8133427a7..6a291510fe66c1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir
@@ -19,6 +19,7 @@ body: |
; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C]](s32)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64)
+ ;
; GFX9-LABEL: name: shl_s64_by_2_from_anyext_s32
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -51,6 +52,7 @@ body: |
; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C]](s32)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64)
+ ;
; GFX9-LABEL: name: shl_s64_by_2_from_sext_s32
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -83,6 +85,7 @@ body: |
; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[C]](s32)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64)
+ ;
; GFX9-LABEL: name: shl_s64_by_2_from_zext_s32
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -116,6 +119,7 @@ body: |
; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL %masked, [[C]](s32)
; GFX6-NEXT: %shl:_(s64) = G_ZEXT [[SHL]](s32)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(s64)
+ ;
; GFX9-LABEL: name: narrow_shl_s64_by_2_from_anyext_s32
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -153,6 +157,7 @@ body: |
; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL %masked, [[C]](s32)
; GFX6-NEXT: %shl:_(s64) = G_ZEXT [[SHL]](s32)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(s64)
+ ;
; GFX9-LABEL: name: narrow_shl_s64_by_2_from_zext_s32
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -190,6 +195,7 @@ body: |
; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL %masked, [[C]](s32)
; GFX6-NEXT: %shl:_(s64) = G_ZEXT [[SHL]](s32)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(s64)
+ ;
; GFX9-LABEL: name: narrow_shl_s64_by_2_from_sext_s32
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -227,6 +233,7 @@ body: |
; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL %masked, [[C]](s32)
; GFX6-NEXT: %shl:_(s64) = G_ZEXT [[SHL]](s32)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(s64)
+ ;
; GFX9-LABEL: name: narrow_shl_s64_by_2_from_zext_s32_lookthrough_amount
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -267,6 +274,7 @@ body: |
; GFX6-NEXT: %shiftamt:_(s32) = G_CONSTANT i32 2
; GFX6-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s32)
; GFX6-NEXT: $vgpr0 = COPY %shl(s32)
+ ;
; GFX9-LABEL: name: narrow_shl_s32_by_2_from_zext_s16
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -307,6 +315,7 @@ body: |
; GFX6-NEXT: %shiftamt:_(s32) = G_CONSTANT i32 2
; GFX6-NEXT: %shl:_(s64) = G_SHL %extend, %shiftamt(s32)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(s64)
+ ;
; GFX9-LABEL: name: narrow_shl_s64_by_2_from_zext_s16
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -339,18 +348,13 @@ body: |
; GFX6-LABEL: name: do_not_shl_s32_zero_by_16_from_zext_s16
; GFX6: liveins: $vgpr0
; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: %zero:_(s16) = G_CONSTANT i16 0
- ; GFX6-NEXT: %extend:_(s32) = G_ZEXT %zero(s16)
- ; GFX6-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16
- ; GFX6-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16)
+ ; GFX6-NEXT: %shl:_(s32) = G_CONSTANT i32 0
; GFX6-NEXT: $vgpr0 = COPY %shl(s32)
+ ;
; GFX9-LABEL: name: do_not_shl_s32_zero_by_16_from_zext_s16
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: %zero:_(s16) = G_CONSTANT i16 0
- ; GFX9-NEXT: %extend:_(s32) = G_ZEXT %zero(s16)
- ; GFX9-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16
- ; GFX9-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16)
+ ; GFX9-NEXT: %shl:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: $vgpr0 = COPY %shl(s32)
%zero:_(s16) = G_CONSTANT i16 0
%extend:_(s32) = G_ZEXT %zero:_(s16)
@@ -377,6 +381,7 @@ body: |
; GFX6-NEXT: %extend:_(<2 x s32>) = G_ZEXT %zerovector(<2 x s16>)
; GFX6-NEXT: %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvector(<2 x s16>)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>)
+ ;
; GFX9-LABEL: name: do_not_shl_v2s32_zero_by_16_from_zext_v2s16
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -413,6 +418,7 @@ body: |
; GFX6-NEXT: %extend:_(s32) = G_ZEXT %truncate(s16)
; GFX6-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16)
; GFX6-NEXT: $vgpr0 = COPY %shl(s32)
+ ;
; GFX9-LABEL: name: do_not_shl_s32_by_16_from_zext_s16
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir
index 317c1f29e43f71..6ceb41199af6da 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir
@@ -20,6 +20,7 @@ body: |
; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL %masked, [[C]](s16)
; GFX6-NEXT: %shl:_(s32) = G_ZEXT [[SHL]](s16)
; GFX6-NEXT: $vgpr0 = COPY %shl(s32)
+ ;
; GFX9-LABEL: name: narrow_shl_s32_by_2_from_zext_s16
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -59,6 +60,7 @@ body: |
; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL %masked, [[C]](s16)
; GFX6-NEXT: %shl:_(s64) = G_ZEXT [[SHL]](s16)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(s64)
+ ;
; GFX9-LABEL: name: narrow_shl_s64_by_2_from_zext_s16
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -98,6 +100,7 @@ body: |
; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s8) = G_SHL %masked, [[C]](s8)
; GFX6-NEXT: %result:_(s32) = G_ZEXT [[SHL]](s8)
; GFX6-NEXT: $vgpr0 = COPY %result(s32)
+ ;
; GFX9-LABEL: name: narrow_shl_s16_by_2_from_zext_s8
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -139,6 +142,7 @@ body: |
; GFX6-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL %masked, [[BUILD_VECTOR]](<2 x s16>)
; GFX6-NEXT: %shl:_(<2 x s32>) = G_ZEXT [[SHL]](<2 x s16>)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>)
+ ;
; GFX9-LABEL: name: narrow_shl_v2s32_by_2_from_zext_v2s16
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -181,6 +185,7 @@ body: |
; GFX6-NEXT: [[SHL:%[0-9]+]]:_(<2 x s32>) = G_SHL %masked, %shiftamtvec(<2 x s32>)
; GFX6-NEXT: %shl:_(<2 x s64>) = G_ZEXT [[SHL]](<2 x s32>)
; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %shl(<2 x s64>)
+ ;
; GFX9-LABEL: name: narrow_shl_v2s64_by_2_from_anyext_v2s32
; GFX9: liveins: $vgpr0_vgpr1
; GFX9-NEXT: {{ $}}
@@ -215,19 +220,14 @@ body: |
; GFX6-LABEL: name: do_not_shl_s32_zero_by_16_from_zext_s16
; GFX6: liveins: $vgpr0
; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: %zero:_(s16) = G_CONSTANT i16 0
- ; GFX6-NEXT: %extend:_(s32) = G_ZEXT %zero(s16)
- ; GFX6-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16
- ; GFX6-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16)
- ; GFX6-NEXT: $vgpr0 = COPY %shl(s32)
+ ; GFX6-NEXT: %extend:_(s32) = G_CONSTANT i32 0
+ ; GFX6-NEXT: $vgpr0 = COPY %extend(s32)
+ ;
; GFX9-LABEL: name: do_not_shl_s32_zero_by_16_from_zext_s16
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: %zero:_(s16) = G_CONSTANT i16 0
- ; GFX9-NEXT: %extend:_(s32) = G_ZEXT %zero(s16)
- ; GFX9-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16
- ; GFX9-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16)
- ; GFX9-NEXT: $vgpr0 = COPY %shl(s32)
+ ; GFX9-NEXT: %extend:_(s32) = G_CONSTANT i32 0
+ ; GFX9-NEXT: $vgpr0 = COPY %extend(s32)
%zero:_(s16) = G_CONSTANT i16 0
%extend:_(s32) = G_ZEXT %zero:_(s16)
%shiftamt:_(s16) = G_CONSTANT i16 16
@@ -253,6 +253,7 @@ body: |
; GFX6-NEXT: %extend:_(<2 x s32>) = G_ZEXT %zerovector(<2 x s16>)
; GFX6-NEXT: %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvector(<2 x s16>)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>)
+ ;
; GFX9-LABEL: name: do_not_shl_v2s32_zero_by_16_from_zext_v2s16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
@@ -289,6 +290,7 @@ body: |
; GFX6-NEXT: %extend:_(s32) = G_ZEXT %truncate(s16)
; GFX6-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16)
; GFX6-NEXT: $vgpr0 = COPY %shl(s32)
+ ;
; GFX9-LABEL: name: do_not_shl_s32_by_16_from_zext_s16
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-ashr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-ashr.mir
index 2b10ee54eeebde..61162db8e36229 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-ashr.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-ashr.mir
@@ -26,8 +26,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: constant_fold_ashr_s16_s16
- ; CHECK: %shift:_(s16) = G_CONSTANT i16 -772
- ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %shift(s16)
+ ; CHECK: %ext:_(s32) = G_CONSTANT i32 64764
; CHECK-NEXT: $vgpr0 = COPY %ext(s32)
%val:_(s16) = G_CONSTANT i16 -12345
%shift_amt:_(s16) = G_CONSTANT i16 4
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-lshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-lshr.mir
index 9e2dece906887b..8830c08cb61504 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-lshr.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-lshr.mir
@@ -26,8 +26,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: constant_fold_lshr_s16_s16
- ; CHECK: %shift:_(s16) = G_CONSTANT i16 3324
- ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %shift(s16)
+ ; CHECK: %ext:_(s32) = G_CONSTANT i32 3324
; CHECK-NEXT: $vgpr0 = COPY %ext(s32)
%val:_(s16) = G_CONSTANT i16 -12345
%shift_amt:_(s16) = G_CONSTANT i16 4
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-shl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-shl.mir
index 6962a53dd331e3..a3389097b0b329 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-shl.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-shl.mir
@@ -26,8 +26,7 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: constant_fold_shl_s16_s16
- ; CHECK: %shift:_(s16) = G_CONSTANT i16 912
- ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %shift(s16)
+ ; CHECK: %ext:_(s32) = G_CONSTANT i32 912
; CHECK-NEXT: $vgpr0 = COPY %ext(s32)
%val:_(s16) = G_CONSTANT i16 12345
%shift_amt:_(s16) = G_CONSTANT i16 4
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
index 7a81bc52e9ebf1..ff294d8378005e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
@@ -1094,22 +1094,18 @@ define i64 @v_sdiv_i64_pow2k_denom(i64 %num) {
; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v4, v1
; CHECK-NEXT: v_mov_b32_e32 v6, 0x1000
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CHECK-NEXT: s_bfe_i32 s6, 1, 0x10000
; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v6
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[4:5]
-; CHECK-NEXT: v_mov_b32_e32 v4, s6
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v2
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v2, v4, v3, s[4:5]
+; CHECK-NEXT: v_cndmask_b32_e64 v2, -1, v3, s[4:5]
; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v8
; CHECK-NEXT: v_addc_u32_e32 v4, vcc, 0, v5, vcc
-; CHECK-NEXT: s_bfe_i32 s4, 1, 0x10000
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v6
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
-; CHECK-NEXT: v_mov_b32_e32 v6, s4
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
+; CHECK-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
; CHECK-NEXT: v_add_i32_e32 v1, vcc, 1, v3
; CHECK-NEXT: v_addc_u32_e32 v6, vcc, 0, v4, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
@@ -1132,177 +1128,173 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) {
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, 0x1000
-; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v6, 0
+; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v8, 0
; GISEL-NEXT: s_sub_u32 s6, 0, 0x1000
; GISEL-NEXT: s_subb_u32 s7, 0, 0
-; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6
+; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v8
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
-; GISEL-NEXT: v_trunc_f32_e32 v7, v5
-; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v7
-; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v4
-; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v7
-; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], s6, v9, 0
-; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s6, v10, v[5:6]
-; GISEL-NEXT: v_mul_lo_u32 v5, v10, v4
-; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s7, v9, v[7:8]
-; GISEL-NEXT: v_mul_hi_u32 v8, v9, v4
-; GISEL-NEXT: v_mul_hi_u32 v4, v10, v4
-; GISEL-NEXT: v_mul_lo_u32 v11, v9, v7
-; GISEL-NEXT: v_mul_lo_u32 v12, v10, v7
-; GISEL-NEXT: v_mul_hi_u32 v13, v9, v7
-; GISEL-NEXT: v_mul_hi_u32 v7, v10, v7
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v11
+; GISEL-NEXT: v_trunc_f32_e32 v6, v5
+; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6
+; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v4
+; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v6
+; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], s6, v7, 0
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v9, v[5:6]
+; GISEL-NEXT: v_mul_hi_u32 v10, v7, v4
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s7, v7, v[5:6]
+; GISEL-NEXT: v_mul_lo_u32 v6, v9, v4
+; GISEL-NEXT: v_mul_hi_u32 v4, v9, v4
+; GISEL-NEXT: v_mul_lo_u32 v11, v7, v5
+; GISEL-NEXT: v_mul_lo_u32 v12, v9, v5
+; GISEL-NEXT: v_mul_hi_u32 v13, v7, v5
+; GISEL-NEXT: v_mul_hi_u32 v5, v9, v5
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v11
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8
-; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v11, v5
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10
+; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v11, v6
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v12, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v13
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
-; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v8, v5
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v9, v4
-; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s6, v11, 0
-; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v10, v5, vcc
-; GISEL-NEXT: v_mov_b32_e32 v4, v8
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v5, v[4:5]
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v6
+; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v10, v6
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v7, v4
+; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v5, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v10, 0
+; GISEL-NEXT: v_mov_b32_e32 v4, v6
+; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v9, v[4:5]
; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v1
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s7, v11, v[8:9]
+; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v10, v[6:7]
; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc
-; GISEL-NEXT: v_xor_b32_e32 v9, v0, v4
-; GISEL-NEXT: v_mul_lo_u32 v0, v5, v7
-; GISEL-NEXT: v_mul_lo_u32 v10, v11, v8
+; GISEL-NEXT: v_xor_b32_e32 v11, v0, v4
+; GISEL-NEXT: v_mul_lo_u32 v0, v9, v5
+; GISEL-NEXT: v_mul_lo_u32 v7, v10, v6
; GISEL-NEXT: v_xor_b32_e32 v12, v1, v4
-; GISEL-NEXT: v_mul_hi_u32 v1, v11, v7
-; GISEL-NEXT: v_mul_hi_u32 v7, v5, v7
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10
-; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT: v_mul_hi_u32 v1, v10, v5
+; GISEL-NEXT: v_mul_hi_u32 v5, v9, v5
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7
+; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v1, v5, v8
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0
-; GISEL-NEXT: v_mul_hi_u32 v10, v11, v8
+; GISEL-NEXT: v_mul_lo_u32 v1, v9, v6
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v7, v0
+; GISEL-NEXT: v_mul_hi_u32 v7, v10, v6
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v5
+; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v7
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v10
-; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10
-; GISEL-NEXT: v_mul_hi_u32 v8, v5, v8
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7
+; GISEL-NEXT: v_mul_hi_u32 v6, v9, v6
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v7, v1
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0
-; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v5, v1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v7, v12, v0
-; GISEL-NEXT: v_mul_lo_u32 v8, v9, v1
-; GISEL-NEXT: v_mul_hi_u32 v10, v9, v0
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v6, v1
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0
+; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v9, v1, vcc
+; GISEL-NEXT: v_mul_lo_u32 v6, v12, v0
+; GISEL-NEXT: v_mul_lo_u32 v7, v11, v1
+; GISEL-NEXT: v_mul_hi_u32 v9, v11, v0
; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0
; GISEL-NEXT: v_mov_b32_e32 v5, 0x1000
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8
-; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v10, v12, v1
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7
-; GISEL-NEXT: v_mul_hi_u32 v8, v9, v1
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v8
-; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v0, v7
-; GISEL-NEXT: v_mul_hi_u32 v11, v12, v1
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v10, 0
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9
+; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT: v_mul_lo_u32 v9, v12, v1
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6
+; GISEL-NEXT: v_mul_hi_u32 v7, v11, v1
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0
+; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v7
-; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v5, v11, v[1:2]
-; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v9, v0
-; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], 0, v10, v[7:8]
-; GISEL-NEXT: s_bfe_i32 s6, 1, 0x10000
-; GISEL-NEXT: v_mov_b32_e32 v9, s6
-; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v12, v7, vcc
-; GISEL-NEXT: v_sub_i32_e64 v7, s[4:5], v12, v7
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v9, v7
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v0, v6
+; GISEL-NEXT: v_mul_hi_u32 v10, v12, v1
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v9, 0
+; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v6
+; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v5, v10, v[1:2]
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v11, v0
+; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], 0, v9, v[6:7]
+; GISEL-NEXT: s_sub_u32 s6, 0, 0x1000
+; GISEL-NEXT: s_subb_u32 s7, 0, 0
+; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v12, v6, vcc
+; GISEL-NEXT: v_sub_i32_e64 v6, s[4:5], v12, v6
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5
-; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5]
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1
-; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v7, vcc
+; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v6, vcc
+; GISEL-NEXT: v_cvt_f32_u32_e32 v6, 0x1000
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
+; GISEL-NEXT: v_cndmask_b32_e64 v11, -1, v7, s[4:5]
+; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v8
; GISEL-NEXT: v_subbrev_u32_e32 v7, vcc, 0, v1, vcc
-; GISEL-NEXT: v_cvt_f32_u32_e32 v1, 0x1000
-; GISEL-NEXT: v_cndmask_b32_e64 v8, v9, v8, s[4:5]
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v10
-; GISEL-NEXT: v_mac_f32_e32 v1, 0x4f800000, v6
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v1, v1
-; GISEL-NEXT: v_addc_u32_e32 v12, vcc, 0, v11, vcc
+; GISEL-NEXT: v_rcp_iflag_f32_e32 v1, v6
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v9
+; GISEL-NEXT: v_addc_u32_e32 v12, vcc, 0, v10, vcc
+; GISEL-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1
+; GISEL-NEXT: v_mul_f32_e32 v6, 0x2f800000, v1
+; GISEL-NEXT: v_trunc_f32_e32 v6, v6
+; GISEL-NEXT: v_mac_f32_e32 v1, 0xcf800000, v6
+; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v1
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
-; GISEL-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v1
-; GISEL-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
-; GISEL-NEXT: v_trunc_f32_e32 v13, v1
-; GISEL-NEXT: v_mac_f32_e32 v0, 0xcf800000, v13
-; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v0
-; GISEL-NEXT: s_bfe_i32 s4, 1, 0x10000
-; GISEL-NEXT: s_sub_u32 s6, 0, 0x1000
-; GISEL-NEXT: v_mov_b32_e32 v15, s4
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v14, 0
-; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v13
-; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc
+; GISEL-NEXT: v_cvt_u32_f32_e32 v15, v6
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v13, 0
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
-; GISEL-NEXT: v_cndmask_b32_e32 v15, v15, v6, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2]
-; GISEL-NEXT: s_subb_u32 s7, 0, 0
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v9
-; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v14, v[6:7]
+; GISEL-NEXT: v_cndmask_b32_e32 v14, -1, v14, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v15, v[1:2]
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v8
+; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v13, v[6:7]
; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v12, vcc
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15
-; GISEL-NEXT: v_cndmask_b32_e32 v7, v9, v1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v1, v13, v0
-; GISEL-NEXT: v_mul_lo_u32 v9, v14, v6
-; GISEL-NEXT: v_mul_hi_u32 v15, v14, v0
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14
+; GISEL-NEXT: v_cndmask_b32_e32 v7, v8, v1, vcc
+; GISEL-NEXT: v_mul_lo_u32 v1, v15, v0
+; GISEL-NEXT: v_mul_lo_u32 v8, v13, v6
+; GISEL-NEXT: v_mul_hi_u32 v14, v13, v0
; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc
-; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v15
+; GISEL-NEXT: v_mul_hi_u32 v0, v15, v0
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v8
+; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v14
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v15, v13, v6
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1
-; GISEL-NEXT: v_mul_hi_u32 v9, v14, v6
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v15, v9
-; GISEL-NEXT: v_mul_hi_u32 v6, v13, v6
+; GISEL-NEXT: v_mul_lo_u32 v14, v15, v6
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1
+; GISEL-NEXT: v_mul_hi_u32 v8, v13, v6
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v14, v0
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v8
+; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v14, v8
+; GISEL-NEXT: v_mul_hi_u32 v6, v15, v6
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v6, v1
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v14, v0
-; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v13, v1, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v9, 0
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
-; GISEL-NEXT: v_cndmask_b32_e32 v8, v10, v7, vcc
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v13, v0
+; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v15, v1, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11
+; GISEL-NEXT: v_cndmask_b32_e32 v9, v9, v7, vcc
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2]
-; GISEL-NEXT: v_xor_b32_e32 v1, v8, v4
-; GISEL-NEXT: v_ashrrev_i32_e32 v8, 31, v3
-; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v9, v[6:7]
-; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v12, vcc
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v8
-; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v8, vcc
-; GISEL-NEXT: v_xor_b32_e32 v11, v2, v8
+; GISEL-NEXT: v_xor_b32_e32 v1, v9, v4
+; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v3
+; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[6:7]
+; GISEL-NEXT: v_cndmask_b32_e32 v10, v10, v12, vcc
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v9
+; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v9, vcc
+; GISEL-NEXT: v_xor_b32_e32 v11, v2, v9
; GISEL-NEXT: v_mul_lo_u32 v2, v13, v0
-; GISEL-NEXT: v_mul_lo_u32 v7, v9, v6
-; GISEL-NEXT: v_xor_b32_e32 v12, v3, v8
-; GISEL-NEXT: v_mul_hi_u32 v3, v9, v0
+; GISEL-NEXT: v_mul_lo_u32 v7, v8, v6
+; GISEL-NEXT: v_xor_b32_e32 v12, v3, v9
+; GISEL-NEXT: v_mul_hi_u32 v3, v8, v0
; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
@@ -1310,7 +1302,7 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) {
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GISEL-NEXT: v_mul_lo_u32 v3, v13, v6
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7, v2
-; GISEL-NEXT: v_mul_hi_u32 v7, v9, v6
+; GISEL-NEXT: v_mul_hi_u32 v7, v8, v6
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0
; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7
@@ -1321,13 +1313,13 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) {
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0
; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v13, v2, vcc
; GISEL-NEXT: v_mul_lo_u32 v3, v12, v0
; GISEL-NEXT: v_mul_lo_u32 v6, v11, v2
; GISEL-NEXT: v_mul_hi_u32 v7, v11, v0
; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0
-; GISEL-NEXT: v_xor_b32_e32 v9, v10, v4
+; GISEL-NEXT: v_xor_b32_e32 v8, v10, v4
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
@@ -1349,28 +1341,24 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) {
; GISEL-NEXT: v_mov_b32_e32 v0, v3
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v5, v13, v[0:1]
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4
-; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v9, v4, vcc
+; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v8, v4, vcc
; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v10, v[6:7]
; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v11, v2
; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc
; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v12, v3
; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT: s_bfe_i32 s6, 1, 0x10000
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5
; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v5
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; GISEL-NEXT: v_mov_b32_e32 v7, s6
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4
; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v4, v7, v6, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v4, -1, v6, s[4:5]
; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v10
; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v13, vcc
-; GISEL-NEXT: s_bfe_i32 s4, 1, 0x10000
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
-; GISEL-NEXT: v_mov_b32_e32 v5, s4
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc
; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v6
; GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v7, vcc
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
@@ -1379,10 +1367,10 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) {
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
; GISEL-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v3, v13, v3, vcc
-; GISEL-NEXT: v_xor_b32_e32 v2, v2, v8
-; GISEL-NEXT: v_xor_b32_e32 v3, v3, v8
-; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v8
-; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v8, vcc
+; GISEL-NEXT: v_xor_b32_e32 v2, v2, v9
+; GISEL-NEXT: v_xor_b32_e32 v3, v3, v9
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v9
+; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_sdiv_v2i64_pow2k_denom:
@@ -1394,7 +1382,6 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) {
; CGP-NEXT: s_movk_i32 s7, 0x1000
; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4
-; CGP-NEXT: s_bfe_i32 s8, 1, 0x10000
; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
; CGP-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
; CGP-NEXT: v_trunc_f32_e32 v6, v5
@@ -1492,9 +1479,8 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) {
; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1
; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v6, vcc
; CGP-NEXT: v_cvt_f32_u32_e32 v6, 0x1000
-; CGP-NEXT: v_mov_b32_e32 v8, s8
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
-; CGP-NEXT: v_cndmask_b32_e64 v8, v8, v7, s[4:5]
+; CGP-NEXT: v_cndmask_b32_e64 v8, -1, v7, s[4:5]
; CGP-NEXT: v_subbrev_u32_e32 v7, vcc, 0, v1, vcc
; CGP-NEXT: v_cvt_f32_ubyte0_e32 v1, 0
; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v1
@@ -1506,30 +1492,28 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) {
; CGP-NEXT: v_trunc_f32_e32 v6, v6
; CGP-NEXT: v_mac_f32_e32 v1, 0xcf800000, v6
; CGP-NEXT: v_cvt_u32_f32_e32 v13, v1
-; CGP-NEXT: s_bfe_i32 s4, 1, 0x10000
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
-; CGP-NEXT: v_mov_b32_e32 v15, s4
-; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v13, 0
-; CGP-NEXT: v_cvt_u32_f32_e32 v16, v6
+; CGP-NEXT: v_cvt_u32_f32_e32 v15, v6
; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc
+; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v13, 0
; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
-; CGP-NEXT: v_cndmask_b32_e32 v14, v15, v14, vcc
-; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v16, v[1:2]
+; CGP-NEXT: v_cndmask_b32_e32 v14, -1, v14, vcc
+; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v15, v[1:2]
; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v11
; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], -1, v13, v[6:7]
-; CGP-NEXT: v_addc_u32_e32 v15, vcc, 0, v12, vcc
+; CGP-NEXT: v_addc_u32_e32 v16, vcc, 0, v12, vcc
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14
; CGP-NEXT: v_cndmask_b32_e32 v7, v11, v1, vcc
-; CGP-NEXT: v_mul_lo_u32 v1, v16, v0
+; CGP-NEXT: v_mul_lo_u32 v1, v15, v0
; CGP-NEXT: v_mul_lo_u32 v11, v13, v6
; CGP-NEXT: v_mul_hi_u32 v14, v13, v0
-; CGP-NEXT: v_cndmask_b32_e32 v12, v12, v15, vcc
-; CGP-NEXT: v_mul_hi_u32 v0, v16, v0
+; CGP-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc
+; CGP-NEXT: v_mul_hi_u32 v0, v15, v0
; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v11
; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v14
; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v14, v16, v6
+; CGP-NEXT: v_mul_lo_u32 v14, v15, v6
; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v1
; CGP-NEXT: v_mul_hi_u32 v11, v13, v6
; CGP-NEXT: v_add_i32_e32 v0, vcc, v14, v0
@@ -1537,13 +1521,13 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) {
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v11
; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v11, vcc, v14, v11
-; CGP-NEXT: v_mul_hi_u32 v6, v16, v6
+; CGP-NEXT: v_mul_hi_u32 v6, v15, v6
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v1
; CGP-NEXT: v_add_i32_e32 v1, vcc, v6, v1
; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v0
-; CGP-NEXT: v_addc_u32_e32 v13, vcc, v16, v1, vcc
+; CGP-NEXT: v_addc_u32_e32 v13, vcc, v15, v1, vcc
; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v11, 0
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
; CGP-NEXT: v_cndmask_b32_e32 v6, v9, v7, vcc
@@ -1609,22 +1593,18 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) {
; CGP-NEXT: v_subb_u32_e64 v3, s[4:5], v12, v5, vcc
; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v12, v5
; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
-; CGP-NEXT: s_bfe_i32 s6, 1, 0x10000
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4
; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v4
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; CGP-NEXT: v_mov_b32_e32 v7, s6
; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3
; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v3, v7, v6, s[4:5]
+; CGP-NEXT: v_cndmask_b32_e64 v3, -1, v6, s[4:5]
; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v9
; CGP-NEXT: v_addc_u32_e32 v7, vcc, 0, v10, vcc
-; CGP-NEXT: s_bfe_i32 s4, 1, 0x10000
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4
; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
-; CGP-NEXT: v_mov_b32_e32 v4, s4
; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
-; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc
; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v6
; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v7, vcc
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
@@ -1744,22 +1724,18 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) {
; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v4, v1
; CHECK-NEXT: v_mov_b32_e32 v6, 0x12d8fb
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CHECK-NEXT: s_bfe_i32 s6, 1, 0x10000
; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v6
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[4:5]
-; CHECK-NEXT: v_mov_b32_e32 v4, s6
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v2
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CHECK-NEXT: v_cndmask_b32_e64 v2, v4, v3, s[4:5]
+; CHECK-NEXT: v_cndmask_b32_e64 v2, -1, v3, s[4:5]
; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v8
; CHECK-NEXT: v_addc_u32_e32 v4, vcc, 0, v5, vcc
-; CHECK-NEXT: s_bfe_i32 s4, 1, 0x10000
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v6
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
-; CHECK-NEXT: v_mov_b32_e32 v6, s4
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
+; CHECK-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
; CHECK-NEXT: v_add_i32_e32 v1, vcc, 1, v3
; CHECK-NEXT: v_addc_u32_e32 v6, vcc, 0, v4, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
@@ -1782,177 +1758,173 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, 0x12d8fb
-; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v6, 0
+; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v8, 0
; GISEL-NEXT: s_sub_u32 s6, 0, 0x12d8fb
; GISEL-NEXT: s_subb_u32 s7, 0, 0
-; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6
+; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v8
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
-; GISEL-NEXT: v_trunc_f32_e32 v7, v5
-; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v7
-; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v4
-; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v7
-; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], s6, v9, 0
-; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s6, v10, v[5:6]
-; GISEL-NEXT: v_mul_lo_u32 v5, v10, v4
-; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s7, v9, v[7:8]
-; GISEL-NEXT: v_mul_hi_u32 v8, v9, v4
-; GISEL-NEXT: v_mul_hi_u32 v4, v10, v4
-; GISEL-NEXT: v_mul_lo_u32 v11, v9, v7
-; GISEL-NEXT: v_mul_lo_u32 v12, v10, v7
-; GISEL-NEXT: v_mul_hi_u32 v13, v9, v7
-; GISEL-NEXT: v_mul_hi_u32 v7, v10, v7
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v11
+; GISEL-NEXT: v_trunc_f32_e32 v6, v5
+; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6
+; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v4
+; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v6
+; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], s6, v7, 0
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v9, v[5:6]
+; GISEL-NEXT: v_mul_hi_u32 v10, v7, v4
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s7, v7, v[5:6]
+; GISEL-NEXT: v_mul_lo_u32 v6, v9, v4
+; GISEL-NEXT: v_mul_hi_u32 v4, v9, v4
+; GISEL-NEXT: v_mul_lo_u32 v11, v7, v5
+; GISEL-NEXT: v_mul_lo_u32 v12, v9, v5
+; GISEL-NEXT: v_mul_hi_u32 v13, v7, v5
+; GISEL-NEXT: v_mul_hi_u32 v5, v9, v5
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v11
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8
-; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v11, v5
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10
+; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v11, v6
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v12, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v13
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
-; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v8, v5
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v9, v4
-; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s6, v11, 0
-; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v10, v5, vcc
-; GISEL-NEXT: v_mov_b32_e32 v4, v8
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v5, v[4:5]
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v6
+; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v10, v6
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v7, v4
+; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v5, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v10, 0
+; GISEL-NEXT: v_mov_b32_e32 v4, v6
+; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v9, v[4:5]
; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v1
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s7, v11, v[8:9]
+; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v10, v[6:7]
; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc
-; GISEL-NEXT: v_xor_b32_e32 v9, v0, v4
-; GISEL-NEXT: v_mul_lo_u32 v0, v5, v7
-; GISEL-NEXT: v_mul_lo_u32 v10, v11, v8
+; GISEL-NEXT: v_xor_b32_e32 v11, v0, v4
+; GISEL-NEXT: v_mul_lo_u32 v0, v9, v5
+; GISEL-NEXT: v_mul_lo_u32 v7, v10, v6
; GISEL-NEXT: v_xor_b32_e32 v12, v1, v4
-; GISEL-NEXT: v_mul_hi_u32 v1, v11, v7
-; GISEL-NEXT: v_mul_hi_u32 v7, v5, v7
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10
-; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT: v_mul_hi_u32 v1, v10, v5
+; GISEL-NEXT: v_mul_hi_u32 v5, v9, v5
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7
+; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v1, v5, v8
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0
-; GISEL-NEXT: v_mul_hi_u32 v10, v11, v8
+; GISEL-NEXT: v_mul_lo_u32 v1, v9, v6
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v7, v0
+; GISEL-NEXT: v_mul_hi_u32 v7, v10, v6
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v5
+; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v7
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v10
-; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10
-; GISEL-NEXT: v_mul_hi_u32 v8, v5, v8
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7
+; GISEL-NEXT: v_mul_hi_u32 v6, v9, v6
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v7, v1
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0
-; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v5, v1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v7, v12, v0
-; GISEL-NEXT: v_mul_lo_u32 v8, v9, v1
-; GISEL-NEXT: v_mul_hi_u32 v10, v9, v0
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v6, v1
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0
+; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v9, v1, vcc
+; GISEL-NEXT: v_mul_lo_u32 v6, v12, v0
+; GISEL-NEXT: v_mul_lo_u32 v7, v11, v1
+; GISEL-NEXT: v_mul_hi_u32 v9, v11, v0
; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0
; GISEL-NEXT: v_mov_b32_e32 v5, 0x12d8fb
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8
-; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v10, v12, v1
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7
-; GISEL-NEXT: v_mul_hi_u32 v8, v9, v1
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v8
-; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v0, v7
-; GISEL-NEXT: v_mul_hi_u32 v11, v12, v1
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v10, 0
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9
+; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT: v_mul_lo_u32 v9, v12, v1
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6
+; GISEL-NEXT: v_mul_hi_u32 v7, v11, v1
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0
+; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v7
-; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v5, v11, v[1:2]
-; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v9, v0
-; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], 0, v10, v[7:8]
-; GISEL-NEXT: s_bfe_i32 s6, 1, 0x10000
-; GISEL-NEXT: v_mov_b32_e32 v9, s6
-; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v12, v7, vcc
-; GISEL-NEXT: v_sub_i32_e64 v7, s[4:5], v12, v7
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v9, v7
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v0, v6
+; GISEL-NEXT: v_mul_hi_u32 v10, v12, v1
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v9, 0
+; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v6
+; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v5, v10, v[1:2]
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v11, v0
+; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], 0, v9, v[6:7]
+; GISEL-NEXT: s_sub_u32 s6, 0, 0x12d8fb
+; GISEL-NEXT: s_subb_u32 s7, 0, 0
+; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v12, v6, vcc
+; GISEL-NEXT: v_sub_i32_e64 v6, s[4:5], v12, v6
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5
-; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5]
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1
-; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v7, vcc
+; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v6, vcc
+; GISEL-NEXT: v_cvt_f32_u32_e32 v6, 0x12d8fb
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
+; GISEL-NEXT: v_cndmask_b32_e64 v11, -1, v7, s[4:5]
+; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v8
; GISEL-NEXT: v_subbrev_u32_e32 v7, vcc, 0, v1, vcc
-; GISEL-NEXT: v_cvt_f32_u32_e32 v1, 0x12d8fb
-; GISEL-NEXT: v_cndmask_b32_e64 v8, v9, v8, s[4:5]
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v10
-; GISEL-NEXT: v_mac_f32_e32 v1, 0x4f800000, v6
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v1, v1
-; GISEL-NEXT: v_addc_u32_e32 v12, vcc, 0, v11, vcc
+; GISEL-NEXT: v_rcp_iflag_f32_e32 v1, v6
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v9
+; GISEL-NEXT: v_addc_u32_e32 v12, vcc, 0, v10, vcc
+; GISEL-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1
+; GISEL-NEXT: v_mul_f32_e32 v6, 0x2f800000, v1
+; GISEL-NEXT: v_trunc_f32_e32 v6, v6
+; GISEL-NEXT: v_mac_f32_e32 v1, 0xcf800000, v6
+; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v1
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
-; GISEL-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v1
-; GISEL-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
-; GISEL-NEXT: v_trunc_f32_e32 v13, v1
-; GISEL-NEXT: v_mac_f32_e32 v0, 0xcf800000, v13
-; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v0
-; GISEL-NEXT: s_bfe_i32 s4, 1, 0x10000
-; GISEL-NEXT: s_sub_u32 s6, 0, 0x12d8fb
-; GISEL-NEXT: v_mov_b32_e32 v15, s4
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v14, 0
-; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v13
-; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc
+; GISEL-NEXT: v_cvt_u32_f32_e32 v15, v6
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v13, 0
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
-; GISEL-NEXT: v_cndmask_b32_e32 v15, v15, v6, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2]
-; GISEL-NEXT: s_subb_u32 s7, 0, 0
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v9
-; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v14, v[6:7]
+; GISEL-NEXT: v_cndmask_b32_e32 v14, -1, v14, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v15, v[1:2]
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v8
+; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v13, v[6:7]
; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v12, vcc
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15
-; GISEL-NEXT: v_cndmask_b32_e32 v7, v9, v1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v1, v13, v0
-; GISEL-NEXT: v_mul_lo_u32 v9, v14, v6
-; GISEL-NEXT: v_mul_hi_u32 v15, v14, v0
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14
+; GISEL-NEXT: v_cndmask_b32_e32 v7, v8, v1, vcc
+; GISEL-NEXT: v_mul_lo_u32 v1, v15, v0
+; GISEL-NEXT: v_mul_lo_u32 v8, v13, v6
+; GISEL-NEXT: v_mul_hi_u32 v14, v13, v0
; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc
-; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v15
+; GISEL-NEXT: v_mul_hi_u32 v0, v15, v0
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v8
+; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v14
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v15, v13, v6
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1
-; GISEL-NEXT: v_mul_hi_u32 v9, v14, v6
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v15, v9
-; GISEL-NEXT: v_mul_hi_u32 v6, v13, v6
+; GISEL-NEXT: v_mul_lo_u32 v14, v15, v6
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1
+; GISEL-NEXT: v_mul_hi_u32 v8, v13, v6
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v14, v0
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v8
+; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v14, v8
+; GISEL-NEXT: v_mul_hi_u32 v6, v15, v6
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v6, v1
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v14, v0
-; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v13, v1, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v9, 0
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
-; GISEL-NEXT: v_cndmask_b32_e32 v8, v10, v7, vcc
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v13, v0
+; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v15, v1, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11
+; GISEL-NEXT: v_cndmask_b32_e32 v9, v9, v7, vcc
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2]
-; GISEL-NEXT: v_xor_b32_e32 v1, v8, v4
-; GISEL-NEXT: v_ashrrev_i32_e32 v8, 31, v3
-; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v9, v[6:7]
-; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v12, vcc
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v8
-; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v8, vcc
-; GISEL-NEXT: v_xor_b32_e32 v11, v2, v8
+; GISEL-NEXT: v_xor_b32_e32 v1, v9, v4
+; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v3
+; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[6:7]
+; GISEL-NEXT: v_cndmask_b32_e32 v10, v10, v12, vcc
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v9
+; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v9, vcc
+; GISEL-NEXT: v_xor_b32_e32 v11, v2, v9
; GISEL-NEXT: v_mul_lo_u32 v2, v13, v0
-; GISEL-NEXT: v_mul_lo_u32 v7, v9, v6
-; GISEL-NEXT: v_xor_b32_e32 v12, v3, v8
-; GISEL-NEXT: v_mul_hi_u32 v3, v9, v0
+; GISEL-NEXT: v_mul_lo_u32 v7, v8, v6
+; GISEL-NEXT: v_xor_b32_e32 v12, v3, v9
+; GISEL-NEXT: v_mul_hi_u32 v3, v8, v0
; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
@@ -1960,7 +1932,7 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GISEL-NEXT: v_mul_lo_u32 v3, v13, v6
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7, v2
-; GISEL-NEXT: v_mul_hi_u32 v7, v9, v6
+; GISEL-NEXT: v_mul_hi_u32 v7, v8, v6
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0
; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7
@@ -1971,13 +1943,13 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0
; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v13, v2, vcc
; GISEL-NEXT: v_mul_lo_u32 v3, v12, v0
; GISEL-NEXT: v_mul_lo_u32 v6, v11, v2
; GISEL-NEXT: v_mul_hi_u32 v7, v11, v0
; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0
-; GISEL-NEXT: v_xor_b32_e32 v9, v10, v4
+; GISEL-NEXT: v_xor_b32_e32 v8, v10, v4
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
@@ -1999,28 +1971,24 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL-NEXT: v_mov_b32_e32 v0, v3
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v5, v13, v[0:1]
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4
-; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v9, v4, vcc
+; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v8, v4, vcc
; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v10, v[6:7]
; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v11, v2
; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc
; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v12, v3
; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT: s_bfe_i32 s6, 1, 0x10000
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5
; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v5
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; GISEL-NEXT: v_mov_b32_e32 v7, s6
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4
; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v4, v7, v6, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v4, -1, v6, s[4:5]
; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v10
; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v13, vcc
-; GISEL-NEXT: s_bfe_i32 s4, 1, 0x10000
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
-; GISEL-NEXT: v_mov_b32_e32 v5, s4
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc
; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v6
; GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v7, vcc
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
@@ -2029,10 +1997,10 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
; GISEL-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v3, v13, v3, vcc
-; GISEL-NEXT: v_xor_b32_e32 v2, v2, v8
-; GISEL-NEXT: v_xor_b32_e32 v3, v3, v8
-; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v8
-; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v8, vcc
+; GISEL-NEXT: v_xor_b32_e32 v2, v2, v9
+; GISEL-NEXT: v_xor_b32_e32 v3, v3, v9
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v9
+; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_sdiv_v2i64_oddk_denom:
@@ -2044,7 +2012,6 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
; CGP-NEXT: s_mov_b32 s7, 0x12d8fb
; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4
-; CGP-NEXT: s_bfe_i32 s8, 1, 0x10000
; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
; CGP-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
; CGP-NEXT: v_trunc_f32_e32 v6, v5
@@ -2142,9 +2109,8 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1
; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v6, vcc
; CGP-NEXT: v_cvt_f32_u32_e32 v6, 0x12d8fb
-; CGP-NEXT: v_mov_b32_e32 v8, s8
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
-; CGP-NEXT: v_cndmask_b32_e64 v8, v8, v7, s[4:5]
+; CGP-NEXT: v_cndmask_b32_e64 v8, -1, v7, s[4:5]
; CGP-NEXT: v_subbrev_u32_e32 v7, vcc, 0, v1, vcc
; CGP-NEXT: v_cvt_f32_ubyte0_e32 v1, 0
; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v1
@@ -2156,30 +2122,28 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
; CGP-NEXT: v_trunc_f32_e32 v6, v6
; CGP-NEXT: v_mac_f32_e32 v1, 0xcf800000, v6
; CGP-NEXT: v_cvt_u32_f32_e32 v13, v1
-; CGP-NEXT: s_bfe_i32 s4, 1, 0x10000
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
-; CGP-NEXT: v_mov_b32_e32 v15, s4
-; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v13, 0
-; CGP-NEXT: v_cvt_u32_f32_e32 v16, v6
+; CGP-NEXT: v_cvt_u32_f32_e32 v15, v6
; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc
+; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v13, 0
; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
-; CGP-NEXT: v_cndmask_b32_e32 v14, v15, v14, vcc
-; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v16, v[1:2]
+; CGP-NEXT: v_cndmask_b32_e32 v14, -1, v14, vcc
+; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v15, v[1:2]
; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v11
; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], -1, v13, v[6:7]
-; CGP-NEXT: v_addc_u32_e32 v15, vcc, 0, v12, vcc
+; CGP-NEXT: v_addc_u32_e32 v16, vcc, 0, v12, vcc
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14
; CGP-NEXT: v_cndmask_b32_e32 v7, v11, v1, vcc
-; CGP-NEXT: v_mul_lo_u32 v1, v16, v0
+; CGP-NEXT: v_mul_lo_u32 v1, v15, v0
; CGP-NEXT: v_mul_lo_u32 v11, v13, v6
; CGP-NEXT: v_mul_hi_u32 v14, v13, v0
-; CGP-NEXT: v_cndmask_b32_e32 v12, v12, v15, vcc
-; CGP-NEXT: v_mul_hi_u32 v0, v16, v0
+; CGP-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc
+; CGP-NEXT: v_mul_hi_u32 v0, v15, v0
; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v11
; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v14
; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v14, v16, v6
+; CGP-NEXT: v_mul_lo_u32 v14, v15, v6
; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v1
; CGP-NEXT: v_mul_hi_u32 v11, v13, v6
; CGP-NEXT: v_add_i32_e32 v0, vcc, v14, v0
@@ -2187,13 +2151,13 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v11
; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v11, vcc, v14, v11
-; CGP-NEXT: v_mul_hi_u32 v6, v16, v6
+; CGP-NEXT: v_mul_hi_u32 v6, v15, v6
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v1
; CGP-NEXT: v_add_i32_e32 v1, vcc, v6, v1
; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v0
-; CGP-NEXT: v_addc_u32_e32 v13, vcc, v16, v1, vcc
+; CGP-NEXT: v_addc_u32_e32 v13, vcc, v15, v1, vcc
; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v11, 0
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
; CGP-NEXT: v_cndmask_b32_e32 v6, v9, v7, vcc
@@ -2259,22 +2223,18 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
; CGP-NEXT: v_subb_u32_e64 v3, s[4:5], v12, v5, vcc
; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v12, v5
; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
-; CGP-NEXT: s_bfe_i32 s6, 1, 0x10000
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4
; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v4
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; CGP-NEXT: v_mov_b32_e32 v7, s6
; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3
; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v3, v7, v6, s[4:5]
+; CGP-NEXT: v_cndmask_b32_e64 v3, -1, v6, s[4:5]
; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v9
; CGP-NEXT: v_addc_u32_e32 v7, vcc, 0, v10, vcc
-; CGP-NEXT: s_bfe_i32 s4, 1, 0x10000
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4
; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
-; CGP-NEXT: v_mov_b32_e32 v4, s4
; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
-; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc
; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v6
; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v7, vcc
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
index 751d8c66af927f..21fd7b594aca44 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
@@ -1074,23 +1074,21 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) {
; CHECK-NEXT: v_mov_b32_e32 v6, 0x1000
; CHECK-NEXT: v_subb_u32_e64 v2, vcc, v4, v1, s[4:5]
; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v4, v1
-; CHECK-NEXT: s_bfe_i32 s6, 1, 0x10000
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v6
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc
-; CHECK-NEXT: v_mov_b32_e32 v4, s6
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; CHECK-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v6
+; CHECK-NEXT: v_cndmask_b32_e32 v3, -1, v3, vcc
+; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v0, v6
; CHECK-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5]
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v5, v6
-; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v6
+; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
-; CHECK-NEXT: v_subrev_i32_e32 v6, vcc, 0x1000, v5
+; CHECK-NEXT: v_cndmask_b32_e32 v5, -1, v5, vcc
+; CHECK-NEXT: v_subrev_i32_e32 v6, vcc, 0x1000, v4
; CHECK-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v1, vcc
-; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
-; CHECK-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc
+; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
+; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
@@ -1202,121 +1200,119 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6
; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v5, v6, v[1:2]
-; GISEL-NEXT: s_bfe_i32 s6, 1, 0x10000
+; GISEL-NEXT: s_sub_u32 s6, 0, 0x1000
+; GISEL-NEXT: s_subb_u32 s7, 0, 0
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], 0, v8, v[6:7]
; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v10, v0
; GISEL-NEXT: v_subb_u32_e64 v9, s[4:5], v11, v6, vcc
; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v11, v6
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v5
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5]
-; GISEL-NEXT: v_mov_b32_e32 v10, s6
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v11, v10, v1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v10, -1, v1, s[4:5]
; GISEL-NEXT: v_cvt_f32_u32_e32 v1, 0x1000
; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v6, 0
; GISEL-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
; GISEL-NEXT: v_mac_f32_e32 v1, 0x4f800000, v6
; GISEL-NEXT: v_rcp_iflag_f32_e32 v1, v1
-; GISEL-NEXT: v_sub_i32_e32 v12, vcc, v8, v5
-; GISEL-NEXT: v_subbrev_u32_e32 v13, vcc, 0, v0, vcc
+; GISEL-NEXT: v_sub_i32_e32 v11, vcc, v8, v5
+; GISEL-NEXT: v_subbrev_u32_e32 v12, vcc, 0, v0, vcc
; GISEL-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v1
; GISEL-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GISEL-NEXT: v_trunc_f32_e32 v6, v1
; GISEL-NEXT: v_mac_f32_e32 v0, 0xcf800000, v6
-; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v0
-; GISEL-NEXT: s_sub_u32 s6, 0, 0x1000
-; GISEL-NEXT: v_cvt_u32_f32_e32 v15, v6
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v12, v5
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v14, 0
+; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v0
+; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v6
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v11, v5
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v13
-; GISEL-NEXT: v_cndmask_b32_e32 v16, v10, v7, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v15, v[1:2]
-; GISEL-NEXT: s_subb_u32 s7, 0, 0
-; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v12, v5
-; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v14, v[6:7]
-; GISEL-NEXT: v_subbrev_u32_e32 v17, vcc, 0, v13, vcc
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16
-; GISEL-NEXT: v_cndmask_b32_e32 v7, v12, v1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v1, v15, v0
-; GISEL-NEXT: v_mul_lo_u32 v12, v14, v6
-; GISEL-NEXT: v_mul_hi_u32 v16, v14, v0
-; GISEL-NEXT: v_cndmask_b32_e32 v13, v13, v17, vcc
-; GISEL-NEXT: v_mul_hi_u32 v0, v15, v0
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v12
-; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v16
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v13, 0
+; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v12
+; GISEL-NEXT: v_cndmask_b32_e32 v15, -1, v7, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v14, v[1:2]
+; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v11, v5
+; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v13, v[6:7]
+; GISEL-NEXT: v_subbrev_u32_e32 v16, vcc, 0, v12, vcc
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15
+; GISEL-NEXT: v_cndmask_b32_e32 v7, v11, v1, vcc
+; GISEL-NEXT: v_mul_lo_u32 v1, v14, v0
+; GISEL-NEXT: v_mul_lo_u32 v11, v13, v6
+; GISEL-NEXT: v_mul_hi_u32 v15, v13, v0
+; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc
+; GISEL-NEXT: v_mul_hi_u32 v0, v14, v0
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v11
+; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v15
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v16, v15, v6
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v12, v1
-; GISEL-NEXT: v_mul_hi_u32 v12, v14, v6
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v16, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v12
-; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, v16, v12
-; GISEL-NEXT: v_mul_hi_u32 v6, v15, v6
+; GISEL-NEXT: v_mul_lo_u32 v15, v14, v6
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v11, v1
+; GISEL-NEXT: v_mul_hi_u32 v11, v13, v6
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v0
+; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v11
+; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v15, v11
+; GISEL-NEXT: v_mul_hi_u32 v6, v14, v6
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v12, v1
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v11, v1
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v6, v1
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, v14, v0
-; GISEL-NEXT: v_addc_u32_e32 v14, vcc, v15, v1, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v12, 0
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v0
+; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v14, v1, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v11, 0
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10
; GISEL-NEXT: v_cndmask_b32_e32 v8, v8, v7, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v14, v[1:2]
+; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2]
; GISEL-NEXT: v_xor_b32_e32 v1, v8, v4
; GISEL-NEXT: v_ashrrev_i32_e32 v8, 31, v3
-; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v12, v[6:7]
-; GISEL-NEXT: v_cndmask_b32_e32 v9, v9, v13, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v11, v[6:7]
+; GISEL-NEXT: v_cndmask_b32_e32 v9, v9, v12, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v8
; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v8, vcc
-; GISEL-NEXT: v_xor_b32_e32 v11, v2, v8
-; GISEL-NEXT: v_mul_lo_u32 v2, v14, v0
-; GISEL-NEXT: v_mul_lo_u32 v7, v12, v6
-; GISEL-NEXT: v_xor_b32_e32 v13, v3, v8
-; GISEL-NEXT: v_mul_hi_u32 v3, v12, v0
-; GISEL-NEXT: v_mul_hi_u32 v0, v14, v0
+; GISEL-NEXT: v_xor_b32_e32 v10, v2, v8
+; GISEL-NEXT: v_mul_lo_u32 v2, v13, v0
+; GISEL-NEXT: v_mul_lo_u32 v7, v11, v6
+; GISEL-NEXT: v_xor_b32_e32 v12, v3, v8
+; GISEL-NEXT: v_mul_hi_u32 v3, v11, v0
+; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v3, v14, v6
+; GISEL-NEXT: v_mul_lo_u32 v3, v13, v6
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7, v2
-; GISEL-NEXT: v_mul_hi_u32 v7, v12, v6
+; GISEL-NEXT: v_mul_hi_u32 v7, v11, v6
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0
; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
-; GISEL-NEXT: v_mul_hi_u32 v6, v14, v6
+; GISEL-NEXT: v_mul_hi_u32 v6, v13, v6
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v12, v0
-; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v14, v2, vcc
-; GISEL-NEXT: v_mul_lo_u32 v3, v13, v0
-; GISEL-NEXT: v_mul_lo_u32 v6, v11, v2
-; GISEL-NEXT: v_mul_hi_u32 v7, v11, v0
-; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0
+; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v13, v2, vcc
+; GISEL-NEXT: v_mul_lo_u32 v3, v12, v0
+; GISEL-NEXT: v_mul_lo_u32 v6, v10, v2
+; GISEL-NEXT: v_mul_hi_u32 v7, v10, v0
+; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0
; GISEL-NEXT: v_xor_b32_e32 v9, v9, v4
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v7, v13, v2
+; GISEL-NEXT: v_mul_lo_u32 v7, v12, v2
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v6, v3
-; GISEL-NEXT: v_mul_hi_u32 v6, v11, v2
+; GISEL-NEXT: v_mul_hi_u32 v6, v10, v2
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v7, v0
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, v0, v3
-; GISEL-NEXT: v_mul_hi_u32 v7, v13, v2
-; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v12, 0
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v0, v3
+; GISEL-NEXT: v_mul_hi_u32 v7, v12, v2
+; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v11, 0
; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0
; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v0
@@ -1324,25 +1320,23 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v5, v6, v[0:1]
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4
; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v9, v4, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v12, v[6:7]
-; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v11, v2
-; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v13, v3, vcc
-; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v13, v3
+; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v11, v[6:7]
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v10, v2
+; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc
+; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v12, v3
; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5
; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v2, v5
-; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4
; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v6, v10, v6, s[4:5]
-; GISEL-NEXT: s_bfe_i32 s4, 1, 0x10000
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v7, v5
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc
-; GISEL-NEXT: v_mov_b32_e32 v10, s4
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; GISEL-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5
+; GISEL-NEXT: v_cndmask_b32_e32 v9, -1, v9, vcc
; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v7, v5
+; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4
; GISEL-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v3, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v6, -1, v6, s[4:5]
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9
; GISEL-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v10, vcc
@@ -1364,7 +1358,6 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
; CGP-NEXT: s_movk_i32 s7, 0x1000
; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4
-; CGP-NEXT: s_bfe_i32 s8, 1, 0x10000
; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
; CGP-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
; CGP-NEXT: v_trunc_f32_e32 v6, v5
@@ -1454,149 +1447,148 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6
; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6
; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v6, v[1:2]
-; CGP-NEXT: v_sub_i32_e32 v9, vcc, v8, v0
-; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v11, v6, vcc
+; CGP-NEXT: v_sub_i32_e32 v8, vcc, v8, v0
+; CGP-NEXT: v_subb_u32_e64 v9, s[4:5], v11, v6, vcc
; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v11, v6
-; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v4
+; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v4
; CGP-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5]
-; CGP-NEXT: v_mov_b32_e32 v6, s8
-; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v10
-; CGP-NEXT: v_cndmask_b32_e64 v11, v6, v1, s[4:5]
+; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9
+; CGP-NEXT: v_cndmask_b32_e64 v10, -1, v1, s[4:5]
; CGP-NEXT: v_cvt_f32_u32_e32 v1, 0x1000
-; CGP-NEXT: v_cvt_f32_ubyte0_e32 v7, 0
+; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0
; CGP-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
-; CGP-NEXT: v_mac_f32_e32 v1, 0x4f800000, v7
+; CGP-NEXT: v_mac_f32_e32 v1, 0x4f800000, v6
; CGP-NEXT: v_rcp_iflag_f32_e32 v1, v1
-; CGP-NEXT: v_sub_i32_e32 v12, vcc, v9, v4
-; CGP-NEXT: v_subbrev_u32_e32 v13, vcc, 0, v0, vcc
+; CGP-NEXT: v_sub_i32_e32 v11, vcc, v8, v4
+; CGP-NEXT: v_subbrev_u32_e32 v12, vcc, 0, v0, vcc
; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v1
; CGP-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
-; CGP-NEXT: v_trunc_f32_e32 v7, v1
-; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v7
-; CGP-NEXT: v_cvt_u32_f32_e32 v14, v0
-; CGP-NEXT: v_cvt_u32_f32_e32 v15, v7
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v12, v4
-; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc
-; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v14, 0
-; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v13
-; CGP-NEXT: v_cndmask_b32_e32 v16, v6, v8, vcc
-; CGP-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s6, v15, v[1:2]
-; CGP-NEXT: v_sub_i32_e32 v1, vcc, v12, v4
-; CGP-NEXT: v_mad_u64_u32 v[7:8], s[4:5], -1, v14, v[7:8]
-; CGP-NEXT: v_subbrev_u32_e32 v17, vcc, 0, v13, vcc
-; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16
-; CGP-NEXT: v_cndmask_b32_e32 v8, v12, v1, vcc
-; CGP-NEXT: v_mul_lo_u32 v1, v15, v0
-; CGP-NEXT: v_mul_lo_u32 v12, v14, v7
-; CGP-NEXT: v_mul_hi_u32 v16, v14, v0
-; CGP-NEXT: v_cndmask_b32_e32 v13, v13, v17, vcc
-; CGP-NEXT: v_mul_hi_u32 v0, v15, v0
-; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v12
-; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v16
+; CGP-NEXT: v_trunc_f32_e32 v6, v1
+; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v6
+; CGP-NEXT: v_cvt_u32_f32_e32 v13, v0
+; CGP-NEXT: v_cvt_u32_f32_e32 v14, v6
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v11, v4
+; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc
+; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v13, 0
+; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v12
+; CGP-NEXT: v_cndmask_b32_e32 v15, -1, v7, vcc
+; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v14, v[1:2]
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, v11, v4
+; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], -1, v13, v[6:7]
+; CGP-NEXT: v_subbrev_u32_e32 v16, vcc, 0, v12, vcc
+; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15
+; CGP-NEXT: v_cndmask_b32_e32 v7, v11, v1, vcc
+; CGP-NEXT: v_mul_lo_u32 v1, v14, v0
+; CGP-NEXT: v_mul_lo_u32 v11, v13, v6
+; CGP-NEXT: v_mul_hi_u32 v15, v13, v0
+; CGP-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc
+; CGP-NEXT: v_mul_hi_u32 v0, v14, v0
+; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v11
+; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v15
; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v16, v15, v7
-; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v1
-; CGP-NEXT: v_mul_hi_u32 v12, v14, v7
-; CGP-NEXT: v_add_i32_e32 v0, vcc, v16, v0
-; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v12
-; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v12, vcc, v16, v12
-; CGP-NEXT: v_mul_hi_u32 v7, v15, v7
+; CGP-NEXT: v_mul_lo_u32 v15, v14, v6
+; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v1
+; CGP-NEXT: v_mul_hi_u32 v11, v13, v6
+; CGP-NEXT: v_add_i32_e32 v0, vcc, v15, v0
+; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v11
+; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11
+; CGP-NEXT: v_mul_hi_u32 v6, v14, v6
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v1
-; CGP-NEXT: v_add_i32_e32 v1, vcc, v7, v1
-; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v0
-; CGP-NEXT: v_addc_u32_e32 v14, vcc, v15, v1, vcc
-; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v12, 0
-; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11
-; CGP-NEXT: v_cndmask_b32_e32 v7, v9, v8, vcc
-; CGP-NEXT: v_cndmask_b32_e32 v9, v10, v13, vcc
-; CGP-NEXT: v_xor_b32_e32 v10, v7, v5
-; CGP-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s6, v14, v[1:2]
-; CGP-NEXT: v_xor_b32_e32 v1, v9, v5
-; CGP-NEXT: v_ashrrev_i32_e32 v9, 31, v3
-; CGP-NEXT: v_mad_u64_u32 v[7:8], s[4:5], -1, v12, v[7:8]
-; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v9
-; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v9, vcc
-; CGP-NEXT: v_xor_b32_e32 v11, v2, v9
-; CGP-NEXT: v_mul_lo_u32 v2, v14, v0
-; CGP-NEXT: v_mul_lo_u32 v8, v12, v7
-; CGP-NEXT: v_xor_b32_e32 v13, v3, v9
-; CGP-NEXT: v_mul_hi_u32 v3, v12, v0
-; CGP-NEXT: v_mul_hi_u32 v0, v14, v0
+; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v1
+; CGP-NEXT: v_add_i32_e32 v1, vcc, v6, v1
+; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v0
+; CGP-NEXT: v_addc_u32_e32 v13, vcc, v14, v1, vcc
+; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v11, 0
+; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10
+; CGP-NEXT: v_cndmask_b32_e32 v6, v8, v7, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v8, v9, v12, vcc
+; CGP-NEXT: v_xor_b32_e32 v9, v6, v5
+; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2]
+; CGP-NEXT: v_xor_b32_e32 v1, v8, v5
+; CGP-NEXT: v_ashrrev_i32_e32 v8, 31, v3
+; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], -1, v11, v[6:7]
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v8
-; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v8, vcc
+; CGP-NEXT: v_xor_b32_e32 v7, v2, v8
+; CGP-NEXT: v_mul_lo_u32 v2, v13, v0
+; CGP-NEXT: v_mul_lo_u32 v10, v11, v6
+; CGP-NEXT: v_xor_b32_e32 v12, v3, v8
+; CGP-NEXT: v_mul_hi_u32 v3, v11, v0
+; CGP-NEXT: v_mul_hi_u32 v0, v13, v0
+; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v10
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v3, v14, v7
-; CGP-NEXT: v_add_i32_e32 v2, vcc, v8, v2
-; CGP-NEXT: v_mul_hi_u32 v8, v12, v7
+; CGP-NEXT: v_mul_lo_u32 v3, v13, v6
+; CGP-NEXT: v_add_i32_e32 v2, vcc, v10, v2
+; CGP-NEXT: v_mul_hi_u32 v10, v11, v6
; CGP-NEXT: v_add_i32_e32 v0, vcc, v3, v0
; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v8
-; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v8
-; CGP-NEXT: v_mul_hi_u32 v7, v14, v7
+; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v10
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v10
+; CGP-NEXT: v_mul_hi_u32 v6, v13, v6
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2
-; CGP-NEXT: v_add_i32_e32 v2, vcc, v7, v2
-; CGP-NEXT: v_add_i32_e32 v3, vcc, v12, v0
-; CGP-NEXT: v_addc_u32_e32 v2, vcc, v14, v2, vcc
-; CGP-NEXT: v_mul_lo_u32 v7, v13, v3
-; CGP-NEXT: v_mul_lo_u32 v8, v11, v2
-; CGP-NEXT: v_sub_i32_e32 v0, vcc, v10, v5
+; CGP-NEXT: v_add_i32_e32 v2, vcc, v6, v2
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v11, v0
+; CGP-NEXT: v_addc_u32_e32 v2, vcc, v13, v2, vcc
+; CGP-NEXT: v_mul_lo_u32 v6, v12, v3
+; CGP-NEXT: v_mul_lo_u32 v10, v7, v2
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v9, v5
; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc
-; CGP-NEXT: v_mul_hi_u32 v5, v11, v3
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8
-; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5
+; CGP-NEXT: v_mul_hi_u32 v5, v7, v3
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5
; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v7, v13, v2
-; CGP-NEXT: v_mul_hi_u32 v3, v13, v3
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5
-; CGP-NEXT: v_mul_hi_u32 v8, v11, v2
-; CGP-NEXT: v_add_i32_e32 v3, vcc, v7, v3
-; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v8
-; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8
+; CGP-NEXT: v_mul_lo_u32 v6, v12, v2
+; CGP-NEXT: v_mul_hi_u32 v3, v12, v3
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v9, v5
+; CGP-NEXT: v_mul_hi_u32 v9, v7, v2
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v6, v3
+; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v9
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v9
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5
-; CGP-NEXT: v_mul_hi_u32 v8, v13, v2
+; CGP-NEXT: v_mul_hi_u32 v9, v12, v2
; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], s7, v3, 0
; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5
-; CGP-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s7, v5, v[3:4]
-; CGP-NEXT: v_sub_i32_e32 v2, vcc, v11, v2
-; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v13, v7
-; CGP-NEXT: v_subb_u32_e64 v3, s[4:5], v13, v7, vcc
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v9, v5
+; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s7, v5, v[3:4]
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v7, v2
+; CGP-NEXT: v_subb_u32_e64 v3, s[4:5], v12, v5, vcc
+; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v12, v5
; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
-; CGP-NEXT: v_sub_i32_e32 v8, vcc, v2, v4
+; CGP-NEXT: v_sub_i32_e32 v7, vcc, v2, v4
; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v7, v4
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc
+; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v4
-; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; CGP-NEXT: v_cndmask_b32_e32 v9, -1, v9, vcc
+; CGP-NEXT: v_sub_i32_e32 v4, vcc, v7, v4
+; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3
-; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc
-; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
-; CGP-NEXT: v_cndmask_b32_e64 v7, v6, v7, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc
-; CGP-NEXT: v_sub_i32_e32 v4, vcc, v8, v4
; CGP-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v5, vcc
-; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
-; CGP-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
+; CGP-NEXT: v_cndmask_b32_e64 v6, -1, v6, s[4:5]
+; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9
+; CGP-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc
; CGP-NEXT: v_cndmask_b32_e32 v5, v5, v10, vcc
-; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
+; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; CGP-NEXT: v_xor_b32_e32 v2, v2, v9
-; CGP-NEXT: v_xor_b32_e32 v3, v3, v9
-; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v9
-; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc
+; CGP-NEXT: v_xor_b32_e32 v2, v2, v8
+; CGP-NEXT: v_xor_b32_e32 v3, v3, v8
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v8
+; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v8, vcc
; CGP-NEXT: s_setpc_b64 s[30:31]
%result = srem <2 x i64> %num, <i64 4096, i64 4096>
ret <2 x i64> %result
@@ -1703,23 +1695,21 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) {
; CHECK-NEXT: v_mov_b32_e32 v6, 0x12d8fb
; CHECK-NEXT: v_subb_u32_e64 v2, vcc, v4, v1, s[4:5]
; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v4, v1
-; CHECK-NEXT: s_bfe_i32 s6, 1, 0x10000
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v6
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc
-; CHECK-NEXT: v_mov_b32_e32 v4, s6
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; CHECK-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
-; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v6
+; CHECK-NEXT: v_cndmask_b32_e32 v3, -1, v3, vcc
+; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v0, v6
; CHECK-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5]
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v5, v6
-; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v6
+; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
-; CHECK-NEXT: v_subrev_i32_e32 v6, vcc, 0x12d8fb, v5
+; CHECK-NEXT: v_cndmask_b32_e32 v5, -1, v5, vcc
+; CHECK-NEXT: v_subrev_i32_e32 v6, vcc, 0x12d8fb, v4
; CHECK-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v1, vcc
-; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
-; CHECK-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc
+; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
+; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
@@ -1831,121 +1821,119 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6
; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v5, v6, v[1:2]
-; GISEL-NEXT: s_bfe_i32 s6, 1, 0x10000
+; GISEL-NEXT: s_sub_u32 s6, 0, 0x12d8fb
+; GISEL-NEXT: s_subb_u32 s7, 0, 0
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], 0, v8, v[6:7]
; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v10, v0
; GISEL-NEXT: v_subb_u32_e64 v9, s[4:5], v11, v6, vcc
; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v11, v6
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v5
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5]
-; GISEL-NEXT: v_mov_b32_e32 v10, s6
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v11, v10, v1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v10, -1, v1, s[4:5]
; GISEL-NEXT: v_cvt_f32_u32_e32 v1, 0x12d8fb
; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v6, 0
; GISEL-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
; GISEL-NEXT: v_mac_f32_e32 v1, 0x4f800000, v6
; GISEL-NEXT: v_rcp_iflag_f32_e32 v1, v1
-; GISEL-NEXT: v_sub_i32_e32 v12, vcc, v8, v5
-; GISEL-NEXT: v_subbrev_u32_e32 v13, vcc, 0, v0, vcc
+; GISEL-NEXT: v_sub_i32_e32 v11, vcc, v8, v5
+; GISEL-NEXT: v_subbrev_u32_e32 v12, vcc, 0, v0, vcc
; GISEL-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v1
; GISEL-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GISEL-NEXT: v_trunc_f32_e32 v6, v1
; GISEL-NEXT: v_mac_f32_e32 v0, 0xcf800000, v6
-; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v0
-; GISEL-NEXT: s_sub_u32 s6, 0, 0x12d8fb
-; GISEL-NEXT: v_cvt_u32_f32_e32 v15, v6
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v12, v5
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v14, 0
+; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v0
+; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v6
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v11, v5
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v13
-; GISEL-NEXT: v_cndmask_b32_e32 v16, v10, v7, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v15, v[1:2]
-; GISEL-NEXT: s_subb_u32 s7, 0, 0
-; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v12, v5
-; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v14, v[6:7]
-; GISEL-NEXT: v_subbrev_u32_e32 v17, vcc, 0, v13, vcc
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16
-; GISEL-NEXT: v_cndmask_b32_e32 v7, v12, v1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v1, v15, v0
-; GISEL-NEXT: v_mul_lo_u32 v12, v14, v6
-; GISEL-NEXT: v_mul_hi_u32 v16, v14, v0
-; GISEL-NEXT: v_cndmask_b32_e32 v13, v13, v17, vcc
-; GISEL-NEXT: v_mul_hi_u32 v0, v15, v0
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v12
-; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v16
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v13, 0
+; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v12
+; GISEL-NEXT: v_cndmask_b32_e32 v15, -1, v7, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v14, v[1:2]
+; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v11, v5
+; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v13, v[6:7]
+; GISEL-NEXT: v_subbrev_u32_e32 v16, vcc, 0, v12, vcc
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15
+; GISEL-NEXT: v_cndmask_b32_e32 v7, v11, v1, vcc
+; GISEL-NEXT: v_mul_lo_u32 v1, v14, v0
+; GISEL-NEXT: v_mul_lo_u32 v11, v13, v6
+; GISEL-NEXT: v_mul_hi_u32 v15, v13, v0
+; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc
+; GISEL-NEXT: v_mul_hi_u32 v0, v14, v0
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v11
+; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v15
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v16, v15, v6
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v12, v1
-; GISEL-NEXT: v_mul_hi_u32 v12, v14, v6
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v16, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v12
-; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, v16, v12
-; GISEL-NEXT: v_mul_hi_u32 v6, v15, v6
+; GISEL-NEXT: v_mul_lo_u32 v15, v14, v6
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v11, v1
+; GISEL-NEXT: v_mul_hi_u32 v11, v13, v6
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v0
+; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v11
+; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v15, v11
+; GISEL-NEXT: v_mul_hi_u32 v6, v14, v6
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v12, v1
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v11, v1
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v6, v1
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, v14, v0
-; GISEL-NEXT: v_addc_u32_e32 v14, vcc, v15, v1, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v12, 0
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v0
+; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v14, v1, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v11, 0
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10
; GISEL-NEXT: v_cndmask_b32_e32 v8, v8, v7, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v14, v[1:2]
+; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2]
; GISEL-NEXT: v_xor_b32_e32 v1, v8, v4
; GISEL-NEXT: v_ashrrev_i32_e32 v8, 31, v3
-; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v12, v[6:7]
-; GISEL-NEXT: v_cndmask_b32_e32 v9, v9, v13, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v11, v[6:7]
+; GISEL-NEXT: v_cndmask_b32_e32 v9, v9, v12, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v8
; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v8, vcc
-; GISEL-NEXT: v_xor_b32_e32 v11, v2, v8
-; GISEL-NEXT: v_mul_lo_u32 v2, v14, v0
-; GISEL-NEXT: v_mul_lo_u32 v7, v12, v6
-; GISEL-NEXT: v_xor_b32_e32 v13, v3, v8
-; GISEL-NEXT: v_mul_hi_u32 v3, v12, v0
-; GISEL-NEXT: v_mul_hi_u32 v0, v14, v0
+; GISEL-NEXT: v_xor_b32_e32 v10, v2, v8
+; GISEL-NEXT: v_mul_lo_u32 v2, v13, v0
+; GISEL-NEXT: v_mul_lo_u32 v7, v11, v6
+; GISEL-NEXT: v_xor_b32_e32 v12, v3, v8
+; GISEL-NEXT: v_mul_hi_u32 v3, v11, v0
+; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v3, v14, v6
+; GISEL-NEXT: v_mul_lo_u32 v3, v13, v6
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7, v2
-; GISEL-NEXT: v_mul_hi_u32 v7, v12, v6
+; GISEL-NEXT: v_mul_hi_u32 v7, v11, v6
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0
; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
-; GISEL-NEXT: v_mul_hi_u32 v6, v14, v6
+; GISEL-NEXT: v_mul_hi_u32 v6, v13, v6
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v12, v0
-; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v14, v2, vcc
-; GISEL-NEXT: v_mul_lo_u32 v3, v13, v0
-; GISEL-NEXT: v_mul_lo_u32 v6, v11, v2
-; GISEL-NEXT: v_mul_hi_u32 v7, v11, v0
-; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0
+; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v13, v2, vcc
+; GISEL-NEXT: v_mul_lo_u32 v3, v12, v0
+; GISEL-NEXT: v_mul_lo_u32 v6, v10, v2
+; GISEL-NEXT: v_mul_hi_u32 v7, v10, v0
+; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0
; GISEL-NEXT: v_xor_b32_e32 v9, v9, v4
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v7, v13, v2
+; GISEL-NEXT: v_mul_lo_u32 v7, v12, v2
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v6, v3
-; GISEL-NEXT: v_mul_hi_u32 v6, v11, v2
+; GISEL-NEXT: v_mul_hi_u32 v6, v10, v2
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v7, v0
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, v0, v3
-; GISEL-NEXT: v_mul_hi_u32 v7, v13, v2
-; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v12, 0
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v0, v3
+; GISEL-NEXT: v_mul_hi_u32 v7, v12, v2
+; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v11, 0
; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0
; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v0
@@ -1953,25 +1941,23 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v5, v6, v[0:1]
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4
; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v9, v4, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v12, v[6:7]
-; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v11, v2
-; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v13, v3, vcc
-; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v13, v3
+; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v11, v[6:7]
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v10, v2
+; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc
+; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v12, v3
; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5
; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v2, v5
-; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4
; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v6, v10, v6, s[4:5]
-; GISEL-NEXT: s_bfe_i32 s4, 1, 0x10000
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v7, v5
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc
-; GISEL-NEXT: v_mov_b32_e32 v10, s4
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; GISEL-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5
+; GISEL-NEXT: v_cndmask_b32_e32 v9, -1, v9, vcc
; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v7, v5
+; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4
; GISEL-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v3, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v6, -1, v6, s[4:5]
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9
; GISEL-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v10, vcc
@@ -1993,7 +1979,6 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
; CGP-NEXT: s_mov_b32 s7, 0x12d8fb
; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4
-; CGP-NEXT: s_bfe_i32 s8, 1, 0x10000
; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
; CGP-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
; CGP-NEXT: v_trunc_f32_e32 v6, v5
@@ -2083,149 +2068,148 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6
; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6
; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v6, v[1:2]
-; CGP-NEXT: v_sub_i32_e32 v9, vcc, v8, v0
-; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v11, v6, vcc
+; CGP-NEXT: v_sub_i32_e32 v8, vcc, v8, v0
+; CGP-NEXT: v_subb_u32_e64 v9, s[4:5], v11, v6, vcc
; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v11, v6
-; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v4
+; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v4
; CGP-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5]
-; CGP-NEXT: v_mov_b32_e32 v6, s8
-; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v10
-; CGP-NEXT: v_cndmask_b32_e64 v11, v6, v1, s[4:5]
+; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9
+; CGP-NEXT: v_cndmask_b32_e64 v10, -1, v1, s[4:5]
; CGP-NEXT: v_cvt_f32_u32_e32 v1, 0x12d8fb
-; CGP-NEXT: v_cvt_f32_ubyte0_e32 v7, 0
+; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0
; CGP-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
-; CGP-NEXT: v_mac_f32_e32 v1, 0x4f800000, v7
+; CGP-NEXT: v_mac_f32_e32 v1, 0x4f800000, v6
; CGP-NEXT: v_rcp_iflag_f32_e32 v1, v1
-; CGP-NEXT: v_sub_i32_e32 v12, vcc, v9, v4
-; CGP-NEXT: v_subbrev_u32_e32 v13, vcc, 0, v0, vcc
+; CGP-NEXT: v_sub_i32_e32 v11, vcc, v8, v4
+; CGP-NEXT: v_subbrev_u32_e32 v12, vcc, 0, v0, vcc
; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v1
; CGP-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
-; CGP-NEXT: v_trunc_f32_e32 v7, v1
-; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v7
-; CGP-NEXT: v_cvt_u32_f32_e32 v14, v0
-; CGP-NEXT: v_cvt_u32_f32_e32 v15, v7
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v12, v4
-; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc
-; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v14, 0
-; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v13
-; CGP-NEXT: v_cndmask_b32_e32 v16, v6, v8, vcc
-; CGP-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s6, v15, v[1:2]
-; CGP-NEXT: v_sub_i32_e32 v1, vcc, v12, v4
-; CGP-NEXT: v_mad_u64_u32 v[7:8], s[4:5], -1, v14, v[7:8]
-; CGP-NEXT: v_subbrev_u32_e32 v17, vcc, 0, v13, vcc
-; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16
-; CGP-NEXT: v_cndmask_b32_e32 v8, v12, v1, vcc
-; CGP-NEXT: v_mul_lo_u32 v1, v15, v0
-; CGP-NEXT: v_mul_lo_u32 v12, v14, v7
-; CGP-NEXT: v_mul_hi_u32 v16, v14, v0
-; CGP-NEXT: v_cndmask_b32_e32 v13, v13, v17, vcc
-; CGP-NEXT: v_mul_hi_u32 v0, v15, v0
-; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v12
-; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v16
+; CGP-NEXT: v_trunc_f32_e32 v6, v1
+; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v6
+; CGP-NEXT: v_cvt_u32_f32_e32 v13, v0
+; CGP-NEXT: v_cvt_u32_f32_e32 v14, v6
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v11, v4
+; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc
+; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v13, 0
+; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v12
+; CGP-NEXT: v_cndmask_b32_e32 v15, -1, v7, vcc
+; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v14, v[1:2]
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, v11, v4
+; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], -1, v13, v[6:7]
+; CGP-NEXT: v_subbrev_u32_e32 v16, vcc, 0, v12, vcc
+; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15
+; CGP-NEXT: v_cndmask_b32_e32 v7, v11, v1, vcc
+; CGP-NEXT: v_mul_lo_u32 v1, v14, v0
+; CGP-NEXT: v_mul_lo_u32 v11, v13, v6
+; CGP-NEXT: v_mul_hi_u32 v15, v13, v0
+; CGP-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc
+; CGP-NEXT: v_mul_hi_u32 v0, v14, v0
+; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v11
+; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v15
; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v16, v15, v7
-; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v1
-; CGP-NEXT: v_mul_hi_u32 v12, v14, v7
-; CGP-NEXT: v_add_i32_e32 v0, vcc, v16, v0
-; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v12
-; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v12, vcc, v16, v12
-; CGP-NEXT: v_mul_hi_u32 v7, v15, v7
+; CGP-NEXT: v_mul_lo_u32 v15, v14, v6
+; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v1
+; CGP-NEXT: v_mul_hi_u32 v11, v13, v6
+; CGP-NEXT: v_add_i32_e32 v0, vcc, v15, v0
+; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v11
+; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11
+; CGP-NEXT: v_mul_hi_u32 v6, v14, v6
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v1
-; CGP-NEXT: v_add_i32_e32 v1, vcc, v7, v1
-; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v0
-; CGP-NEXT: v_addc_u32_e32 v14, vcc, v15, v1, vcc
-; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v12, 0
-; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11
-; CGP-NEXT: v_cndmask_b32_e32 v7, v9, v8, vcc
-; CGP-NEXT: v_cndmask_b32_e32 v9, v10, v13, vcc
-; CGP-NEXT: v_xor_b32_e32 v10, v7, v5
-; CGP-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s6, v14, v[1:2]
-; CGP-NEXT: v_xor_b32_e32 v1, v9, v5
-; CGP-NEXT: v_ashrrev_i32_e32 v9, 31, v3
-; CGP-NEXT: v_mad_u64_u32 v[7:8], s[4:5], -1, v12, v[7:8]
-; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v9
-; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v9, vcc
-; CGP-NEXT: v_xor_b32_e32 v11, v2, v9
-; CGP-NEXT: v_mul_lo_u32 v2, v14, v0
-; CGP-NEXT: v_mul_lo_u32 v8, v12, v7
-; CGP-NEXT: v_xor_b32_e32 v13, v3, v9
-; CGP-NEXT: v_mul_hi_u32 v3, v12, v0
-; CGP-NEXT: v_mul_hi_u32 v0, v14, v0
+; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v1
+; CGP-NEXT: v_add_i32_e32 v1, vcc, v6, v1
+; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v0
+; CGP-NEXT: v_addc_u32_e32 v13, vcc, v14, v1, vcc
+; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v11, 0
+; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10
+; CGP-NEXT: v_cndmask_b32_e32 v6, v8, v7, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v8, v9, v12, vcc
+; CGP-NEXT: v_xor_b32_e32 v9, v6, v5
+; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2]
+; CGP-NEXT: v_xor_b32_e32 v1, v8, v5
+; CGP-NEXT: v_ashrrev_i32_e32 v8, 31, v3
+; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], -1, v11, v[6:7]
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v8
-; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v8, vcc
+; CGP-NEXT: v_xor_b32_e32 v7, v2, v8
+; CGP-NEXT: v_mul_lo_u32 v2, v13, v0
+; CGP-NEXT: v_mul_lo_u32 v10, v11, v6
+; CGP-NEXT: v_xor_b32_e32 v12, v3, v8
+; CGP-NEXT: v_mul_hi_u32 v3, v11, v0
+; CGP-NEXT: v_mul_hi_u32 v0, v13, v0
+; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v10
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v3, v14, v7
-; CGP-NEXT: v_add_i32_e32 v2, vcc, v8, v2
-; CGP-NEXT: v_mul_hi_u32 v8, v12, v7
+; CGP-NEXT: v_mul_lo_u32 v3, v13, v6
+; CGP-NEXT: v_add_i32_e32 v2, vcc, v10, v2
+; CGP-NEXT: v_mul_hi_u32 v10, v11, v6
; CGP-NEXT: v_add_i32_e32 v0, vcc, v3, v0
; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v8
-; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v8
-; CGP-NEXT: v_mul_hi_u32 v7, v14, v7
+; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v10
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v10
+; CGP-NEXT: v_mul_hi_u32 v6, v13, v6
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2
-; CGP-NEXT: v_add_i32_e32 v2, vcc, v7, v2
-; CGP-NEXT: v_add_i32_e32 v3, vcc, v12, v0
-; CGP-NEXT: v_addc_u32_e32 v2, vcc, v14, v2, vcc
-; CGP-NEXT: v_mul_lo_u32 v7, v13, v3
-; CGP-NEXT: v_mul_lo_u32 v8, v11, v2
-; CGP-NEXT: v_sub_i32_e32 v0, vcc, v10, v5
+; CGP-NEXT: v_add_i32_e32 v2, vcc, v6, v2
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v11, v0
+; CGP-NEXT: v_addc_u32_e32 v2, vcc, v13, v2, vcc
+; CGP-NEXT: v_mul_lo_u32 v6, v12, v3
+; CGP-NEXT: v_mul_lo_u32 v10, v7, v2
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v9, v5
; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc
-; CGP-NEXT: v_mul_hi_u32 v5, v11, v3
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8
-; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5
+; CGP-NEXT: v_mul_hi_u32 v5, v7, v3
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5
; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v7, v13, v2
-; CGP-NEXT: v_mul_hi_u32 v3, v13, v3
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5
-; CGP-NEXT: v_mul_hi_u32 v8, v11, v2
-; CGP-NEXT: v_add_i32_e32 v3, vcc, v7, v3
-; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v8
-; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8
+; CGP-NEXT: v_mul_lo_u32 v6, v12, v2
+; CGP-NEXT: v_mul_hi_u32 v3, v12, v3
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v9, v5
+; CGP-NEXT: v_mul_hi_u32 v9, v7, v2
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v6, v3
+; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v9
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v9
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5
-; CGP-NEXT: v_mul_hi_u32 v8, v13, v2
+; CGP-NEXT: v_mul_hi_u32 v9, v12, v2
; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], s7, v3, 0
; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5
-; CGP-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s7, v5, v[3:4]
-; CGP-NEXT: v_sub_i32_e32 v2, vcc, v11, v2
-; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v13, v7
-; CGP-NEXT: v_subb_u32_e64 v3, s[4:5], v13, v7, vcc
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v9, v5
+; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s7, v5, v[3:4]
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v7, v2
+; CGP-NEXT: v_subb_u32_e64 v3, s[4:5], v12, v5, vcc
+; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v12, v5
; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
-; CGP-NEXT: v_sub_i32_e32 v8, vcc, v2, v4
+; CGP-NEXT: v_sub_i32_e32 v7, vcc, v2, v4
; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v7, v4
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc
+; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v4
-; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; CGP-NEXT: v_cndmask_b32_e32 v9, -1, v9, vcc
+; CGP-NEXT: v_sub_i32_e32 v4, vcc, v7, v4
+; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3
-; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc
-; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
-; CGP-NEXT: v_cndmask_b32_e64 v7, v6, v7, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc
-; CGP-NEXT: v_sub_i32_e32 v4, vcc, v8, v4
; CGP-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v5, vcc
-; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
-; CGP-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
+; CGP-NEXT: v_cndmask_b32_e64 v6, -1, v6, s[4:5]
+; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9
+; CGP-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc
; CGP-NEXT: v_cndmask_b32_e32 v5, v5, v10, vcc
-; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
+; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; CGP-NEXT: v_xor_b32_e32 v2, v2, v9
-; CGP-NEXT: v_xor_b32_e32 v3, v3, v9
-; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v9
-; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc
+; CGP-NEXT: v_xor_b32_e32 v2, v2, v8
+; CGP-NEXT: v_xor_b32_e32 v3, v3, v8
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v8
+; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v8, vcc
; CGP-NEXT: s_setpc_b64 s[30:31]
%result = srem <2 x i64> %num, <i64 1235195, i64 1235195>
ret <2 x i64> %result
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
index 9cb924ba233e87..df8f3a702e8858 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
@@ -1905,255 +1905,247 @@ define <2 x i64> @v_udiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
; GISEL-NEXT: v_and_b32_e32 v2, 0xffffff, v2
; GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v4
; GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v6
-; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v4, 0
-; GISEL-NEXT: s_bfe_i32 s12, 1, 0x10000
-; GISEL-NEXT: s_bfe_i32 s13, 1, 0x10000
-; GISEL-NEXT: s_bfe_i32 s14, 1, 0x10000
-; GISEL-NEXT: s_bfe_i32 s15, 1, 0x10000
-; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v1
-; GISEL-NEXT: v_sub_i32_e32 v6, vcc, 0, v1
-; GISEL-NEXT: v_cvt_f32_u32_e32 v7, v0
-; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], 0, v0
-; GISEL-NEXT: v_subb_u32_e64 v9, s[6:7], 0, 0, vcc
-; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_mac_f32_e32 v5, 0x4f800000, v4
-; GISEL-NEXT: v_mac_f32_e32 v7, 0x4f800000, v4
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v5
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v7
-; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
-; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5
-; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v4
-; GISEL-NEXT: v_mul_f32_e32 v11, 0x2f800000, v5
-; GISEL-NEXT: v_trunc_f32_e32 v7, v7
+; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v6, 0
+; GISEL-NEXT: v_cvt_f32_u32_e32 v7, v1
+; GISEL-NEXT: v_sub_i32_e32 v4, vcc, 0, v1
+; GISEL-NEXT: v_subb_u32_e64 v5, s[4:5], 0, 0, vcc
+; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v0
+; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v0
+; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], 0, 0, vcc
+; GISEL-NEXT: v_mac_f32_e32 v7, 0x4f800000, v6
+; GISEL-NEXT: v_mac_f32_e32 v8, 0x4f800000, v6
+; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v7
+; GISEL-NEXT: v_rcp_iflag_f32_e32 v7, v8
+; GISEL-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6
+; GISEL-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7
+; GISEL-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6
+; GISEL-NEXT: v_mul_f32_e32 v11, 0x2f800000, v7
+; GISEL-NEXT: v_trunc_f32_e32 v8, v8
; GISEL-NEXT: v_trunc_f32_e32 v11, v11
-; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v7
-; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7
-; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v11
+; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8
+; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8
+; GISEL-NEXT: v_mac_f32_e32 v7, 0xcf800000, v11
; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v11
-; GISEL-NEXT: v_cvt_u32_f32_e32 v12, v4
-; GISEL-NEXT: v_mul_lo_u32 v4, v6, v7
-; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
-; GISEL-NEXT: v_mul_lo_u32 v13, v8, v11
-; GISEL-NEXT: v_mul_lo_u32 v14, v6, v12
-; GISEL-NEXT: v_mul_lo_u32 v15, v9, v12
-; GISEL-NEXT: v_mul_hi_u32 v16, v6, v12
-; GISEL-NEXT: v_mul_lo_u32 v17, v8, v5
-; GISEL-NEXT: v_mul_lo_u32 v18, v10, v5
-; GISEL-NEXT: v_mul_hi_u32 v19, v8, v5
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v15, v4
-; GISEL-NEXT: v_mul_lo_u32 v15, v7, v14
-; GISEL-NEXT: v_mul_hi_u32 v20, v12, v14
-; GISEL-NEXT: v_mul_hi_u32 v14, v7, v14
+; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
+; GISEL-NEXT: v_mul_lo_u32 v12, v4, v8
+; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7
+; GISEL-NEXT: v_mul_lo_u32 v13, v9, v11
+; GISEL-NEXT: v_mul_lo_u32 v14, v4, v6
+; GISEL-NEXT: v_mul_lo_u32 v15, v5, v6
+; GISEL-NEXT: v_mul_hi_u32 v16, v4, v6
+; GISEL-NEXT: v_mul_lo_u32 v17, v9, v7
+; GISEL-NEXT: v_mul_lo_u32 v18, v10, v7
+; GISEL-NEXT: v_mul_hi_u32 v19, v9, v7
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v12
+; GISEL-NEXT: v_mul_lo_u32 v15, v8, v14
+; GISEL-NEXT: v_mul_hi_u32 v20, v6, v14
+; GISEL-NEXT: v_mul_hi_u32 v14, v8, v14
; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v13
; GISEL-NEXT: v_mul_lo_u32 v18, v11, v17
-; GISEL-NEXT: v_add_i32_e32 v16, vcc, v4, v16
-; GISEL-NEXT: v_mul_hi_u32 v4, v5, v17
+; GISEL-NEXT: v_mul_hi_u32 v21, v7, v17
; GISEL-NEXT: v_mul_hi_u32 v17, v11, v17
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16
; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v19
-; GISEL-NEXT: v_mul_lo_u32 v19, v5, v13
-; GISEL-NEXT: v_add_i32_e32 v18, vcc, v18, v19
-; GISEL-NEXT: v_mul_lo_u32 v19, v12, v16
-; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v18, v4
-; GISEL-NEXT: v_mul_lo_u32 v4, v7, v16
-; GISEL-NEXT: v_mul_hi_u32 v18, v12, v16
-; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v15, v19
-; GISEL-NEXT: v_mul_lo_u32 v19, v11, v13
-; GISEL-NEXT: v_add_i32_e64 v4, s[8:9], v4, v14
-; GISEL-NEXT: v_mul_hi_u32 v14, v5, v13
-; GISEL-NEXT: v_add_i32_e64 v17, s[10:11], v19, v17
+; GISEL-NEXT: v_mul_lo_u32 v16, v6, v12
+; GISEL-NEXT: v_mul_lo_u32 v19, v8, v12
+; GISEL-NEXT: v_mul_hi_u32 v22, v6, v12
+; GISEL-NEXT: v_mul_hi_u32 v12, v8, v12
+; GISEL-NEXT: v_mul_lo_u32 v23, v7, v13
+; GISEL-NEXT: v_mul_lo_u32 v24, v11, v13
+; GISEL-NEXT: v_mul_hi_u32 v25, v7, v13
+; GISEL-NEXT: v_mul_hi_u32 v13, v11, v13
+; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16
+; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v14, vcc, v19, v14
; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v18, vcc, v18, v23
+; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v17, vcc, v24, v17
+; GISEL-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v20
-; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[6:7]
-; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[8:9]
-; GISEL-NEXT: v_add_i32_e64 v4, s[6:7], v4, v18
-; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[10:11]
-; GISEL-NEXT: v_add_i32_e64 v14, s[8:9], v17, v14
-; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v17
-; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[6:7]
-; GISEL-NEXT: v_add_i32_e32 v17, vcc, v20, v17
-; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v20
-; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[8:9]
-; GISEL-NEXT: v_add_i32_e32 v18, vcc, v18, v20
-; GISEL-NEXT: v_mov_b32_e32 v20, s12
-; GISEL-NEXT: v_add_i32_e32 v15, vcc, v4, v15
-; GISEL-NEXT: v_mov_b32_e32 v4, s13
-; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v14, v19
-; GISEL-NEXT: v_mov_b32_e32 v19, s14
-; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v12, v15
-; GISEL-NEXT: v_mov_b32_e32 v15, s15
-; GISEL-NEXT: v_mul_hi_u32 v16, v7, v16
-; GISEL-NEXT: v_mul_hi_u32 v13, v11, v13
-; GISEL-NEXT: v_add_i32_e64 v5, s[8:9], v5, v14
-; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v14, vcc, v17, v14
-; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e32 v17, vcc, v18, v17
-; GISEL-NEXT: v_mul_lo_u32 v18, v6, v12
-; GISEL-NEXT: v_mul_lo_u32 v9, v9, v12
-; GISEL-NEXT: v_add_i32_e32 v14, vcc, v16, v14
-; GISEL-NEXT: v_mul_hi_u32 v16, v6, v12
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17
-; GISEL-NEXT: v_mul_lo_u32 v17, v8, v5
-; GISEL-NEXT: v_mul_lo_u32 v10, v10, v5
-; GISEL-NEXT: v_addc_u32_e64 v7, vcc, v7, v14, s[6:7]
-; GISEL-NEXT: v_mul_hi_u32 v14, v8, v5
-; GISEL-NEXT: v_addc_u32_e64 v11, vcc, v11, v13, s[8:9]
-; GISEL-NEXT: v_mul_hi_u32 v13, v12, v18
-; GISEL-NEXT: v_mul_lo_u32 v6, v6, v7
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6
-; GISEL-NEXT: v_mul_hi_u32 v9, v5, v17
-; GISEL-NEXT: v_mul_lo_u32 v8, v8, v11
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8
-; GISEL-NEXT: v_mul_lo_u32 v10, v7, v18
-; GISEL-NEXT: v_mul_hi_u32 v18, v7, v18
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v16
-; GISEL-NEXT: v_mul_lo_u32 v16, v11, v17
-; GISEL-NEXT: v_mul_hi_u32 v17, v11, v17
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v14
-; GISEL-NEXT: v_mul_lo_u32 v14, v12, v6
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14
-; GISEL-NEXT: v_mul_lo_u32 v14, v7, v6
-; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13
-; GISEL-NEXT: v_mul_hi_u32 v10, v12, v6
-; GISEL-NEXT: v_mul_hi_u32 v6, v7, v6
-; GISEL-NEXT: v_mul_lo_u32 v13, v5, v8
-; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], v14, v18
-; GISEL-NEXT: v_mul_lo_u32 v18, v11, v8
-; GISEL-NEXT: v_add_i32_e64 v13, s[8:9], v16, v13
-; GISEL-NEXT: v_mul_hi_u32 v16, v5, v8
-; GISEL-NEXT: v_mul_hi_u32 v8, v11, v8
-; GISEL-NEXT: v_add_i32_e64 v17, s[10:11], v18, v17
+; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v22
+; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v18, vcc, v18, v21
; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v13, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[6:7]
-; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[8:9]
-; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v14, v10
-; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[10:11]
-; GISEL-NEXT: v_add_i32_e64 v16, s[8:9], v17, v16
-; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v18, v17
-; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7]
-; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v18
+; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v25
+; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15
+; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v20
+; GISEL-NEXT: v_add_i32_e32 v18, vcc, v23, v18
+; GISEL-NEXT: v_add_i32_e32 v19, vcc, v24, v21
+; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15
+; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v18
; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v18
-; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9]
-; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v18
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v17
-; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v16, v13
+; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15
+; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v18
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v16
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v14
+; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v12, vcc
+; GISEL-NEXT: v_mul_lo_u32 v12, v4, v6
+; GISEL-NEXT: v_mul_lo_u32 v5, v5, v6
+; GISEL-NEXT: v_mul_hi_u32 v14, v4, v6
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v17
+; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v11, v13, vcc
+; GISEL-NEXT: v_mul_lo_u32 v13, v9, v7
+; GISEL-NEXT: v_mul_lo_u32 v10, v10, v7
+; GISEL-NEXT: v_mul_hi_u32 v15, v9, v7
+; GISEL-NEXT: v_mul_lo_u32 v4, v4, v8
+; GISEL-NEXT: v_mul_lo_u32 v16, v8, v12
+; GISEL-NEXT: v_mul_hi_u32 v17, v6, v12
+; GISEL-NEXT: v_mul_hi_u32 v12, v8, v12
+; GISEL-NEXT: v_mul_lo_u32 v9, v9, v11
+; GISEL-NEXT: v_mul_lo_u32 v18, v11, v13
+; GISEL-NEXT: v_mul_hi_u32 v19, v7, v13
+; GISEL-NEXT: v_mul_hi_u32 v13, v11, v13
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v5, v4
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v10, v9
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v14
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v15
+; GISEL-NEXT: v_mul_lo_u32 v9, v6, v4
+; GISEL-NEXT: v_mul_lo_u32 v10, v8, v4
+; GISEL-NEXT: v_mul_hi_u32 v14, v6, v4
+; GISEL-NEXT: v_mul_hi_u32 v4, v8, v4
+; GISEL-NEXT: v_mul_lo_u32 v15, v7, v5
+; GISEL-NEXT: v_mul_lo_u32 v20, v11, v5
+; GISEL-NEXT: v_mul_hi_u32 v21, v7, v5
+; GISEL-NEXT: v_mul_hi_u32 v5, v11, v5
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v16, v9
; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[4:5]
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12
+; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v15, vcc, v18, v15
+; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v20, v13
+; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v17
+; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v19
+; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v21
+; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v16, v9
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14
+; GISEL-NEXT: v_add_i32_e32 v14, vcc, v18, v15
+; GISEL-NEXT: v_add_i32_e32 v15, vcc, v20, v17
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9
+; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v10, vcc, v12, v10
-; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v13
-; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v16
-; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v14, v17
-; GISEL-NEXT: v_mul_lo_u32 v13, 0, v10
-; GISEL-NEXT: v_mul_hi_u32 v14, v3, v10
-; GISEL-NEXT: v_mul_hi_u32 v10, 0, v10
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v14
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v12
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9
+; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v8, v4, vcc
+; GISEL-NEXT: v_mul_lo_u32 v8, 0, v6
+; GISEL-NEXT: v_mul_hi_u32 v9, v3, v6
+; GISEL-NEXT: v_mul_hi_u32 v6, 0, v6
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v13
+; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v11, v5, vcc
+; GISEL-NEXT: v_mul_lo_u32 v10, 0, v7
+; GISEL-NEXT: v_mul_hi_u32 v11, v2, v7
+; GISEL-NEXT: v_mul_hi_u32 v7, 0, v7
+; GISEL-NEXT: v_mul_lo_u32 v12, v3, v4
+; GISEL-NEXT: v_mul_lo_u32 v13, 0, v4
+; GISEL-NEXT: v_mul_hi_u32 v14, v3, v4
+; GISEL-NEXT: v_mul_hi_u32 v4, 0, v4
+; GISEL-NEXT: v_mul_lo_u32 v15, v2, v5
; GISEL-NEXT: v_mul_lo_u32 v16, 0, v5
; GISEL-NEXT: v_mul_hi_u32 v17, v2, v5
; GISEL-NEXT: v_mul_hi_u32 v5, 0, v5
-; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v6, v9
-; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v8, v12
-; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v7, v6, vcc
-; GISEL-NEXT: v_addc_u32_e64 v7, vcc, v11, v8, s[4:5]
-; GISEL-NEXT: v_mul_lo_u32 v8, v3, v6
-; GISEL-NEXT: v_mul_lo_u32 v9, 0, v6
-; GISEL-NEXT: v_mul_hi_u32 v11, v3, v6
-; GISEL-NEXT: v_mul_hi_u32 v6, 0, v6
-; GISEL-NEXT: v_mul_lo_u32 v12, v2, v7
-; GISEL-NEXT: v_mul_lo_u32 v18, 0, v7
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v13, v8
-; GISEL-NEXT: v_mul_hi_u32 v13, v2, v7
-; GISEL-NEXT: v_mul_hi_u32 v7, 0, v7
-; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v10
-; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v16, v12
-; GISEL-NEXT: v_add_i32_e64 v5, s[8:9], v18, v5
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12
; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7]
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v14
-; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[8:9]
-; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v11
-; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v17
-; GISEL-NEXT: v_add_i32_e64 v5, s[8:9], v5, v13
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v13, v6
+; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v15
+; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v16, v7
+; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9
+; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v14
+; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[6:7]
-; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[8:9]
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v12, v10
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v17
+; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v13, v9
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v15, v10
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, v18, v13
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v14
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10
-; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v12
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8
+; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
-; GISEL-NEXT: v_mul_lo_u32 v13, v1, v9
-; GISEL-NEXT: v_mul_lo_u32 v14, 0, v9
-; GISEL-NEXT: v_mul_hi_u32 v16, v1, v9
-; GISEL-NEXT: v_mul_lo_u32 v17, v0, v5
-; GISEL-NEXT: v_mul_lo_u32 v18, 0, v5
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8
+; GISEL-NEXT: v_mul_lo_u32 v9, v1, v6
+; GISEL-NEXT: v_mul_lo_u32 v12, 0, v6
+; GISEL-NEXT: v_mul_hi_u32 v13, v1, v6
; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
-; GISEL-NEXT: v_mul_hi_u32 v11, v0, v5
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, 1, v9
-; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v13
-; GISEL-NEXT: v_add_i32_e64 v13, s[6:7], 1, v5
-; GISEL-NEXT: v_sub_i32_e64 v2, s[8:9], v2, v17
-; GISEL-NEXT: v_add_i32_e64 v17, s[10:11], 1, v12
-; GISEL-NEXT: v_add_i32_e64 v6, s[12:13], v6, v10
-; GISEL-NEXT: v_add_i32_e64 v10, s[12:13], 1, v13
-; GISEL-NEXT: v_add_i32_e64 v7, s[14:15], v7, v8
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[14:15], v3, v1
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[16:17], v2, v0
-; GISEL-NEXT: v_sub_i32_e64 v3, s[18:19], v3, v1
-; GISEL-NEXT: v_sub_i32_e64 v2, s[20:21], v2, v0
-; GISEL-NEXT: v_mul_lo_u32 v8, v1, v6
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[22:23], v3, v1
-; GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v6, vcc
-; GISEL-NEXT: v_mul_lo_u32 v3, v0, v7
+; GISEL-NEXT: v_mul_lo_u32 v11, v0, v7
+; GISEL-NEXT: v_mul_lo_u32 v14, 0, v7
+; GISEL-NEXT: v_mul_hi_u32 v15, v0, v7
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10
+; GISEL-NEXT: v_mul_lo_u32 v8, v1, v4
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, 1, v6
+; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v4, vcc
+; GISEL-NEXT: v_mul_lo_u32 v17, v0, v5
+; GISEL-NEXT: v_add_i32_e32 v18, vcc, 1, v7
+; GISEL-NEXT: v_addc_u32_e32 v19, vcc, 0, v5, vcc
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, 1, v10
+; GISEL-NEXT: v_addc_u32_e32 v20, vcc, 0, v16, vcc
+; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17
+; GISEL-NEXT: v_add_i32_e32 v17, vcc, 1, v18
+; GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v19, vcc
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v13
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v15
+; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v9
+; GISEL-NEXT: v_subb_u32_e64 v9, s[4:5], 0, v8, vcc
+; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], 0, v8
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v1
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5]
+; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v11
+; GISEL-NEXT: v_subb_u32_e64 v11, s[6:7], 0, v13, s[4:5]
+; GISEL-NEXT: v_sub_i32_e64 v13, s[6:7], 0, v13
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v2, v0
+; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[6:7]
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v9
+; GISEL-NEXT: v_cndmask_b32_e64 v9, -1, v14, s[6:7]
+; GISEL-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v8, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v11
+; GISEL-NEXT: v_cndmask_b32_e32 v11, -1, v15, vcc
+; GISEL-NEXT: v_subbrev_u32_e64 v13, vcc, 0, v13, s[4:5]
+; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v1
+; GISEL-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v8, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v1
+; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v0
+; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v13, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v0
-; GISEL-NEXT: v_addc_u32_e64 v0, s[6:7], 0, v7, s[6:7]
-; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[14:15]
-; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v14, v8
-; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[16:17]
-; GISEL-NEXT: v_add_i32_e64 v3, s[6:7], v18, v3
-; GISEL-NEXT: v_addc_u32_e64 v18, s[6:7], 0, v1, s[10:11]
-; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v8, v16
-; GISEL-NEXT: v_addc_u32_e64 v16, s[6:7], 0, v0, s[12:13]
-; GISEL-NEXT: v_add_i32_e64 v3, s[6:7], v3, v11
-; GISEL-NEXT: v_subb_u32_e64 v11, s[6:7], 0, v8, s[4:5]
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v11
-; GISEL-NEXT: v_subb_u32_e64 v11, s[10:11], 0, v3, s[8:9]
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[10:11], 0, v11
-; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[22:23]
-; GISEL-NEXT: v_cndmask_b32_e64 v2, v20, v2, s[6:7]
-; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, -1, vcc
-; GISEL-NEXT: v_sub_i32_e32 v8, vcc, 0, v8
-; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v3
-; GISEL-NEXT: v_subbrev_u32_e64 v8, vcc, 0, v8, s[4:5]
-; GISEL-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[8:9]
-; GISEL-NEXT: v_cndmask_b32_e64 v14, v19, v14, s[10:11]
-; GISEL-NEXT: v_subbrev_u32_e64 v8, vcc, 0, v8, s[18:19]
-; GISEL-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[20:21]
+; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3
-; GISEL-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v2
-; GISEL-NEXT: v_cmp_ne_u32_e64 s[8:9], 0, v14
-; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v11, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v3, v15, v20, s[4:5]
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
-; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v3
-; GISEL-NEXT: v_cndmask_b32_e32 v2, v12, v17, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v3, v13, v10, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v18, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v4, v0, v16, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v9, v2, s[6:7]
-; GISEL-NEXT: v_cndmask_b32_e64 v2, v5, v3, s[8:9]
-; GISEL-NEXT: v_cndmask_b32_e64 v1, v6, v1, s[6:7]
-; GISEL-NEXT: v_cndmask_b32_e64 v3, v7, v4, s[8:9]
+; GISEL-NEXT: v_cndmask_b32_e32 v1, -1, v1, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v10, v12, vcc
+; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0
+; GISEL-NEXT: v_cndmask_b32_e64 v2, v18, v17, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e32 v3, v16, v20, vcc
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v1, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v6, v19, v21, s[4:5]
+; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v11
+; GISEL-NEXT: v_cndmask_b32_e64 v2, v7, v2, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v3, v5, v6, s[4:5]
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_udiv_v2i64_24bit:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
index 9dd6488db3bfef..09e39569a5abb0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -974,120 +974,118 @@ define i64 @v_urem_i64_oddk_denom(i64 %num) {
; CHECK-NEXT: v_cvt_f32_u32_e32 v3, 0x12d8fb
; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v4, 0
; CHECK-NEXT: s_mov_b32 s5, 0xffed2705
-; CHECK-NEXT: s_bfe_i32 s6, 1, 0x10000
; CHECK-NEXT: v_mac_f32_e32 v3, 0x4f800000, v4
-; CHECK-NEXT: v_mov_b32_e32 v4, s6
; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3
; CHECK-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3
-; CHECK-NEXT: v_mul_f32_e32 v5, 0x2f800000, v3
-; CHECK-NEXT: v_trunc_f32_e32 v5, v5
-; CHECK-NEXT: v_mac_f32_e32 v3, 0xcf800000, v5
-; CHECK-NEXT: v_cvt_u32_f32_e32 v5, v5
+; CHECK-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3
+; CHECK-NEXT: v_trunc_f32_e32 v4, v4
+; CHECK-NEXT: v_mac_f32_e32 v3, 0xcf800000, v4
+; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4
; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3
-; CHECK-NEXT: v_mul_lo_u32 v6, v5, s5
-; CHECK-NEXT: v_mul_lo_u32 v7, v3, s5
-; CHECK-NEXT: v_mul_hi_u32 v8, s5, v3
-; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v6, v3
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8
-; CHECK-NEXT: v_mul_lo_u32 v8, v5, v7
-; CHECK-NEXT: v_mul_hi_u32 v9, v3, v7
-; CHECK-NEXT: v_mul_hi_u32 v7, v5, v7
-; CHECK-NEXT: v_mul_lo_u32 v10, v3, v6
-; CHECK-NEXT: v_mul_lo_u32 v11, v5, v6
-; CHECK-NEXT: v_mul_hi_u32 v12, v3, v6
-; CHECK-NEXT: v_mul_hi_u32 v6, v5, v6
-; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10
-; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v7, vcc, v11, v7
-; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9
-; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v12
-; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8
-; CHECK-NEXT: v_add_i32_e32 v9, vcc, v11, v9
-; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8
-; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8
-; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v7
-; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v6, vcc
+; CHECK-NEXT: v_mul_lo_u32 v5, v4, s5
; CHECK-NEXT: v_mul_lo_u32 v6, v3, s5
; CHECK-NEXT: v_mul_hi_u32 v7, s5, v3
-; CHECK-NEXT: v_mul_lo_u32 v8, v5, s5
-; CHECK-NEXT: v_mul_lo_u32 v9, v5, v6
-; CHECK-NEXT: v_mul_hi_u32 v10, v3, v6
-; CHECK-NEXT: v_mul_hi_u32 v6, v5, v6
-; CHECK-NEXT: v_sub_i32_e32 v8, vcc, v8, v3
-; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7
-; CHECK-NEXT: v_mul_lo_u32 v8, v3, v7
-; CHECK-NEXT: v_mul_lo_u32 v11, v5, v7
-; CHECK-NEXT: v_mul_hi_u32 v12, v3, v7
-; CHECK-NEXT: v_mul_hi_u32 v7, v5, v7
-; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8
+; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v5, v3
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7
+; CHECK-NEXT: v_mul_lo_u32 v7, v4, v6
+; CHECK-NEXT: v_mul_hi_u32 v8, v3, v6
+; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6
+; CHECK-NEXT: v_mul_lo_u32 v9, v3, v5
+; CHECK-NEXT: v_mul_lo_u32 v10, v4, v5
+; CHECK-NEXT: v_mul_hi_u32 v11, v3, v5
+; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5
+; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9
; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v11, v6
-; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10
-; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v12
+; CHECK-NEXT: v_add_i32_e32 v6, vcc, v10, v6
; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8
-; CHECK-NEXT: v_add_i32_e32 v9, vcc, v11, v10
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8
-; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8
; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8
+; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v11
+; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7
+; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8
+; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7
+; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v6
-; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v7, vcc
-; CHECK-NEXT: v_mul_lo_u32 v6, v1, v3
-; CHECK-NEXT: v_mul_hi_u32 v7, v0, v3
-; CHECK-NEXT: v_mul_hi_u32 v3, v1, v3
-; CHECK-NEXT: v_mul_lo_u32 v8, v0, v5
-; CHECK-NEXT: v_mul_lo_u32 v9, v1, v5
-; CHECK-NEXT: v_mul_hi_u32 v10, v0, v5
-; CHECK-NEXT: v_mul_hi_u32 v5, v1, v5
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8
+; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc
+; CHECK-NEXT: v_mul_lo_u32 v5, v3, s5
+; CHECK-NEXT: v_mul_hi_u32 v6, s5, v3
+; CHECK-NEXT: v_mul_lo_u32 v7, v4, s5
+; CHECK-NEXT: v_mul_lo_u32 v8, v4, v5
+; CHECK-NEXT: v_mul_hi_u32 v9, v3, v5
+; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5
+; CHECK-NEXT: v_sub_i32_e32 v7, vcc, v7, v3
+; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6
+; CHECK-NEXT: v_mul_lo_u32 v7, v3, v6
+; CHECK-NEXT: v_mul_lo_u32 v10, v4, v6
+; CHECK-NEXT: v_mul_hi_u32 v11, v3, v6
+; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6
+; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7
; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v3, vcc, v9, v3
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, v10, v5
+; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9
+; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v11
; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7
+; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v9
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7
+; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7
; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7
-; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v10
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5
+; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v6, vcc
+; CHECK-NEXT: v_mul_lo_u32 v5, v1, v3
+; CHECK-NEXT: v_mul_hi_u32 v6, v0, v3
+; CHECK-NEXT: v_mul_hi_u32 v3, v1, v3
+; CHECK-NEXT: v_mul_lo_u32 v7, v0, v4
+; CHECK-NEXT: v_mul_lo_u32 v8, v1, v4
+; CHECK-NEXT: v_mul_hi_u32 v9, v0, v4
+; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7
; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6
-; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7
-; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v6
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, v8, v3
+; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
+; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v9
; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6
-; CHECK-NEXT: v_mul_lo_u32 v7, v3, s4
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5
+; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5
+; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5
+; CHECK-NEXT: v_mul_lo_u32 v6, v3, s4
; CHECK-NEXT: v_mul_hi_u32 v3, s4, v3
-; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
-; CHECK-NEXT: v_mul_lo_u32 v5, v5, s4
-; CHECK-NEXT: v_add_i32_e32 v3, vcc, v5, v3
-; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v7
-; CHECK-NEXT: v_subb_u32_e64 v5, vcc, v1, v3, s[4:5]
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5
+; CHECK-NEXT: v_mul_lo_u32 v4, v4, s4
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3
+; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v6
+; CHECK-NEXT: v_subb_u32_e64 v4, vcc, v1, v3, s[4:5]
; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc
-; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v0, v2
-; CHECK-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v5
-; CHECK-NEXT: v_cndmask_b32_e64 v3, v4, v3, s[6:7]
+; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v2
+; CHECK-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v4
+; CHECK-NEXT: v_cndmask_b32_e64 v3, -1, v3, s[6:7]
; CHECK-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5]
-; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v2
+; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v2
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5]
; CHECK-NEXT: s_mov_b64 s[4:5], vcc
-; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, 0x12d8fb, v6
+; CHECK-NEXT: v_subrev_i32_e32 v6, vcc, 0x12d8fb, v5
; CHECK-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5]
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1
-; CHECK-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[4:5]
-; CHECK-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v1, vcc
+; CHECK-NEXT: v_cndmask_b32_e64 v2, -1, v2, s[4:5]
+; CHECK-NEXT: v_subbrev_u32_e32 v7, vcc, 0, v1, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
-; CHECK-NEXT: v_cndmask_b32_e32 v2, v6, v7, vcc
-; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
+; CHECK-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc
+; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; CHECK-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
; CHECK-NEXT: s_setpc_b64 s[30:31]
%result = urem i64 %num, 1235195
ret i64 %result
@@ -1099,243 +1097,239 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s4, 0x12d8fb
; GISEL-NEXT: v_mov_b32_e32 v4, 0x12d8fb
-; GISEL-NEXT: v_cvt_f32_u32_e32 v6, 0x12d8fb
-; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v5, 0
+; GISEL-NEXT: v_cvt_f32_u32_e32 v5, 0x12d8fb
+; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v6, 0
; GISEL-NEXT: s_sub_u32 s5, 0, 0x12d8fb
-; GISEL-NEXT: v_madmk_f32 v7, v5, 0x4f800000, v6
+; GISEL-NEXT: v_madmk_f32 v7, v6, 0x4f800000, v5
; GISEL-NEXT: s_subb_u32 s6, 0, 0
-; GISEL-NEXT: s_bfe_i32 s7, 1, 0x10000
-; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v7, v7
-; GISEL-NEXT: v_mov_b32_e32 v5, s7
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6
+; GISEL-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6
+; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v7
+; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5
; GISEL-NEXT: s_sub_u32 s7, 0, 0x12d8fb
-; GISEL-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7
; GISEL-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6
+; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5
; GISEL-NEXT: s_subb_u32 s8, 0, 0
-; GISEL-NEXT: s_bfe_i32 s9, 1, 0x10000
-; GISEL-NEXT: v_mul_f32_e32 v8, 0x2f800000, v7
-; GISEL-NEXT: v_mul_f32_e32 v9, 0x2f800000, v6
-; GISEL-NEXT: v_mov_b32_e32 v10, s9
+; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v6
+; GISEL-NEXT: v_mul_f32_e32 v8, 0x2f800000, v5
+; GISEL-NEXT: v_trunc_f32_e32 v7, v7
; GISEL-NEXT: v_trunc_f32_e32 v8, v8
-; GISEL-NEXT: v_trunc_f32_e32 v9, v9
-; GISEL-NEXT: v_mac_f32_e32 v7, 0xcf800000, v8
-; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8
-; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v9
-; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9
+; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v7
; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7
-; GISEL-NEXT: v_mul_lo_u32 v11, s5, v8
+; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v8
+; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8
; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
-; GISEL-NEXT: v_mul_lo_u32 v12, s7, v9
-; GISEL-NEXT: v_mul_lo_u32 v13, s5, v7
-; GISEL-NEXT: v_mul_lo_u32 v14, s6, v7
-; GISEL-NEXT: v_mul_hi_u32 v15, s5, v7
-; GISEL-NEXT: v_mul_lo_u32 v16, s7, v6
-; GISEL-NEXT: v_mul_lo_u32 v17, s8, v6
-; GISEL-NEXT: v_mul_hi_u32 v18, s7, v6
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v14, v11
-; GISEL-NEXT: v_mul_lo_u32 v14, v8, v13
-; GISEL-NEXT: v_mul_hi_u32 v19, v7, v13
-; GISEL-NEXT: v_mul_hi_u32 v13, v8, v13
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, v17, v12
-; GISEL-NEXT: v_mul_lo_u32 v17, v9, v16
-; GISEL-NEXT: v_mul_hi_u32 v20, v6, v16
-; GISEL-NEXT: v_mul_hi_u32 v16, v9, v16
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v15
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v18
-; GISEL-NEXT: v_mul_lo_u32 v15, v7, v11
-; GISEL-NEXT: v_mul_lo_u32 v18, v8, v11
-; GISEL-NEXT: v_mul_hi_u32 v21, v7, v11
-; GISEL-NEXT: v_mul_hi_u32 v11, v8, v11
-; GISEL-NEXT: v_mul_lo_u32 v22, v6, v12
-; GISEL-NEXT: v_mul_lo_u32 v23, v9, v12
-; GISEL-NEXT: v_mul_hi_u32 v24, v6, v12
-; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12
-; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15
-; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v13
-; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v22
-; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v16, vcc, v23, v16
-; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v19
-; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v21
-; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v20
-; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v24
-; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14
-; GISEL-NEXT: v_add_i32_e32 v15, vcc, v18, v19
-; GISEL-NEXT: v_add_i32_e32 v17, vcc, v22, v17
-; GISEL-NEXT: v_add_i32_e32 v18, vcc, v23, v20
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
-; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v17
-; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14
-; GISEL-NEXT: v_add_i32_e32 v15, vcc, v18, v17
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v14
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v13
-; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v11, vcc
-; GISEL-NEXT: v_mul_lo_u32 v11, s5, v7
-; GISEL-NEXT: v_mul_lo_u32 v13, s6, v7
-; GISEL-NEXT: v_mul_hi_u32 v14, s5, v7
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v16
-; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v12, vcc
-; GISEL-NEXT: v_mul_lo_u32 v12, s7, v6
-; GISEL-NEXT: v_mul_lo_u32 v15, s8, v6
-; GISEL-NEXT: v_mul_hi_u32 v16, s7, v6
-; GISEL-NEXT: v_mul_lo_u32 v17, s5, v8
-; GISEL-NEXT: v_mul_lo_u32 v18, v8, v11
-; GISEL-NEXT: v_mul_hi_u32 v19, v7, v11
-; GISEL-NEXT: v_mul_hi_u32 v11, v8, v11
-; GISEL-NEXT: v_mul_lo_u32 v20, s7, v9
-; GISEL-NEXT: v_mul_lo_u32 v21, v9, v12
-; GISEL-NEXT: v_mul_hi_u32 v22, v6, v12
-; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17
-; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v20
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
-; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v16
-; GISEL-NEXT: v_mul_lo_u32 v15, v7, v13
-; GISEL-NEXT: v_mul_lo_u32 v16, v8, v13
-; GISEL-NEXT: v_mul_hi_u32 v17, v7, v13
-; GISEL-NEXT: v_mul_hi_u32 v13, v8, v13
-; GISEL-NEXT: v_mul_lo_u32 v20, v6, v14
-; GISEL-NEXT: v_mul_lo_u32 v23, v9, v14
-; GISEL-NEXT: v_mul_hi_u32 v24, v6, v14
-; GISEL-NEXT: v_mul_hi_u32 v14, v9, v14
-; GISEL-NEXT: v_add_i32_e32 v15, vcc, v18, v15
-; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
+; GISEL-NEXT: v_mul_lo_u32 v9, s5, v7
+; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
+; GISEL-NEXT: v_mul_lo_u32 v10, s7, v8
+; GISEL-NEXT: v_mul_lo_u32 v11, s5, v6
+; GISEL-NEXT: v_mul_lo_u32 v12, s6, v6
+; GISEL-NEXT: v_mul_hi_u32 v13, s5, v6
+; GISEL-NEXT: v_mul_lo_u32 v14, s7, v5
+; GISEL-NEXT: v_mul_lo_u32 v15, s8, v5
+; GISEL-NEXT: v_mul_hi_u32 v16, s7, v5
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v12, v9
+; GISEL-NEXT: v_mul_lo_u32 v12, v7, v11
+; GISEL-NEXT: v_mul_hi_u32 v17, v6, v11
+; GISEL-NEXT: v_mul_hi_u32 v11, v7, v11
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v15, v10
+; GISEL-NEXT: v_mul_lo_u32 v15, v8, v14
+; GISEL-NEXT: v_mul_hi_u32 v18, v5, v14
+; GISEL-NEXT: v_mul_hi_u32 v14, v8, v14
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v16
+; GISEL-NEXT: v_mul_lo_u32 v13, v6, v9
+; GISEL-NEXT: v_mul_lo_u32 v16, v7, v9
+; GISEL-NEXT: v_mul_hi_u32 v19, v6, v9
+; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9
+; GISEL-NEXT: v_mul_lo_u32 v20, v5, v10
+; GISEL-NEXT: v_mul_lo_u32 v21, v8, v10
+; GISEL-NEXT: v_mul_hi_u32 v22, v5, v10
+; GISEL-NEXT: v_mul_hi_u32 v10, v8, v10
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13
+; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11
; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v20, vcc, v21, v20
-; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, v23, v12
-; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v19
-; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v17
-; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v19, vcc, v20, v22
-; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v24
+; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v20
; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v15, vcc, v18, v15
-; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v17
-; GISEL-NEXT: v_add_i32_e32 v17, vcc, v21, v19
-; GISEL-NEXT: v_add_i32_e32 v18, vcc, v23, v20
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v15
-; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v14, vcc, v21, v14
+; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v17
+; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v19
; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15
-; GISEL-NEXT: v_add_i32_e32 v16, vcc, v18, v17
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15
-; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11
-; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v13, vcc
-; GISEL-NEXT: v_mul_lo_u32 v11, v1, v7
-; GISEL-NEXT: v_mul_hi_u32 v13, v0, v7
-; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v12
-; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v14, vcc
-; GISEL-NEXT: v_mul_lo_u32 v12, v3, v6
-; GISEL-NEXT: v_mul_hi_u32 v14, v2, v6
-; GISEL-NEXT: v_mul_hi_u32 v6, v3, v6
-; GISEL-NEXT: v_mul_lo_u32 v15, v0, v8
-; GISEL-NEXT: v_mul_lo_u32 v16, v1, v8
-; GISEL-NEXT: v_mul_hi_u32 v17, v0, v8
-; GISEL-NEXT: v_mul_hi_u32 v8, v1, v8
-; GISEL-NEXT: v_mul_lo_u32 v18, v2, v9
-; GISEL-NEXT: v_mul_lo_u32 v19, v3, v9
-; GISEL-NEXT: v_mul_hi_u32 v20, v2, v9
-; GISEL-NEXT: v_mul_hi_u32 v9, v3, v9
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v15
+; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v18
; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v16, v7
-; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v18
+; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v22
; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v19, v6
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v17
+; GISEL-NEXT: v_add_i32_e32 v15, vcc, v20, v15
+; GISEL-NEXT: v_add_i32_e32 v16, vcc, v21, v18
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
+; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15
+; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v15
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v13
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v11
+; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v9, vcc
+; GISEL-NEXT: v_mul_lo_u32 v9, s5, v6
+; GISEL-NEXT: v_mul_lo_u32 v11, s6, v6
+; GISEL-NEXT: v_mul_hi_u32 v12, s5, v6
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v14
+; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v10, vcc
+; GISEL-NEXT: v_mul_lo_u32 v10, s7, v5
+; GISEL-NEXT: v_mul_lo_u32 v13, s8, v5
+; GISEL-NEXT: v_mul_hi_u32 v14, s7, v5
+; GISEL-NEXT: v_mul_lo_u32 v15, s5, v7
+; GISEL-NEXT: v_mul_lo_u32 v16, v7, v9
+; GISEL-NEXT: v_mul_hi_u32 v17, v6, v9
+; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9
+; GISEL-NEXT: v_mul_lo_u32 v18, s7, v8
+; GISEL-NEXT: v_mul_lo_u32 v19, v8, v10
+; GISEL-NEXT: v_mul_hi_u32 v20, v5, v10
+; GISEL-NEXT: v_mul_hi_u32 v10, v8, v10
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v15
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v18
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v14
+; GISEL-NEXT: v_mul_lo_u32 v13, v6, v11
+; GISEL-NEXT: v_mul_lo_u32 v14, v7, v11
+; GISEL-NEXT: v_mul_hi_u32 v15, v6, v11
+; GISEL-NEXT: v_mul_hi_u32 v11, v7, v11
+; GISEL-NEXT: v_mul_lo_u32 v18, v5, v12
+; GISEL-NEXT: v_mul_lo_u32 v21, v8, v12
+; GISEL-NEXT: v_mul_hi_u32 v22, v5, v12
+; GISEL-NEXT: v_mul_hi_u32 v12, v8, v12
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v13
+; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v14, v9
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v18, vcc, v19, v18
; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13
-; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v17
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v21, v10
+; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17
+; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15
+; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v17, vcc, v18, v20
+; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v22
+; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v13
+; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15
+; GISEL-NEXT: v_add_i32_e32 v15, vcc, v19, v17
+; GISEL-NEXT: v_add_i32_e32 v16, vcc, v21, v18
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13
; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v15
+; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13
+; GISEL-NEXT: v_add_i32_e32 v14, vcc, v16, v15
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13
; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14
-; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v20
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9
+; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v11, vcc
+; GISEL-NEXT: v_mul_lo_u32 v9, v1, v6
+; GISEL-NEXT: v_mul_hi_u32 v11, v0, v6
+; GISEL-NEXT: v_mul_hi_u32 v6, v1, v6
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10
+; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v12, vcc
+; GISEL-NEXT: v_mul_lo_u32 v10, v3, v5
+; GISEL-NEXT: v_mul_hi_u32 v12, v2, v5
+; GISEL-NEXT: v_mul_hi_u32 v5, v3, v5
+; GISEL-NEXT: v_mul_lo_u32 v13, v0, v7
+; GISEL-NEXT: v_mul_lo_u32 v14, v1, v7
+; GISEL-NEXT: v_mul_hi_u32 v15, v0, v7
+; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7
+; GISEL-NEXT: v_mul_lo_u32 v16, v2, v8
+; GISEL-NEXT: v_mul_lo_u32 v17, v3, v8
+; GISEL-NEXT: v_mul_hi_u32 v18, v2, v8
+; GISEL-NEXT: v_mul_hi_u32 v8, v3, v8
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13
+; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v14, v6
; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v15, v11
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v13
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, v18, v12
-; GISEL-NEXT: v_add_i32_e32 v14, vcc, v19, v14
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v16
+; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v17, v5
+; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11
+; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v15
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v12
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12
+; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v18
; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v11
-; GISEL-NEXT: v_mul_lo_u32 v13, v7, s4
-; GISEL-NEXT: v_mul_hi_u32 v7, s4, v7
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, v14, v12
-; GISEL-NEXT: v_mul_lo_u32 v14, v6, s4
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v13, v9
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v14, v11
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v16, v10
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v17, v12
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9
+; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10
+; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9
+; GISEL-NEXT: v_mul_lo_u32 v11, v6, s4
; GISEL-NEXT: v_mul_hi_u32 v6, s4, v6
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v12, v10
+; GISEL-NEXT: v_mul_lo_u32 v12, v5, s4
+; GISEL-NEXT: v_mul_hi_u32 v5, s4, v5
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10
+; GISEL-NEXT: v_mul_lo_u32 v7, v7, s4
; GISEL-NEXT: v_mul_lo_u32 v8, v8, s4
-; GISEL-NEXT: v_mul_lo_u32 v9, v9, s4
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6
-; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v13
-; GISEL-NEXT: v_subb_u32_e64 v8, vcc, v1, v7, s[4:5]
-; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v8, v5
+; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v11
+; GISEL-NEXT: v_subb_u32_e64 v7, vcc, v1, v6, s[4:5]
+; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc
-; GISEL-NEXT: v_sub_i32_e64 v2, s[6:7], v2, v14
-; GISEL-NEXT: v_subb_u32_e64 v9, vcc, v3, v6, s[6:7]
-; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v6
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc
-; GISEL-NEXT: v_sub_i32_e32 v11, vcc, v2, v4
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[8:9], 0, v8
-; GISEL-NEXT: v_cndmask_b32_e64 v7, v5, v7, s[8:9]
+; GISEL-NEXT: v_sub_i32_e64 v2, s[6:7], v2, v12
+; GISEL-NEXT: v_subb_u32_e64 v8, vcc, v3, v5, s[6:7]
+; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v5
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4
+; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc
+; GISEL-NEXT: v_sub_i32_e32 v9, vcc, v2, v4
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[8:9], 0, v7
+; GISEL-NEXT: v_cndmask_b32_e64 v6, -1, v6, s[8:9]
; GISEL-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5]
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v6, v5, v6, s[4:5]
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8
+; GISEL-NEXT: v_cndmask_b32_e64 v5, -1, v5, s[4:5]
; GISEL-NEXT: v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[6:7]
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5]
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v4
+; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5]
; GISEL-NEXT: s_mov_b64 s[4:5], vcc
-; GISEL-NEXT: v_subrev_i32_e32 v13, vcc, 0x12d8fb, v11
-; GISEL-NEXT: v_sub_i32_e64 v14, s[6:7], v0, v4
+; GISEL-NEXT: v_subrev_i32_e32 v11, vcc, 0x12d8fb, v9
+; GISEL-NEXT: v_sub_i32_e64 v12, s[6:7], v0, v4
; GISEL-NEXT: v_subbrev_u32_e64 v1, s[6:7], 0, v1, s[6:7]
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v14, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[6:7]
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v12, v4
+; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[6:7]
; GISEL-NEXT: v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[4:5]
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v15, s[4:5]
-; GISEL-NEXT: v_sub_i32_e64 v4, s[4:5], v14, v4
-; GISEL-NEXT: v_subbrev_u32_e64 v15, s[4:5], 0, v1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v13, -1, v13, s[4:5]
+; GISEL-NEXT: v_sub_i32_e64 v4, s[4:5], v12, v4
+; GISEL-NEXT: v_subbrev_u32_e64 v14, s[4:5], 0, v1, s[4:5]
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3
-; GISEL-NEXT: v_cndmask_b32_e64 v10, v10, v12, s[4:5]
-; GISEL-NEXT: v_subbrev_u32_e32 v12, vcc, 0, v3, vcc
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
-; GISEL-NEXT: v_cndmask_b32_e32 v4, v14, v4, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v10, -1, v10, s[4:5]
+; GISEL-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v3, vcc
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13
+; GISEL-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v10
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v11, v13, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
+; GISEL-NEXT: v_cndmask_b32_e64 v9, v9, v11, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v14, vcc
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, v12, s[4:5]
-; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v6
-; GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, v15, s[4:5]
+; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v5
+; GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v9, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[4:5]
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_urem_v2i64_oddk_denom:
@@ -1343,236 +1337,234 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: s_mov_b32 s4, 0x12d8fb
; CGP-NEXT: v_mov_b32_e32 v4, 0x12d8fb
-; CGP-NEXT: v_cvt_f32_u32_e32 v6, 0x12d8fb
-; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0
+; CGP-NEXT: v_cvt_f32_u32_e32 v5, 0x12d8fb
+; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0
; CGP-NEXT: s_mov_b32 s5, 0xffed2705
-; CGP-NEXT: s_bfe_i32 s6, 1, 0x10000
; CGP-NEXT: v_cvt_f32_u32_e32 v7, 0x12d8fb
; CGP-NEXT: v_cvt_f32_ubyte0_e32 v8, 0
-; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5
-; CGP-NEXT: v_mov_b32_e32 v5, s6
+; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6
; CGP-NEXT: v_mac_f32_e32 v7, 0x4f800000, v8
-; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v6
-; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v7
+; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5
+; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v7
+; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5
; CGP-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6
-; CGP-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7
+; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5
; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6
-; CGP-NEXT: v_mul_f32_e32 v9, 0x2f800000, v7
+; CGP-NEXT: v_trunc_f32_e32 v7, v7
; CGP-NEXT: v_trunc_f32_e32 v8, v8
-; CGP-NEXT: v_trunc_f32_e32 v9, v9
+; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7
+; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7
; CGP-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8
; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8
-; CGP-NEXT: v_mac_f32_e32 v7, 0xcf800000, v9
-; CGP-NEXT: v_cvt_u32_f32_e32 v9, v9
+; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5
+; CGP-NEXT: v_mul_lo_u32 v9, v7, s5
; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
; CGP-NEXT: v_mul_lo_u32 v10, v8, s5
-; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7
-; CGP-NEXT: v_mul_lo_u32 v11, v9, s5
-; CGP-NEXT: v_mul_lo_u32 v12, v6, s5
-; CGP-NEXT: v_mul_hi_u32 v13, s5, v6
+; CGP-NEXT: v_mul_lo_u32 v11, v5, s5
+; CGP-NEXT: v_mul_hi_u32 v12, s5, v5
+; CGP-NEXT: v_sub_i32_e32 v9, vcc, v9, v5
+; CGP-NEXT: v_mul_lo_u32 v13, v6, s5
+; CGP-NEXT: v_mul_hi_u32 v14, s5, v6
; CGP-NEXT: v_sub_i32_e32 v10, vcc, v10, v6
-; CGP-NEXT: v_mul_lo_u32 v14, v7, s5
-; CGP-NEXT: v_mul_hi_u32 v15, s5, v7
-; CGP-NEXT: v_sub_i32_e32 v11, vcc, v11, v7
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13
-; CGP-NEXT: v_mul_lo_u32 v13, v8, v12
-; CGP-NEXT: v_mul_hi_u32 v16, v6, v12
-; CGP-NEXT: v_mul_hi_u32 v12, v8, v12
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v15
-; CGP-NEXT: v_mul_lo_u32 v15, v9, v14
-; CGP-NEXT: v_mul_hi_u32 v17, v7, v14
-; CGP-NEXT: v_mul_hi_u32 v14, v9, v14
-; CGP-NEXT: v_mul_lo_u32 v18, v6, v10
-; CGP-NEXT: v_mul_lo_u32 v19, v8, v10
-; CGP-NEXT: v_mul_hi_u32 v20, v6, v10
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12
+; CGP-NEXT: v_mul_lo_u32 v12, v7, v11
+; CGP-NEXT: v_mul_hi_u32 v15, v5, v11
+; CGP-NEXT: v_mul_hi_u32 v11, v7, v11
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14
+; CGP-NEXT: v_mul_lo_u32 v14, v8, v13
+; CGP-NEXT: v_mul_hi_u32 v16, v6, v13
+; CGP-NEXT: v_mul_hi_u32 v13, v8, v13
+; CGP-NEXT: v_mul_lo_u32 v17, v5, v9
+; CGP-NEXT: v_mul_lo_u32 v18, v7, v9
+; CGP-NEXT: v_mul_hi_u32 v19, v5, v9
+; CGP-NEXT: v_mul_hi_u32 v9, v7, v9
+; CGP-NEXT: v_mul_lo_u32 v20, v6, v10
+; CGP-NEXT: v_mul_lo_u32 v21, v8, v10
+; CGP-NEXT: v_mul_hi_u32 v22, v6, v10
; CGP-NEXT: v_mul_hi_u32 v10, v8, v10
-; CGP-NEXT: v_mul_lo_u32 v21, v7, v11
-; CGP-NEXT: v_mul_lo_u32 v22, v9, v11
-; CGP-NEXT: v_mul_hi_u32 v23, v7, v11
-; CGP-NEXT: v_mul_hi_u32 v11, v9, v11
-; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v18
+; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v17
+; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v11, vcc, v18, v11
; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v12, vcc, v19, v12
-; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v21
+; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v20
+; CGP-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v13, vcc, v21, v13
; CGP-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v14, vcc, v22, v14
-; CGP-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v16
-; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v20
-; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v17
-; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v23
-; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v13, vcc, v18, v13
-; CGP-NEXT: v_add_i32_e32 v16, vcc, v19, v16
-; CGP-NEXT: v_add_i32_e32 v15, vcc, v21, v15
-; CGP-NEXT: v_add_i32_e32 v17, vcc, v22, v17
-; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13
-; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15
+; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v15
+; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v19
; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v13, vcc, v16, v13
-; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v15
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v15
-; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v12
+; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16
+; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v22
+; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v12, vcc, v17, v12
+; CGP-NEXT: v_add_i32_e32 v15, vcc, v18, v15
+; CGP-NEXT: v_add_i32_e32 v14, vcc, v20, v14
+; CGP-NEXT: v_add_i32_e32 v16, vcc, v21, v16
+; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12
+; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14
+; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v12
+; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v14
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v11
+; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v9, vcc
+; CGP-NEXT: v_mul_lo_u32 v9, v5, s5
+; CGP-NEXT: v_mul_hi_u32 v11, s5, v5
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v13
; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v10, vcc
; CGP-NEXT: v_mul_lo_u32 v10, v6, s5
; CGP-NEXT: v_mul_hi_u32 v12, s5, v6
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v14
-; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v11, vcc
-; CGP-NEXT: v_mul_lo_u32 v11, v7, s5
-; CGP-NEXT: v_mul_hi_u32 v13, s5, v7
-; CGP-NEXT: v_mul_lo_u32 v14, v8, s5
-; CGP-NEXT: v_mul_lo_u32 v15, v8, v10
-; CGP-NEXT: v_mul_hi_u32 v16, v6, v10
+; CGP-NEXT: v_mul_lo_u32 v13, v7, s5
+; CGP-NEXT: v_mul_lo_u32 v14, v7, v9
+; CGP-NEXT: v_mul_hi_u32 v15, v5, v9
+; CGP-NEXT: v_mul_hi_u32 v9, v7, v9
+; CGP-NEXT: v_mul_lo_u32 v16, v8, s5
+; CGP-NEXT: v_mul_lo_u32 v17, v8, v10
+; CGP-NEXT: v_mul_hi_u32 v18, v6, v10
; CGP-NEXT: v_mul_hi_u32 v10, v8, v10
-; CGP-NEXT: v_mul_lo_u32 v17, v9, s5
-; CGP-NEXT: v_mul_lo_u32 v18, v9, v11
-; CGP-NEXT: v_mul_hi_u32 v19, v7, v11
-; CGP-NEXT: v_mul_hi_u32 v11, v9, v11
-; CGP-NEXT: v_sub_i32_e32 v14, vcc, v14, v6
-; CGP-NEXT: v_sub_i32_e32 v17, vcc, v17, v7
-; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12
-; CGP-NEXT: v_add_i32_e32 v13, vcc, v17, v13
-; CGP-NEXT: v_mul_lo_u32 v14, v6, v12
-; CGP-NEXT: v_mul_lo_u32 v17, v8, v12
-; CGP-NEXT: v_mul_hi_u32 v20, v6, v12
+; CGP-NEXT: v_sub_i32_e32 v13, vcc, v13, v5
+; CGP-NEXT: v_sub_i32_e32 v16, vcc, v16, v6
+; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11
+; CGP-NEXT: v_add_i32_e32 v12, vcc, v16, v12
+; CGP-NEXT: v_mul_lo_u32 v13, v5, v11
+; CGP-NEXT: v_mul_lo_u32 v16, v7, v11
+; CGP-NEXT: v_mul_hi_u32 v19, v5, v11
+; CGP-NEXT: v_mul_hi_u32 v11, v7, v11
+; CGP-NEXT: v_mul_lo_u32 v20, v6, v12
+; CGP-NEXT: v_mul_lo_u32 v21, v8, v12
+; CGP-NEXT: v_mul_hi_u32 v22, v6, v12
; CGP-NEXT: v_mul_hi_u32 v12, v8, v12
-; CGP-NEXT: v_mul_lo_u32 v21, v7, v13
-; CGP-NEXT: v_mul_lo_u32 v22, v9, v13
-; CGP-NEXT: v_mul_hi_u32 v23, v7, v13
-; CGP-NEXT: v_mul_hi_u32 v13, v9, v13
-; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14
-; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v17, v10
-; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v18, vcc, v18, v21
-; CGP-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v22, v11
-; CGP-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16
+; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13
; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v20
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v16, v9
; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v18, vcc, v18, v19
+; CGP-NEXT: v_add_i32_e32 v17, vcc, v17, v20
+; CGP-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v21, v10
+; CGP-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15
+; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v19
+; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v17, vcc, v17, v18
+; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v22
; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v23
-; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14
-; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v16
+; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13
+; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v15
+; CGP-NEXT: v_add_i32_e32 v15, vcc, v20, v17
; CGP-NEXT: v_add_i32_e32 v16, vcc, v21, v18
-; CGP-NEXT: v_add_i32_e32 v17, vcc, v22, v19
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14
-; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v16
-; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14
-; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v16
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v13
+; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v15
+; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13
+; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v15
+; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13
; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14
-; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9
+; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v11, vcc
+; CGP-NEXT: v_mul_lo_u32 v9, v1, v5
+; CGP-NEXT: v_mul_hi_u32 v11, v0, v5
+; CGP-NEXT: v_mul_hi_u32 v5, v1, v5
; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10
; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v12, vcc
-; CGP-NEXT: v_mul_lo_u32 v10, v1, v6
-; CGP-NEXT: v_mul_hi_u32 v12, v0, v6
-; CGP-NEXT: v_mul_hi_u32 v6, v1, v6
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v11
-; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v13, vcc
-; CGP-NEXT: v_mul_lo_u32 v11, v3, v7
-; CGP-NEXT: v_mul_hi_u32 v13, v2, v7
-; CGP-NEXT: v_mul_hi_u32 v7, v3, v7
-; CGP-NEXT: v_mul_lo_u32 v14, v0, v8
-; CGP-NEXT: v_mul_lo_u32 v15, v1, v8
-; CGP-NEXT: v_mul_hi_u32 v16, v0, v8
-; CGP-NEXT: v_mul_hi_u32 v8, v1, v8
-; CGP-NEXT: v_mul_lo_u32 v17, v2, v9
-; CGP-NEXT: v_mul_lo_u32 v18, v3, v9
-; CGP-NEXT: v_mul_hi_u32 v19, v2, v9
-; CGP-NEXT: v_mul_hi_u32 v9, v3, v9
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14
+; CGP-NEXT: v_mul_lo_u32 v10, v3, v6
+; CGP-NEXT: v_mul_hi_u32 v12, v2, v6
+; CGP-NEXT: v_mul_hi_u32 v6, v3, v6
+; CGP-NEXT: v_mul_lo_u32 v13, v0, v7
+; CGP-NEXT: v_mul_lo_u32 v14, v1, v7
+; CGP-NEXT: v_mul_hi_u32 v15, v0, v7
+; CGP-NEXT: v_mul_hi_u32 v7, v1, v7
+; CGP-NEXT: v_mul_lo_u32 v16, v2, v8
+; CGP-NEXT: v_mul_lo_u32 v17, v3, v8
+; CGP-NEXT: v_mul_hi_u32 v18, v2, v8
+; CGP-NEXT: v_mul_hi_u32 v8, v3, v8
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v13
+; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v14, v5
; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v6, vcc, v15, v6
-; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v17
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v16
+; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v17, v6
; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v18, v7
-; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v15
+; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12
; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v16
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v18
; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v19
-; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v14, v10
-; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v12
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v17, v11
-; CGP-NEXT: v_add_i32_e32 v13, vcc, v18, v13
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v13, v9
+; CGP-NEXT: v_add_i32_e32 v11, vcc, v14, v11
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v16, v10
+; CGP-NEXT: v_add_i32_e32 v12, vcc, v17, v12
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10
; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v11
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9
+; CGP-NEXT: v_mul_lo_u32 v11, v5, s4
+; CGP-NEXT: v_mul_hi_u32 v5, s4, v5
; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10
; CGP-NEXT: v_mul_lo_u32 v12, v6, s4
; CGP-NEXT: v_mul_hi_u32 v6, s4, v6
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11
-; CGP-NEXT: v_mul_lo_u32 v13, v7, s4
-; CGP-NEXT: v_mul_hi_u32 v7, s4, v7
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9
; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11
+; CGP-NEXT: v_mul_lo_u32 v7, v7, s4
; CGP-NEXT: v_mul_lo_u32 v8, v8, s4
-; CGP-NEXT: v_mul_lo_u32 v9, v9, s4
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5
; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7
-; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v12
-; CGP-NEXT: v_subb_u32_e64 v8, vcc, v1, v6, s[4:5]
-; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
+; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v11
+; CGP-NEXT: v_subb_u32_e64 v7, vcc, v1, v5, s[4:5]
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
-; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc
-; CGP-NEXT: v_sub_i32_e64 v2, s[6:7], v2, v13
-; CGP-NEXT: v_subb_u32_e64 v9, vcc, v3, v7, s[6:7]
-; CGP-NEXT: v_sub_i32_e32 v3, vcc, v3, v7
+; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc
+; CGP-NEXT: v_sub_i32_e64 v2, s[6:7], v2, v12
+; CGP-NEXT: v_subb_u32_e64 v8, vcc, v3, v6, s[6:7]
+; CGP-NEXT: v_sub_i32_e32 v3, vcc, v3, v6
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4
-; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc
-; CGP-NEXT: v_sub_i32_e32 v10, vcc, v2, v4
-; CGP-NEXT: v_cmp_eq_u32_e64 s[8:9], 0, v8
-; CGP-NEXT: v_cndmask_b32_e64 v6, v5, v6, s[8:9]
+; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc
+; CGP-NEXT: v_sub_i32_e32 v9, vcc, v2, v4
+; CGP-NEXT: v_cmp_eq_u32_e64 s[8:9], 0, v7
+; CGP-NEXT: v_cndmask_b32_e64 v5, -1, v5, s[8:9]
; CGP-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5]
-; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9
-; CGP-NEXT: v_cndmask_b32_e64 v7, v5, v7, s[4:5]
+; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8
+; CGP-NEXT: v_cndmask_b32_e64 v6, -1, v6, s[4:5]
; CGP-NEXT: v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[6:7]
-; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v4
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5]
+; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v4
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5]
; CGP-NEXT: s_mov_b64 s[4:5], vcc
-; CGP-NEXT: v_subrev_i32_e32 v12, vcc, 0x12d8fb, v10
-; CGP-NEXT: v_sub_i32_e64 v13, s[6:7], v0, v4
+; CGP-NEXT: v_subrev_i32_e32 v11, vcc, 0x12d8fb, v9
+; CGP-NEXT: v_sub_i32_e64 v12, s[6:7], v0, v4
; CGP-NEXT: v_subbrev_u32_e64 v1, s[6:7], 0, v1, s[6:7]
-; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v13, v4
-; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[6:7]
+; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v12, v4
+; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[6:7]
; CGP-NEXT: v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[4:5]
; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1
-; CGP-NEXT: v_cndmask_b32_e64 v14, v5, v14, s[4:5]
-; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v13, v4
-; CGP-NEXT: v_subbrev_u32_e64 v15, s[4:5], 0, v1, s[4:5]
+; CGP-NEXT: v_cndmask_b32_e64 v13, -1, v13, s[4:5]
+; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v12, v4
+; CGP-NEXT: v_subbrev_u32_e64 v14, s[4:5], 0, v1, s[4:5]
; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3
-; CGP-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[4:5]
-; CGP-NEXT: v_subbrev_u32_e32 v11, vcc, 0, v3, vcc
-; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14
-; CGP-NEXT: v_cndmask_b32_e32 v4, v13, v4, vcc
-; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v5
-; CGP-NEXT: v_cndmask_b32_e64 v5, v10, v12, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc
-; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
+; CGP-NEXT: v_cndmask_b32_e64 v10, -1, v10, s[4:5]
+; CGP-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v3, vcc
+; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13
+; CGP-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
+; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v10
+; CGP-NEXT: v_cndmask_b32_e64 v9, v9, v11, s[4:5]
+; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v14, vcc
+; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[4:5]
-; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v7
-; CGP-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[4:5]
+; CGP-NEXT: v_cndmask_b32_e64 v3, v3, v15, s[4:5]
+; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v6
+; CGP-NEXT: v_cndmask_b32_e64 v2, v2, v9, s[4:5]
+; CGP-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
+; CGP-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[4:5]
; CGP-NEXT: s_setpc_b64 s[30:31]
%result = urem <2 x i64> %num, <i64 1235195, i64 1235195>
ret <2 x i64> %result
@@ -2408,245 +2400,243 @@ define <2 x i64> @v_urem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
; GISEL-NEXT: v_and_b32_e32 v2, 0xffffff, v2
; GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v4
; GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v6
-; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v5, 0
-; GISEL-NEXT: s_bfe_i32 s6, 1, 0x10000
-; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v1
-; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v1
-; GISEL-NEXT: v_subb_u32_e64 v8, s[4:5], 0, 0, vcc
-; GISEL-NEXT: v_mov_b32_e32 v4, s6
-; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v0
-; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v0
-; GISEL-NEXT: v_subb_u32_e64 v11, s[4:5], 0, 0, vcc
-; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5
-; GISEL-NEXT: v_mac_f32_e32 v9, 0x4f800000, v5
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v6
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v9
-; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5
+; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v6, 0
+; GISEL-NEXT: v_cvt_f32_u32_e32 v7, v1
+; GISEL-NEXT: v_sub_i32_e32 v4, vcc, 0, v1
+; GISEL-NEXT: v_subb_u32_e64 v5, s[4:5], 0, 0, vcc
+; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v0
+; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v0
+; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], 0, 0, vcc
+; GISEL-NEXT: v_mac_f32_e32 v7, 0x4f800000, v6
+; GISEL-NEXT: v_mac_f32_e32 v8, 0x4f800000, v6
+; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v7
+; GISEL-NEXT: v_rcp_iflag_f32_e32 v7, v8
; GISEL-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6
-; GISEL-NEXT: v_mul_f32_e32 v9, 0x2f800000, v5
-; GISEL-NEXT: v_mul_f32_e32 v12, 0x2f800000, v6
-; GISEL-NEXT: v_trunc_f32_e32 v9, v9
-; GISEL-NEXT: v_trunc_f32_e32 v12, v12
-; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v9
-; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9
-; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v12
-; GISEL-NEXT: v_cvt_u32_f32_e32 v12, v12
-; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
-; GISEL-NEXT: v_mul_lo_u32 v13, v7, v9
+; GISEL-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7
+; GISEL-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6
+; GISEL-NEXT: v_mul_f32_e32 v11, 0x2f800000, v7
+; GISEL-NEXT: v_trunc_f32_e32 v8, v8
+; GISEL-NEXT: v_trunc_f32_e32 v11, v11
+; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8
+; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8
+; GISEL-NEXT: v_mac_f32_e32 v7, 0xcf800000, v11
+; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v11
; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
-; GISEL-NEXT: v_mul_lo_u32 v14, v10, v12
-; GISEL-NEXT: v_mul_lo_u32 v15, v7, v5
-; GISEL-NEXT: v_mul_lo_u32 v16, v8, v5
-; GISEL-NEXT: v_mul_hi_u32 v17, v7, v5
-; GISEL-NEXT: v_mul_lo_u32 v18, v10, v6
-; GISEL-NEXT: v_mul_lo_u32 v19, v11, v6
-; GISEL-NEXT: v_mul_hi_u32 v20, v10, v6
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v13
-; GISEL-NEXT: v_mul_lo_u32 v16, v9, v15
-; GISEL-NEXT: v_mul_hi_u32 v21, v5, v15
-; GISEL-NEXT: v_mul_hi_u32 v15, v9, v15
-; GISEL-NEXT: v_add_i32_e32 v14, vcc, v19, v14
-; GISEL-NEXT: v_mul_lo_u32 v19, v12, v18
-; GISEL-NEXT: v_mul_hi_u32 v22, v6, v18
-; GISEL-NEXT: v_mul_hi_u32 v18, v12, v18
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17
-; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v20
-; GISEL-NEXT: v_mul_lo_u32 v17, v5, v13
-; GISEL-NEXT: v_mul_lo_u32 v20, v9, v13
-; GISEL-NEXT: v_mul_hi_u32 v23, v5, v13
-; GISEL-NEXT: v_mul_hi_u32 v13, v9, v13
-; GISEL-NEXT: v_mul_lo_u32 v24, v6, v14
-; GISEL-NEXT: v_mul_lo_u32 v25, v12, v14
-; GISEL-NEXT: v_mul_hi_u32 v26, v6, v14
-; GISEL-NEXT: v_mul_hi_u32 v14, v12, v14
-; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v17
-; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v15, vcc, v20, v15
-; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v24
-; GISEL-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v18, vcc, v25, v18
-; GISEL-NEXT: v_cndmask_b32_e64 v25, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v21
-; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v23
-; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v22
-; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v18, vcc, v18, v26
-; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v16, vcc, v17, v16
-; GISEL-NEXT: v_add_i32_e32 v17, vcc, v20, v21
-; GISEL-NEXT: v_add_i32_e32 v19, vcc, v24, v19
-; GISEL-NEXT: v_add_i32_e32 v20, vcc, v25, v22
+; GISEL-NEXT: v_mul_lo_u32 v12, v4, v8
+; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7
+; GISEL-NEXT: v_mul_lo_u32 v13, v9, v11
+; GISEL-NEXT: v_mul_lo_u32 v14, v4, v6
+; GISEL-NEXT: v_mul_lo_u32 v15, v5, v6
+; GISEL-NEXT: v_mul_hi_u32 v16, v4, v6
+; GISEL-NEXT: v_mul_lo_u32 v17, v9, v7
+; GISEL-NEXT: v_mul_lo_u32 v18, v10, v7
+; GISEL-NEXT: v_mul_hi_u32 v19, v9, v7
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v12
+; GISEL-NEXT: v_mul_lo_u32 v15, v8, v14
+; GISEL-NEXT: v_mul_hi_u32 v20, v6, v14
+; GISEL-NEXT: v_mul_hi_u32 v14, v8, v14
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v13
+; GISEL-NEXT: v_mul_lo_u32 v18, v11, v17
+; GISEL-NEXT: v_mul_hi_u32 v21, v7, v17
+; GISEL-NEXT: v_mul_hi_u32 v17, v11, v17
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v19
+; GISEL-NEXT: v_mul_lo_u32 v16, v6, v12
+; GISEL-NEXT: v_mul_lo_u32 v19, v8, v12
+; GISEL-NEXT: v_mul_hi_u32 v22, v6, v12
+; GISEL-NEXT: v_mul_hi_u32 v12, v8, v12
+; GISEL-NEXT: v_mul_lo_u32 v23, v7, v13
+; GISEL-NEXT: v_mul_lo_u32 v24, v11, v13
+; GISEL-NEXT: v_mul_hi_u32 v25, v7, v13
+; GISEL-NEXT: v_mul_hi_u32 v13, v11, v13
; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16
; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v18, vcc, v18, v19
-; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v16, vcc, v17, v16
-; GISEL-NEXT: v_add_i32_e32 v17, vcc, v20, v19
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v16
-; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v15
-; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v13, vcc
-; GISEL-NEXT: v_mul_lo_u32 v13, v7, v5
-; GISEL-NEXT: v_mul_lo_u32 v8, v8, v5
-; GISEL-NEXT: v_mul_hi_u32 v15, v7, v5
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v18
-; GISEL-NEXT: v_addc_u32_e32 v12, vcc, v12, v14, vcc
-; GISEL-NEXT: v_mul_lo_u32 v14, v10, v6
-; GISEL-NEXT: v_mul_lo_u32 v11, v11, v6
-; GISEL-NEXT: v_mul_hi_u32 v16, v10, v6
-; GISEL-NEXT: v_mul_lo_u32 v7, v7, v9
-; GISEL-NEXT: v_mul_lo_u32 v17, v9, v13
-; GISEL-NEXT: v_mul_hi_u32 v18, v5, v13
-; GISEL-NEXT: v_mul_hi_u32 v13, v9, v13
-; GISEL-NEXT: v_mul_lo_u32 v10, v10, v12
-; GISEL-NEXT: v_mul_lo_u32 v19, v12, v14
-; GISEL-NEXT: v_mul_hi_u32 v20, v6, v14
-; GISEL-NEXT: v_mul_hi_u32 v14, v12, v14
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v10
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v15
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v16
-; GISEL-NEXT: v_mul_lo_u32 v10, v5, v7
-; GISEL-NEXT: v_mul_lo_u32 v11, v9, v7
-; GISEL-NEXT: v_mul_hi_u32 v15, v5, v7
-; GISEL-NEXT: v_mul_hi_u32 v7, v9, v7
-; GISEL-NEXT: v_mul_lo_u32 v16, v6, v8
-; GISEL-NEXT: v_mul_lo_u32 v21, v12, v8
-; GISEL-NEXT: v_mul_hi_u32 v22, v6, v8
-; GISEL-NEXT: v_mul_hi_u32 v8, v12, v8
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v17, v10
-; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13
-; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v16
+; GISEL-NEXT: v_add_i32_e32 v14, vcc, v19, v14
; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v14, vcc, v21, v14
-; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v18
-; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v15
+; GISEL-NEXT: v_add_i32_e32 v18, vcc, v18, v23
+; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v17, vcc, v24, v17
+; GISEL-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v20
; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v20
-; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v22
+; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v18, vcc, v18, v21
; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v17, v10
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15
-; GISEL-NEXT: v_add_i32_e32 v15, vcc, v19, v16
-; GISEL-NEXT: v_add_i32_e32 v16, vcc, v21, v18
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
-; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v25
+; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15
+; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v20
+; GISEL-NEXT: v_add_i32_e32 v18, vcc, v23, v18
+; GISEL-NEXT: v_add_i32_e32 v19, vcc, v24, v21
; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15
; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v11
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v15
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v13
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10
-; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v9, v7, vcc
-; GISEL-NEXT: v_mul_lo_u32 v9, 0, v5
-; GISEL-NEXT: v_mul_hi_u32 v10, v3, v5
-; GISEL-NEXT: v_mul_hi_u32 v5, 0, v5
+; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v18
+; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15
+; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v18
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v16
; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v14
-; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v12, v8, vcc
-; GISEL-NEXT: v_mul_lo_u32 v11, 0, v6
-; GISEL-NEXT: v_mul_hi_u32 v12, v2, v6
+; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v12, vcc
+; GISEL-NEXT: v_mul_lo_u32 v12, v4, v6
+; GISEL-NEXT: v_mul_lo_u32 v5, v5, v6
+; GISEL-NEXT: v_mul_hi_u32 v14, v4, v6
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v17
+; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v11, v13, vcc
+; GISEL-NEXT: v_mul_lo_u32 v13, v9, v7
+; GISEL-NEXT: v_mul_lo_u32 v10, v10, v7
+; GISEL-NEXT: v_mul_hi_u32 v15, v9, v7
+; GISEL-NEXT: v_mul_lo_u32 v4, v4, v8
+; GISEL-NEXT: v_mul_lo_u32 v16, v8, v12
+; GISEL-NEXT: v_mul_hi_u32 v17, v6, v12
+; GISEL-NEXT: v_mul_hi_u32 v12, v8, v12
+; GISEL-NEXT: v_mul_lo_u32 v9, v9, v11
+; GISEL-NEXT: v_mul_lo_u32 v18, v11, v13
+; GISEL-NEXT: v_mul_hi_u32 v19, v7, v13
+; GISEL-NEXT: v_mul_hi_u32 v13, v11, v13
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v5, v4
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v10, v9
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v14
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v15
+; GISEL-NEXT: v_mul_lo_u32 v9, v6, v4
+; GISEL-NEXT: v_mul_lo_u32 v10, v8, v4
+; GISEL-NEXT: v_mul_hi_u32 v14, v6, v4
+; GISEL-NEXT: v_mul_hi_u32 v4, v8, v4
+; GISEL-NEXT: v_mul_lo_u32 v15, v7, v5
+; GISEL-NEXT: v_mul_lo_u32 v20, v11, v5
+; GISEL-NEXT: v_mul_hi_u32 v21, v7, v5
+; GISEL-NEXT: v_mul_hi_u32 v5, v11, v5
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v16, v9
+; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12
+; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v15, vcc, v18, v15
+; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v20, v13
+; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v17
+; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v19
+; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v21
+; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v16, v9
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14
+; GISEL-NEXT: v_add_i32_e32 v14, vcc, v18, v15
+; GISEL-NEXT: v_add_i32_e32 v15, vcc, v20, v17
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9
+; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v12, v10
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v14
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v12
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9
+; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v8, v4, vcc
+; GISEL-NEXT: v_mul_lo_u32 v8, 0, v6
+; GISEL-NEXT: v_mul_hi_u32 v9, v3, v6
; GISEL-NEXT: v_mul_hi_u32 v6, 0, v6
-; GISEL-NEXT: v_mul_lo_u32 v13, v3, v7
-; GISEL-NEXT: v_mul_lo_u32 v14, 0, v7
-; GISEL-NEXT: v_mul_hi_u32 v15, v3, v7
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v13
+; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v11, v5, vcc
+; GISEL-NEXT: v_mul_lo_u32 v10, 0, v7
+; GISEL-NEXT: v_mul_hi_u32 v11, v2, v7
; GISEL-NEXT: v_mul_hi_u32 v7, 0, v7
-; GISEL-NEXT: v_mul_lo_u32 v16, v2, v8
-; GISEL-NEXT: v_mul_lo_u32 v17, 0, v8
-; GISEL-NEXT: v_mul_hi_u32 v18, v2, v8
-; GISEL-NEXT: v_mul_hi_u32 v8, 0, v8
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13
+; GISEL-NEXT: v_mul_lo_u32 v12, v3, v4
+; GISEL-NEXT: v_mul_lo_u32 v13, 0, v4
+; GISEL-NEXT: v_mul_hi_u32 v14, v3, v4
+; GISEL-NEXT: v_mul_hi_u32 v4, 0, v4
+; GISEL-NEXT: v_mul_lo_u32 v15, v2, v5
+; GISEL-NEXT: v_mul_lo_u32 v16, 0, v5
+; GISEL-NEXT: v_mul_hi_u32 v17, v2, v5
+; GISEL-NEXT: v_mul_hi_u32 v5, 0, v5
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12
+; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v13, v6
; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v14, v5
-; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v16
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v15
+; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v16, v7
; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v17, v6
-; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9
+; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v14
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v15
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v17
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v18
-; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v13, v9
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v14, v10
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v15, v10
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, v17, v12
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v11
-; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9
-; GISEL-NEXT: v_mul_lo_u32 v10, v1, v5
-; GISEL-NEXT: v_mul_lo_u32 v13, 0, v5
-; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11
-; GISEL-NEXT: v_mul_lo_u32 v12, v0, v6
-; GISEL-NEXT: v_mul_lo_u32 v14, 0, v6
-; GISEL-NEXT: v_mul_hi_u32 v6, v0, v6
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11
-; GISEL-NEXT: v_mul_lo_u32 v7, v1, v7
-; GISEL-NEXT: v_mul_lo_u32 v8, v0, v8
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v13, v7
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v14, v8
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v8, v6
-; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v10
-; GISEL-NEXT: v_subb_u32_e64 v7, s[4:5], 0, v5, vcc
-; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], 0, v5
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8
+; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10
+; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8
+; GISEL-NEXT: v_mul_lo_u32 v9, v1, v6
+; GISEL-NEXT: v_mul_lo_u32 v12, 0, v6
+; GISEL-NEXT: v_mul_hi_u32 v6, v1, v6
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
+; GISEL-NEXT: v_mul_lo_u32 v11, v0, v7
+; GISEL-NEXT: v_mul_lo_u32 v13, 0, v7
+; GISEL-NEXT: v_mul_hi_u32 v7, v0, v7
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10
+; GISEL-NEXT: v_mul_lo_u32 v4, v1, v4
+; GISEL-NEXT: v_mul_lo_u32 v5, v0, v5
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v12, v4
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v13, v5
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v6
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7
+; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v9
+; GISEL-NEXT: v_subb_u32_e64 v6, s[4:5], 0, v4, vcc
+; GISEL-NEXT: v_sub_i32_e64 v4, s[4:5], 0, v4
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v1
-; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5]
-; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v12
-; GISEL-NEXT: v_subb_u32_e64 v9, s[6:7], 0, v6, s[4:5]
-; GISEL-NEXT: v_sub_i32_e64 v6, s[6:7], 0, v6
+; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v11
+; GISEL-NEXT: v_subb_u32_e64 v8, s[6:7], 0, v5, s[4:5]
+; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], 0, v5
; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v2, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[6:7]
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v7
-; GISEL-NEXT: v_cndmask_b32_e64 v8, v4, v8, s[6:7]
-; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9
-; GISEL-NEXT: v_cndmask_b32_e32 v10, v4, v10, vcc
-; GISEL-NEXT: v_subbrev_u32_e64 v6, vcc, 0, v6, s[4:5]
-; GISEL-NEXT: v_sub_i32_e32 v11, vcc, v3, v1
+; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[6:7]
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v6
+; GISEL-NEXT: v_cndmask_b32_e64 v7, -1, v7, s[6:7]
+; GISEL-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8
+; GISEL-NEXT: v_cndmask_b32_e32 v9, -1, v9, vcc
+; GISEL-NEXT: v_subbrev_u32_e64 v5, vcc, 0, v5, s[4:5]
+; GISEL-NEXT: v_sub_i32_e32 v10, vcc, v3, v1
+; GISEL-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v10, v1
+; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc
+; GISEL-NEXT: v_sub_i32_e32 v12, vcc, v2, v0
; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v11, v1
-; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc
-; GISEL-NEXT: v_sub_i32_e32 v13, vcc, v2, v0
-; GISEL-NEXT: v_subbrev_u32_e32 v6, vcc, 0, v6, vcc
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v13, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v12, v0
+; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+; GISEL-NEXT: v_cndmask_b32_e32 v11, -1, v11, vcc
+; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v10, v1
+; GISEL-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v4, vcc
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
-; GISEL-NEXT: v_cndmask_b32_e32 v12, v4, v12, vcc
-; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v11, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v13, -1, v13, vcc
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v12, v0
; GISEL-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v5, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v10, v1, vcc
+; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v13
+; GISEL-NEXT: v_cndmask_b32_e64 v10, v12, v0, s[4:5]
; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v14, vcc
-; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v13, v0
-; GISEL-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v6, vcc
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v11, v1, vcc
-; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v4, v13, v0, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v15, vcc
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
; GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v3, v6, v14, s[4:5]
-; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v10
-; GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v3, v5, v15, s[4:5]
+; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v9
+; GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v6, v4, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[4:5]
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_urem_v2i64_24bit:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
index ab84d8a3725ae4..671ead6127308d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
@@ -131,65 +131,35 @@ define i1 @zeromask_f16(half %x) nounwind {
; FIXME: DAG and GlobalISel return different values for i1 true
define i1 @allflags_f16(half %x) nounwind {
-; GFX7SELDAG-LABEL: allflags_f16:
-; GFX7SELDAG: ; %bb.0:
-; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7SELDAG-NEXT: v_mov_b32_e32 v0, 1
-; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX7GLISEL-LABEL: allflags_f16:
-; GFX7GLISEL: ; %bb.0:
-; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7GLISEL-NEXT: v_mov_b32_e32 v0, -1
-; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8SELDAG-LABEL: allflags_f16:
-; GFX8SELDAG: ; %bb.0:
-; GFX8SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8SELDAG-NEXT: v_mov_b32_e32 v0, 1
-; GFX8SELDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8GLISEL-LABEL: allflags_f16:
-; GFX8GLISEL: ; %bb.0:
-; GFX8GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8GLISEL-NEXT: v_mov_b32_e32 v0, -1
-; GFX8GLISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9SELDAG-LABEL: allflags_f16:
-; GFX9SELDAG: ; %bb.0:
-; GFX9SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9SELDAG-NEXT: v_mov_b32_e32 v0, 1
-; GFX9SELDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9GLISEL-LABEL: allflags_f16:
-; GFX9GLISEL: ; %bb.0:
-; GFX9GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9GLISEL-NEXT: v_mov_b32_e32 v0, -1
-; GFX9GLISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX7CHECK-LABEL: allflags_f16:
+; GFX7CHECK: ; %bb.0:
+; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7CHECK-NEXT: v_mov_b32_e32 v0, 1
+; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10SELDAG-LABEL: allflags_f16:
-; GFX10SELDAG: ; %bb.0:
-; GFX10SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10SELDAG-NEXT: v_mov_b32_e32 v0, 1
-; GFX10SELDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX8CHECK-LABEL: allflags_f16:
+; GFX8CHECK: ; %bb.0:
+; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8CHECK-NEXT: v_mov_b32_e32 v0, 1
+; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10GLISEL-LABEL: allflags_f16:
-; GFX10GLISEL: ; %bb.0:
-; GFX10GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10GLISEL-NEXT: v_mov_b32_e32 v0, -1
-; GFX10GLISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX9CHECK-LABEL: allflags_f16:
+; GFX9CHECK: ; %bb.0:
+; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9CHECK-NEXT: v_mov_b32_e32 v0, 1
+; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11SELDAG-LABEL: allflags_f16:
-; GFX11SELDAG: ; %bb.0:
-; GFX11SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-NEXT: v_mov_b32_e32 v0, 1
-; GFX11SELDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX10CHECK-LABEL: allflags_f16:
+; GFX10CHECK: ; %bb.0:
+; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10CHECK-NEXT: v_mov_b32_e32 v0, 1
+; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11GLISEL-LABEL: allflags_f16:
-; GFX11GLISEL: ; %bb.0:
-; GFX11GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-NEXT: v_mov_b32_e32 v0, -1
-; GFX11GLISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX11CHECK-LABEL: allflags_f16:
+; GFX11CHECK: ; %bb.0:
+; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11CHECK-NEXT: v_mov_b32_e32 v0, 1
+; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
%1 = call i1 @llvm.is.fpclass.f16(half %x, i32 1023) ; 0x3ff
ret i1 %1
}
More information about the llvm-commits
mailing list