[llvm] [GlobalISel] Combine [S,U]SUBO (PR #116489)
Thorsten Schütt via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 16 22:34:33 PST 2024
https://github.com/tschuett updated https://github.com/llvm/llvm-project/pull/116489
>From 86a0535e87599a6086a3f78849d5ed2caadccfbf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Sat, 16 Nov 2024 16:55:15 +0100
Subject: [PATCH 1/3] [GlobalISel] Combine [S,U]SUBO
We import the llvm.{s,u}sub.with.overflow.* intrinsics as G_SSUBO/G_USUBO,
but the Legalizer also builds these opcodes while legalizing other opcodes;
see narrowScalarAddSub.
---
.../llvm/CodeGen/GlobalISel/CombinerHelper.h | 3 +
.../CodeGen/GlobalISel/GenericMachineInstrs.h | 17 +
.../include/llvm/Target/GlobalISel/Combine.td | 17 +-
.../GlobalISel/CombinerHelperArtifacts.cpp | 72 ++
llvm/lib/Target/AArch64/AArch64Combine.td | 4 +-
.../AArch64/GlobalISel/combine-overflow.mir | 46 +
llvm/test/CodeGen/AArch64/popcount.ll | 4 +-
.../CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll | 358 ++++----
.../CodeGen/AMDGPU/GlobalISel/srem.i64.ll | 788 +++++++++---------
.../CodeGen/AMDGPU/GlobalISel/urem.i64.ll | 291 ++++---
10 files changed, 874 insertions(+), 726 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index b1232a368a3657..55c3b72c8e027f 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -939,6 +939,9 @@ class CombinerHelper {
// merge_values(_, zero) -> zext
bool matchMergeXAndZero(const MachineInstr &MI, BuildFnTy &MatchInfo);
+ // [S,U]SUBO -> G_SUB + constant carry-out when overflow is known
+ bool matchSuboCarryOut(const MachineInstr &MI, BuildFnTy &MatchInfo);
+
private:
/// Checks for legality of an indexed variant of \p LdSt.
bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 4de14dee190fb3..9e5d4d34f24d2b 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -486,6 +486,23 @@ class GAddCarryOut : public GBinOpCarryOut {
}
};
+/// Represents overflowing sub operations that do not consume a carry-in.
+/// G_USUBO, G_SSUBO
+class GSubCarryOut : public GBinOpCarryOut {
+public:
+ bool isSigned() const { return getOpcode() == TargetOpcode::G_SSUBO; }
+
+ static bool classof(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ case TargetOpcode::G_USUBO:
+ case TargetOpcode::G_SSUBO:
+ return true;
+ default:
+ return false;
+ }
+ }
+};
+
/// Represents overflowing add/sub operations that also consume a carry-in.
/// G_UADDE, G_SADDE, G_USUBE, G_SSUBE
class GAddSubCarryInOut : public GAddSubCarryOut {
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index f8379609bf1d98..b4f1551e965b14 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1385,6 +1385,12 @@ def match_addos : GICombineRule<
[{ return Helper.matchAddOverflow(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+def match_subos : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_SSUBO, G_USUBO):$root,
+ [{ return Helper.matchSuboCarryOut(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+
def match_extract_of_element_undef_vector: GICombineRule <
(defs root:$root),
(match (G_IMPLICIT_DEF $vector),
@@ -1901,6 +1907,13 @@ def cmp_combines: GICombineGroup<[
redundant_binop_in_equality
]>;
+
+def artifact_combines: GICombineGroup<[
+ merge_combines,
+ match_addos,
+ match_subos
+]>;
+
// FIXME: These should use the custom predicate feature once it lands.
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
undef_to_negative_one,
@@ -1984,9 +1997,9 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
and_or_disjoint_mask, fma_combines, fold_binop_into_select,
sub_add_reg, select_to_minmax,
fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
- combine_concat_vector, match_addos,
+ combine_concat_vector,
sext_trunc, zext_trunc, prefer_sign_combines, shuffle_combines,
- combine_use_vector_truncate, merge_combines]>;
+ combine_use_vector_truncate, artifact_combines]>;
// A combine group used to for prelegalizer combiners at -O0. The combines in
// this group have been selected based on experiments to balance code size and
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperArtifacts.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperArtifacts.cpp
index 797a1e84e21e35..85c56ee6863a95 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperArtifacts.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperArtifacts.cpp
@@ -84,3 +84,75 @@ bool CombinerHelper::matchMergeXAndZero(const MachineInstr &MI,
};
return true;
}
+
+bool CombinerHelper::matchSuboCarryOut(const MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ const GSubCarryOut *Subo = cast<GSubCarryOut>(&MI);
+
+ Register Dst = Subo->getReg(0);
+ Register LHS = Subo->getLHSReg();
+ Register RHS = Subo->getRHSReg();
+ Register Carry = Subo->getCarryOutReg();
+ LLT DstTy = MRI.getType(Dst);
+ LLT CarryTy = MRI.getType(Carry);
+
+ // Check legality of the G_SUB and carry constant before using known bits.
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy}}) ||
+ !isConstantLegalOrBeforeLegalizer(CarryTy))
+ return false;
+
+ if (Subo->isSigned()) {
+ // G_SSUBO: decide overflow from the signed ranges implied by known bits.
+ ConstantRange KBLHS = ConstantRange::fromKnownBits(KB->getKnownBits(LHS),
+ /* IsSigned= */ true);
+ ConstantRange KBRHS = ConstantRange::fromKnownBits(KB->getKnownBits(RHS),
+ /* IsSigned= */ true);
+ switch (KBLHS.signedSubMayOverflow(KBRHS)) {
+ case ConstantRange::OverflowResult::MayOverflow:
+ return false;
+ case ConstantRange::OverflowResult::NeverOverflows: {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
+ B.buildConstant(Carry, 0);
+ };
+ return true;
+ }
+ case ConstantRange::OverflowResult::AlwaysOverflowsLow:
+ case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildSub(Dst, LHS, RHS);
+ B.buildConstant(Carry, 1);
+ };
+ return true;
+ }
+ }
+ return false;
+ }
+
+ // G_USUBO: decide overflow from the unsigned ranges implied by known bits.
+ ConstantRange KBLHS = ConstantRange::fromKnownBits(KB->getKnownBits(LHS),
+ /* IsSigned= */ false);
+ ConstantRange KBRHS = ConstantRange::fromKnownBits(KB->getKnownBits(RHS),
+ /* IsSigned= */ false);
+ switch (KBLHS.unsignedSubMayOverflow(KBRHS)) {
+ case ConstantRange::OverflowResult::MayOverflow:
+ return false;
+ case ConstantRange::OverflowResult::NeverOverflows: {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
+ B.buildConstant(Carry, 0);
+ };
+ return true;
+ }
+ case ConstantRange::OverflowResult::AlwaysOverflowsLow:
+ case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildSub(Dst, LHS, RHS);
+ B.buildConstant(Carry, 1);
+ };
+ return true;
+ }
+ }
+
+ return false;
+}
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 8af8cdfeba6ac4..23563bf9b7881f 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -320,7 +320,7 @@ def AArch64PostLegalizerCombiner
hoist_logic_op_with_same_opcode_hands,
redundant_and, xor_of_and_with_same_reg,
extractvecelt_pairwise_add, redundant_or,
- mul_const, redundant_sext_inreg,
+ mul_const, redundant_sext_inreg, artifact_combines,
form_bitfield_extract, rotate_out_of_range,
icmp_to_true_false_known_bits,
select_combines, fold_merge_to_zext,
@@ -328,7 +328,7 @@ def AArch64PostLegalizerCombiner
ptr_add_immed_chain, overlapping_and,
split_store_zero_128, undef_combines,
select_to_minmax, or_to_bsp, combine_concat_vector,
- commute_constant_to_rhs, merge_combines,
+ commute_constant_to_rhs,
push_freeze_to_prevent_poison_from_propagating,
combine_mul_cmlt, combine_use_vector_truncate]> {
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-overflow.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-overflow.mir
index bc4b5ae7c066a6..87b30f558539c8 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-overflow.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-overflow.mir
@@ -176,3 +176,49 @@ body: |
$q1 = COPY %o_wide
RET_ReallyLR implicit $w0
...
+---
+name: sub_may
+body: |
+ bb.0:
+ liveins: $w0, $w1
+ ; CHECK-LABEL: name: sub_may
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: %const:_(s32) = G_CONSTANT i32 512
+ ; CHECK-NEXT: %sub:_(s32), %o:_(s1) = G_SSUBO [[COPY]], %const
+ ; CHECK-NEXT: %o_wide:_(s32) = G_ZEXT %o(s1)
+ ; CHECK-NEXT: $w0 = COPY %sub(s32)
+ ; CHECK-NEXT: $w1 = COPY %o_wide(s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:_(s32) = COPY $w0
+ %const:_(s32) = G_CONSTANT i32 512
+ %sub:_(s32), %o:_(s1) = G_SSUBO %0, %const
+ %o_wide:_(s32) = G_ZEXT %o(s1)
+ $w0 = COPY %sub(s32)
+ $w1 = COPY %o_wide
+ RET_ReallyLR implicit $w0
+...
+---
+name: usub_may
+body: |
+ bb.0:
+ liveins: $w0, $w1
+ ; CHECK-LABEL: name: usub_may
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: %const:_(s32) = G_CONSTANT i32 512
+ ; CHECK-NEXT: %sub:_(s32), %o:_(s1) = G_USUBO [[COPY]], %const
+ ; CHECK-NEXT: %o_wide:_(s32) = G_ZEXT %o(s1)
+ ; CHECK-NEXT: $w0 = COPY %sub(s32)
+ ; CHECK-NEXT: $w1 = COPY %o_wide(s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:_(s32) = COPY $w0
+ %const:_(s32) = G_CONSTANT i32 512
+ %sub:_(s32), %o:_(s1) = G_USUBO %0, %const
+ %o_wide:_(s32) = G_ZEXT %o(s1)
+ $w0 = COPY %sub(s32)
+ $w1 = COPY %o_wide
+ RET_ReallyLR implicit $w0
+...
diff --git a/llvm/test/CodeGen/AArch64/popcount.ll b/llvm/test/CodeGen/AArch64/popcount.ll
index 1fc4de1c48b7dd..f9f1cd4b1fcf76 100644
--- a/llvm/test/CodeGen/AArch64/popcount.ll
+++ b/llvm/test/CodeGen/AArch64/popcount.ll
@@ -113,9 +113,9 @@ define i16 @popcount256(ptr nocapture nonnull readonly %0) {
;
; GISEL-LABEL: popcount256:
; GISEL: // %bb.0: // %Entry
-; GISEL-NEXT: ldp x8, x9, [x0, #16]
+; GISEL-NEXT: ldp x8, x9, [x0]
; GISEL-NEXT: mov v0.d[0], x8
-; GISEL-NEXT: ldp x8, x10, [x0]
+; GISEL-NEXT: ldp x8, x10, [x0, #16]
; GISEL-NEXT: mov v1.d[0], x8
; GISEL-NEXT: mov v0.d[1], x9
; GISEL-NEXT: mov v1.d[1], x10
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
index 613c73f7b9368b..14b30e0d79946c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
@@ -1178,212 +1178,212 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, 0x12d8fb
; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v5, 0
-; GISEL-NEXT: s_sub_u32 s6, 0, 0x12d8fb
-; GISEL-NEXT: s_subb_u32 s7, 0, 0
+; GISEL-NEXT: v_mov_b32_e32 v6, 0xffed2705
+; GISEL-NEXT: s_mov_b32 s6, 1
; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT: s_cmp_lg_u32 s6, 0
+; GISEL-NEXT: s_subb_u32 s6, 0, 0
; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
-; GISEL-NEXT: v_trunc_f32_e32 v6, v5
-; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6
-; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v4
-; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v6
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v5, 0
-; GISEL-NEXT: v_mov_b32_e32 v4, v9
-; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s6, v7, v[4:5]
-; GISEL-NEXT: v_mul_lo_u32 v4, v7, v8
-; GISEL-NEXT: v_mul_hi_u32 v6, v5, v8
-; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s7, v5, v[9:10]
-; GISEL-NEXT: v_mul_hi_u32 v8, v7, v8
-; GISEL-NEXT: v_mul_lo_u32 v10, v5, v9
-; GISEL-NEXT: v_mul_lo_u32 v11, v7, v9
-; GISEL-NEXT: v_mul_hi_u32 v12, v5, v9
-; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10
-; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v6
-; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v10, v4
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v11, v8
+; GISEL-NEXT: v_trunc_f32_e32 v8, v5
+; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v8
+; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v4
+; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v8
+; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v7, 0
+; GISEL-NEXT: v_mov_b32_e32 v8, v5
+; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v6, v9, v[8:9]
+; GISEL-NEXT: v_mul_hi_u32 v12, v9, v4
+; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], s6, v7, v[10:11]
+; GISEL-NEXT: v_mul_lo_u32 v10, v9, v4
+; GISEL-NEXT: v_mul_hi_u32 v11, v7, v4
+; GISEL-NEXT: v_mul_lo_u32 v8, v7, v13
+; GISEL-NEXT: v_mul_lo_u32 v4, v9, v13
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v12
-; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v6, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v8, v6
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v5, v4
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v11, 0
-; GISEL-NEXT: v_addc_u32_e32 v12, vcc, v7, v6, vcc
-; GISEL-NEXT: v_mov_b32_e32 v4, v9
-; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s6, v12, v[4:5]
-; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v1
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6
-; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s7, v11, v[9:10]
-; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v6, vcc
-; GISEL-NEXT: v_xor_b32_e32 v10, v0, v6
-; GISEL-NEXT: v_mul_lo_u32 v0, v12, v8
-; GISEL-NEXT: v_mul_lo_u32 v4, v11, v9
-; GISEL-NEXT: v_xor_b32_e32 v13, v1, v6
-; GISEL-NEXT: v_mul_hi_u32 v1, v11, v8
-; GISEL-NEXT: v_mul_hi_u32 v8, v12, v8
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v14, v8
+; GISEL-NEXT: v_mul_hi_u32 v14, v7, v13
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v12
+; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v14
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14
+; GISEL-NEXT: v_mul_hi_u32 v13, v9, v13
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v14, v8
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v13, v8
+; GISEL-NEXT: v_add_i32_e32 v16, vcc, v7, v4
+; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0
+; GISEL-NEXT: v_addc_u32_e32 v17, vcc, v9, v8, vcc
+; GISEL-NEXT: v_mov_b32_e32 v4, v14
+; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5]
+; GISEL-NEXT: v_ashrrev_i32_e32 v8, 31, v1
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v8
+; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], s6, v16, v[14:15]
+; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v8, vcc
+; GISEL-NEXT: v_xor_b32_e32 v15, v0, v8
+; GISEL-NEXT: v_mul_lo_u32 v0, v17, v13
+; GISEL-NEXT: v_mul_lo_u32 v4, v16, v14
+; GISEL-NEXT: v_xor_b32_e32 v18, v1, v8
+; GISEL-NEXT: v_mul_hi_u32 v1, v16, v13
+; GISEL-NEXT: v_mul_hi_u32 v13, v17, v13
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v1, v12, v9
+; GISEL-NEXT: v_mul_lo_u32 v1, v17, v14
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0
-; GISEL-NEXT: v_mul_hi_u32 v4, v11, v9
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v8
-; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT: v_mul_hi_u32 v4, v16, v14
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v13
+; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v4
; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v8, v4
-; GISEL-NEXT: v_mul_hi_u32 v8, v12, v9
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v13, v4
+; GISEL-NEXT: v_mul_hi_u32 v13, v17, v14
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v4, v1
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0
-; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v12, v1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v8, v13, v0
-; GISEL-NEXT: v_mul_lo_u32 v9, v10, v1
-; GISEL-NEXT: v_mul_hi_u32 v11, v10, v0
-; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v16, v0
+; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v17, v1, vcc
+; GISEL-NEXT: v_mul_lo_u32 v13, v18, v0
+; GISEL-NEXT: v_mul_lo_u32 v14, v15, v1
+; GISEL-NEXT: v_mul_hi_u32 v16, v15, v0
+; GISEL-NEXT: v_mul_hi_u32 v0, v18, v0
; GISEL-NEXT: v_mov_b32_e32 v4, 0x12d8fb
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11
-; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v11, v13, v1
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8
-; GISEL-NEXT: v_mul_hi_u32 v9, v10, v1
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v0, v8
-; GISEL-NEXT: v_mul_hi_u32 v12, v13, v1
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v11, 0
-; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v8
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v4, v12, v[1:2]
-; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v10, v0
-; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v13, v8, vcc
-; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], v13, v8
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v16
+; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT: v_mul_lo_u32 v16, v18, v1
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13
+; GISEL-NEXT: v_mul_hi_u32 v14, v15, v1
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v16, v0
+; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v14
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v14, vcc, v16, v14
+; GISEL-NEXT: v_add_i32_e32 v16, vcc, v0, v13
+; GISEL-NEXT: v_mul_hi_u32 v17, v18, v1
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v16, 0
+; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13
+; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v13
+; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v4, v17, v[1:2]
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v15, v0
+; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v18, v13, vcc
+; GISEL-NEXT: v_sub_i32_e64 v13, s[4:5], v18, v13
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5]
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1
-; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v8, vcc
-; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v0, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v10, -1, v9, s[4:5]
-; GISEL-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v1, vcc
-; GISEL-NEXT: s_sub_u32 s6, 0, 0x12d8fb
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v11
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v5, 0
-; GISEL-NEXT: v_addc_u32_e32 v14, vcc, 0, v12, vcc
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9
-; GISEL-NEXT: v_cndmask_b32_e32 v15, -1, v8, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[1:2]
-; GISEL-NEXT: s_subb_u32 s7, 0, 0
+; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v13, vcc
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v16
+; GISEL-NEXT: v_addc_u32_e32 v15, vcc, 0, v17, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
+; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v18, -1, v0, vcc
+; GISEL-NEXT: v_mov_b32_e32 v0, v5
+; GISEL-NEXT: v_cndmask_b32_e64 v14, -1, v14, s[4:5]
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v9, v[0:1]
+; GISEL-NEXT: s_mov_b32 s6, 1
+; GISEL-NEXT: s_cmp_lg_u32 s6, 0
+; GISEL-NEXT: s_subb_u32 s6, 0, 0
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v7, v[0:1]
; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v13
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s7, v5, v[8:9]
-; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v14, vcc
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15
-; GISEL-NEXT: v_cndmask_b32_e32 v9, v13, v1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v1, v7, v0
-; GISEL-NEXT: v_mul_lo_u32 v13, v5, v8
-; GISEL-NEXT: v_mul_hi_u32 v15, v5, v0
-; GISEL-NEXT: v_cndmask_b32_e32 v14, v14, v16, vcc
-; GISEL-NEXT: v_mul_hi_u32 v0, v7, v0
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v13
-; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v15
-; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v15, v7, v8
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1
-; GISEL-NEXT: v_mul_hi_u32 v13, v5, v8
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v13
-; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v15, v13
-; GISEL-NEXT: v_mul_hi_u32 v8, v7, v8
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; GISEL-NEXT: v_mul_lo_u32 v19, v7, v0
+; GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v15, vcc
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18
+; GISEL-NEXT: v_cndmask_b32_e32 v13, v13, v1, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v5, v15, v5, vcc
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v19
+; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v11
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v0
-; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v7, v1, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v5, 0
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10
-; GISEL-NEXT: v_cndmask_b32_e32 v7, v11, v9, vcc
-; GISEL-NEXT: v_xor_b32_e32 v10, v7, v6
-; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s6, v13, v[1:2]
-; GISEL-NEXT: v_cndmask_b32_e32 v9, v12, v14, vcc
-; GISEL-NEXT: v_xor_b32_e32 v1, v9, v6
-; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s7, v5, v[7:8]
-; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v3
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v9
-; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v9, vcc
-; GISEL-NEXT: v_xor_b32_e32 v8, v2, v9
-; GISEL-NEXT: v_mul_lo_u32 v2, v13, v0
-; GISEL-NEXT: v_mul_lo_u32 v11, v5, v7
-; GISEL-NEXT: v_xor_b32_e32 v12, v3, v9
-; GISEL-NEXT: v_mul_hi_u32 v3, v5, v0
-; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v11
+; GISEL-NEXT: v_mul_lo_u32 v11, v9, v0
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1
+; GISEL-NEXT: v_mul_hi_u32 v10, v7, v0
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
+; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11
+; GISEL-NEXT: v_mul_hi_u32 v0, v9, v0
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1
+; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v1
+; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v0, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v7, 0
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14
+; GISEL-NEXT: v_cndmask_b32_e32 v11, v17, v5, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v9, v[1:2]
+; GISEL-NEXT: v_xor_b32_e32 v1, v11, v8
+; GISEL-NEXT: v_ashrrev_i32_e32 v11, 31, v3
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v7, v[5:6]
+; GISEL-NEXT: v_cndmask_b32_e32 v10, v16, v13, vcc
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v11
+; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v11, vcc
+; GISEL-NEXT: v_xor_b32_e32 v12, v2, v11
+; GISEL-NEXT: v_mul_lo_u32 v2, v9, v0
+; GISEL-NEXT: v_mul_lo_u32 v6, v7, v5
+; GISEL-NEXT: v_xor_b32_e32 v13, v3, v11
+; GISEL-NEXT: v_mul_hi_u32 v3, v7, v0
+; GISEL-NEXT: v_mul_hi_u32 v0, v9, v0
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v6
+; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v3, v13, v7
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v11, v2
-; GISEL-NEXT: v_mul_hi_u32 v11, v5, v7
+; GISEL-NEXT: v_mul_lo_u32 v3, v9, v5
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2
+; GISEL-NEXT: v_mul_hi_u32 v6, v7, v5
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0
; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v11
-; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v11
-; GISEL-NEXT: v_mul_hi_u32 v7, v13, v7
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6
+; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6
+; GISEL-NEXT: v_mul_hi_u32 v5, v9, v5
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7, v2
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v0
-; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v13, v2, vcc
-; GISEL-NEXT: v_mul_lo_u32 v5, v12, v3
-; GISEL-NEXT: v_mul_lo_u32 v7, v8, v2
-; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v10, v6
-; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v6, vcc
-; GISEL-NEXT: v_mul_hi_u32 v6, v8, v3
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7
-; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6
-; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v5, v2
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v7, v0
+; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v9, v2, vcc
+; GISEL-NEXT: v_mul_lo_u32 v5, v13, v3
; GISEL-NEXT: v_mul_lo_u32 v6, v12, v2
-; GISEL-NEXT: v_mul_hi_u32 v3, v12, v3
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5
-; GISEL-NEXT: v_mul_hi_u32 v7, v8, v2
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v6, v3
+; GISEL-NEXT: v_xor_b32_e32 v10, v10, v8
+; GISEL-NEXT: v_mul_hi_u32 v7, v12, v3
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v10, v8
+; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v8, vcc
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7
+; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GISEL-NEXT: v_mul_lo_u32 v7, v13, v2
+; GISEL-NEXT: v_mul_hi_u32 v3, v13, v3
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5
+; GISEL-NEXT: v_mul_hi_u32 v6, v12, v2
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v7, v3
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6
+; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6
; GISEL-NEXT: v_add_i32_e32 v7, vcc, v3, v5
-; GISEL-NEXT: v_mul_hi_u32 v10, v12, v2
+; GISEL-NEXT: v_mul_hi_u32 v8, v13, v2
; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v7, 0
; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v5
-; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v10, v[3:4]
-; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v8, v2
-; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v12, v5, vcc
-; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v12, v5
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v5
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v8, v[3:4]
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v12, v2
+; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v13, v5, vcc
+; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v13, v5
; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4
; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v4
@@ -1392,23 +1392,23 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v3, -1, v6, s[4:5]
; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v7
-; GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v10, vcc
+; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v8, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
; GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc
; GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v6
-; GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v8, vcc
+; GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v9, vcc
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
; GISEL-NEXT: v_cndmask_b32_e32 v2, v6, v4, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v4, v8, v5, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v4, v9, v5, vcc
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
; GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v10, v4, vcc
-; GISEL-NEXT: v_xor_b32_e32 v2, v2, v9
-; GISEL-NEXT: v_xor_b32_e32 v3, v3, v9
-; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v9
-; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v3, v8, v4, vcc
+; GISEL-NEXT: v_xor_b32_e32 v2, v2, v11
+; GISEL-NEXT: v_xor_b32_e32 v3, v3, v11
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v11
+; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v11, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_sdiv_v2i64_oddk_denom:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
index d5e22df59ccb37..ee7a040e41fd5e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
@@ -1106,210 +1106,210 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, 0x1000
; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v5, 0
-; GISEL-NEXT: s_sub_u32 s6, 0, 0x1000
-; GISEL-NEXT: s_subb_u32 s7, 0, 0
+; GISEL-NEXT: v_mov_b32_e32 v6, 0xfffff000
+; GISEL-NEXT: s_mov_b32 s6, 1
; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT: s_cmp_lg_u32 s6, 0
+; GISEL-NEXT: s_subb_u32 s6, 0, 0
; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
-; GISEL-NEXT: v_trunc_f32_e32 v6, v5
-; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6
-; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v4
-; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v6
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v5, 0
-; GISEL-NEXT: v_mov_b32_e32 v4, v9
-; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s6, v7, v[4:5]
-; GISEL-NEXT: v_mul_lo_u32 v4, v7, v8
-; GISEL-NEXT: v_mul_hi_u32 v6, v5, v8
-; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s7, v5, v[9:10]
-; GISEL-NEXT: v_mul_hi_u32 v8, v7, v8
-; GISEL-NEXT: v_mul_lo_u32 v10, v5, v9
-; GISEL-NEXT: v_mul_lo_u32 v11, v7, v9
-; GISEL-NEXT: v_mul_hi_u32 v12, v5, v9
-; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10
-; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v6
-; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v10, v4
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v11, v8
+; GISEL-NEXT: v_trunc_f32_e32 v8, v5
+; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v8
+; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v4
+; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v8
+; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v7, 0
+; GISEL-NEXT: v_mov_b32_e32 v8, v5
+; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v6, v9, v[8:9]
+; GISEL-NEXT: v_mul_hi_u32 v12, v9, v4
+; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], s6, v7, v[10:11]
+; GISEL-NEXT: v_mul_lo_u32 v10, v9, v4
+; GISEL-NEXT: v_mul_hi_u32 v11, v7, v4
+; GISEL-NEXT: v_mul_lo_u32 v8, v7, v13
+; GISEL-NEXT: v_mul_lo_u32 v4, v9, v13
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v12
-; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v6, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v8, v6
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v5, v4
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v11, 0
-; GISEL-NEXT: v_addc_u32_e32 v12, vcc, v7, v6, vcc
-; GISEL-NEXT: v_mov_b32_e32 v4, v9
-; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s6, v12, v[4:5]
-; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v1
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6
-; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s7, v11, v[9:10]
-; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v6, vcc
-; GISEL-NEXT: v_xor_b32_e32 v10, v0, v6
-; GISEL-NEXT: v_mul_lo_u32 v0, v12, v8
-; GISEL-NEXT: v_mul_lo_u32 v4, v11, v9
-; GISEL-NEXT: v_xor_b32_e32 v13, v1, v6
-; GISEL-NEXT: v_mul_hi_u32 v1, v11, v8
-; GISEL-NEXT: v_mul_hi_u32 v8, v12, v8
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v14, v8
+; GISEL-NEXT: v_mul_hi_u32 v14, v7, v13
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v12
+; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v14
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14
+; GISEL-NEXT: v_mul_hi_u32 v13, v9, v13
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v14, v8
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v13, v8
+; GISEL-NEXT: v_add_i32_e32 v16, vcc, v7, v4
+; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0
+; GISEL-NEXT: v_addc_u32_e32 v17, vcc, v9, v8, vcc
+; GISEL-NEXT: v_mov_b32_e32 v4, v14
+; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5]
+; GISEL-NEXT: v_ashrrev_i32_e32 v8, 31, v1
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v8
+; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], s6, v16, v[14:15]
+; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v8, vcc
+; GISEL-NEXT: v_xor_b32_e32 v15, v0, v8
+; GISEL-NEXT: v_mul_lo_u32 v0, v17, v13
+; GISEL-NEXT: v_mul_lo_u32 v4, v16, v14
+; GISEL-NEXT: v_xor_b32_e32 v18, v1, v8
+; GISEL-NEXT: v_mul_hi_u32 v1, v16, v13
+; GISEL-NEXT: v_mul_hi_u32 v13, v17, v13
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v1, v12, v9
+; GISEL-NEXT: v_mul_lo_u32 v1, v17, v14
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0
-; GISEL-NEXT: v_mul_hi_u32 v4, v11, v9
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v8
-; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT: v_mul_hi_u32 v4, v16, v14
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v13
+; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v4
; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v8, v4
-; GISEL-NEXT: v_mul_hi_u32 v8, v12, v9
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v13, v4
+; GISEL-NEXT: v_mul_hi_u32 v13, v17, v14
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v4, v1
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0
-; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v12, v1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v8, v13, v0
-; GISEL-NEXT: v_mul_lo_u32 v9, v10, v1
-; GISEL-NEXT: v_mul_hi_u32 v11, v10, v0
-; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v16, v0
+; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v17, v1, vcc
+; GISEL-NEXT: v_mul_lo_u32 v13, v18, v0
+; GISEL-NEXT: v_mul_lo_u32 v14, v15, v1
+; GISEL-NEXT: v_mul_hi_u32 v16, v15, v0
+; GISEL-NEXT: v_mul_hi_u32 v0, v18, v0
; GISEL-NEXT: v_mov_b32_e32 v4, 0x1000
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11
-; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v11, v13, v1
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8
-; GISEL-NEXT: v_mul_hi_u32 v9, v10, v1
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v8
-; GISEL-NEXT: v_mul_hi_u32 v11, v13, v1
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v0, 0
-; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v4, v8, v[1:2]
-; GISEL-NEXT: v_sub_i32_e32 v10, vcc, v10, v0
-; GISEL-NEXT: v_subb_u32_e64 v11, s[4:5], v13, v8, vcc
-; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v13, v8
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v4
-; GISEL-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5]
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v11
-; GISEL-NEXT: v_sub_i32_e32 v13, vcc, v10, v4
-; GISEL-NEXT: s_sub_u32 s6, 0, 0x1000
-; GISEL-NEXT: v_cndmask_b32_e64 v12, -1, v1, s[4:5]
-; GISEL-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v0, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v5, 0
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v13, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v14
-; GISEL-NEXT: v_cndmask_b32_e32 v15, -1, v8, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[1:2]
-; GISEL-NEXT: s_subb_u32 s7, 0, 0
-; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v13, v4
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s7, v5, v[8:9]
-; GISEL-NEXT: v_subbrev_u32_e32 v16, vcc, 0, v14, vcc
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15
-; GISEL-NEXT: v_cndmask_b32_e32 v9, v13, v1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v1, v7, v0
-; GISEL-NEXT: v_mul_lo_u32 v13, v5, v8
-; GISEL-NEXT: v_mul_hi_u32 v15, v5, v0
-; GISEL-NEXT: v_cndmask_b32_e32 v14, v14, v16, vcc
-; GISEL-NEXT: v_mul_hi_u32 v0, v7, v0
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v13
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v16
; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v15
-; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v15, v7, v8
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1
-; GISEL-NEXT: v_mul_hi_u32 v13, v5, v8
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT: v_mul_lo_u32 v16, v18, v1
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13
+; GISEL-NEXT: v_mul_hi_u32 v14, v15, v1
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v16, v0
+; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v14
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v14, vcc, v16, v14
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v13
+; GISEL-NEXT: v_mul_hi_u32 v16, v18, v1
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v0, 0
; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v15, v13
-; GISEL-NEXT: v_mul_hi_u32 v8, v7, v8
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v13
+; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v4, v13, v[1:2]
+; GISEL-NEXT: v_sub_i32_e32 v14, vcc, v15, v0
+; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v18, v13
+; GISEL-NEXT: v_subb_u32_e64 v15, s[4:5], v18, v13, vcc
+; GISEL-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
+; GISEL-NEXT: v_sub_i32_e32 v16, vcc, v14, v4
+; GISEL-NEXT: v_subbrev_u32_e32 v17, vcc, 0, v0, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v16, v4
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v14, v4
+; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v17
+; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5]
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v15
+; GISEL-NEXT: v_cndmask_b32_e32 v18, -1, v0, vcc
+; GISEL-NEXT: v_mov_b32_e32 v0, v5
+; GISEL-NEXT: v_cndmask_b32_e64 v13, -1, v1, s[4:5]
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v9, v[0:1]
+; GISEL-NEXT: s_mov_b32 s6, 1
+; GISEL-NEXT: s_cmp_lg_u32 s6, 0
+; GISEL-NEXT: s_subb_u32 s6, 0, 0
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v7, v[0:1]
+; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v16, v4
+; GISEL-NEXT: v_mul_lo_u32 v19, v7, v0
+; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v17, vcc
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18
+; GISEL-NEXT: v_cndmask_b32_e32 v16, v16, v1, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v19
+; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v11
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v0
-; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v7, v1, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v5, 0
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12
-; GISEL-NEXT: v_cndmask_b32_e32 v7, v10, v9, vcc
-; GISEL-NEXT: v_xor_b32_e32 v10, v7, v6
-; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s6, v13, v[1:2]
-; GISEL-NEXT: v_cndmask_b32_e32 v9, v11, v14, vcc
-; GISEL-NEXT: v_xor_b32_e32 v1, v9, v6
-; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s7, v5, v[7:8]
-; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v3
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v9
-; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v9, vcc
-; GISEL-NEXT: v_xor_b32_e32 v8, v2, v9
-; GISEL-NEXT: v_mul_lo_u32 v2, v13, v0
-; GISEL-NEXT: v_mul_lo_u32 v11, v5, v7
-; GISEL-NEXT: v_xor_b32_e32 v12, v3, v9
-; GISEL-NEXT: v_mul_hi_u32 v3, v5, v0
-; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v11
+; GISEL-NEXT: v_mul_lo_u32 v11, v9, v0
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1
+; GISEL-NEXT: v_mul_hi_u32 v10, v7, v0
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
+; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11
+; GISEL-NEXT: v_mul_hi_u32 v0, v9, v0
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1
+; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v1
+; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v0, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v7, 0
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13
+; GISEL-NEXT: v_cndmask_b32_e32 v11, v15, v5, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v9, v[1:2]
+; GISEL-NEXT: v_xor_b32_e32 v1, v11, v8
+; GISEL-NEXT: v_ashrrev_i32_e32 v11, 31, v3
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v7, v[5:6]
+; GISEL-NEXT: v_cndmask_b32_e32 v10, v14, v16, vcc
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v11
+; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v11, vcc
+; GISEL-NEXT: v_xor_b32_e32 v12, v2, v11
+; GISEL-NEXT: v_mul_lo_u32 v2, v9, v0
+; GISEL-NEXT: v_mul_lo_u32 v6, v7, v5
+; GISEL-NEXT: v_xor_b32_e32 v13, v3, v11
+; GISEL-NEXT: v_mul_hi_u32 v3, v7, v0
+; GISEL-NEXT: v_mul_hi_u32 v0, v9, v0
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v6
+; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v3, v13, v7
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v11, v2
-; GISEL-NEXT: v_mul_hi_u32 v11, v5, v7
+; GISEL-NEXT: v_mul_lo_u32 v3, v9, v5
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2
+; GISEL-NEXT: v_mul_hi_u32 v6, v7, v5
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0
; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v11
-; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v11
-; GISEL-NEXT: v_mul_hi_u32 v7, v13, v7
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6
+; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6
+; GISEL-NEXT: v_mul_hi_u32 v5, v9, v5
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7, v2
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v0
-; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v13, v2, vcc
-; GISEL-NEXT: v_mul_lo_u32 v5, v12, v3
-; GISEL-NEXT: v_mul_lo_u32 v7, v8, v2
-; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v10, v6
-; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v6, vcc
-; GISEL-NEXT: v_mul_hi_u32 v6, v8, v3
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7
-; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6
-; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v5, v2
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v7, v0
+; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v9, v2, vcc
+; GISEL-NEXT: v_mul_lo_u32 v5, v13, v3
; GISEL-NEXT: v_mul_lo_u32 v6, v12, v2
-; GISEL-NEXT: v_mul_hi_u32 v3, v12, v3
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5
-; GISEL-NEXT: v_mul_hi_u32 v7, v8, v2
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v6, v3
+; GISEL-NEXT: v_xor_b32_e32 v10, v10, v8
+; GISEL-NEXT: v_mul_hi_u32 v7, v12, v3
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v10, v8
+; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v8, vcc
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7
+; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GISEL-NEXT: v_mul_lo_u32 v7, v13, v2
+; GISEL-NEXT: v_mul_hi_u32 v3, v13, v3
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5
+; GISEL-NEXT: v_mul_hi_u32 v6, v12, v2
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v7, v3
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6
+; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v5
-; GISEL-NEXT: v_mul_hi_u32 v7, v12, v2
+; GISEL-NEXT: v_mul_hi_u32 v7, v13, v2
; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0
; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5
; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v5, v[3:4]
-; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v8, v2
-; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v12, v5, vcc
-; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v12, v5
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v12, v2
+; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v13, v5, vcc
+; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v13, v5
; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v2, v4
; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
@@ -1321,18 +1321,18 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v7, v4
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3
-; GISEL-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v5, vcc
+; GISEL-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v5, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v6, -1, v6, s[4:5]
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
; GISEL-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v10, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; GISEL-NEXT: v_xor_b32_e32 v2, v2, v9
-; GISEL-NEXT: v_xor_b32_e32 v3, v3, v9
-; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v9
-; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc
+; GISEL-NEXT: v_xor_b32_e32 v2, v2, v11
+; GISEL-NEXT: v_xor_b32_e32 v3, v3, v11
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v11
+; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v11, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_srem_v2i64_pow2k_denom:
@@ -1699,210 +1699,210 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, 0x12d8fb
; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v5, 0
-; GISEL-NEXT: s_sub_u32 s6, 0, 0x12d8fb
-; GISEL-NEXT: s_subb_u32 s7, 0, 0
+; GISEL-NEXT: v_mov_b32_e32 v6, 0xffed2705
+; GISEL-NEXT: s_mov_b32 s6, 1
; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT: s_cmp_lg_u32 s6, 0
+; GISEL-NEXT: s_subb_u32 s6, 0, 0
; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
-; GISEL-NEXT: v_trunc_f32_e32 v6, v5
-; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6
-; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v4
-; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v6
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v5, 0
-; GISEL-NEXT: v_mov_b32_e32 v4, v9
-; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s6, v7, v[4:5]
-; GISEL-NEXT: v_mul_lo_u32 v4, v7, v8
-; GISEL-NEXT: v_mul_hi_u32 v6, v5, v8
-; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s7, v5, v[9:10]
-; GISEL-NEXT: v_mul_hi_u32 v8, v7, v8
-; GISEL-NEXT: v_mul_lo_u32 v10, v5, v9
-; GISEL-NEXT: v_mul_lo_u32 v11, v7, v9
-; GISEL-NEXT: v_mul_hi_u32 v12, v5, v9
-; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10
-; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v6
-; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v10, v4
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v11, v8
+; GISEL-NEXT: v_trunc_f32_e32 v8, v5
+; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v8
+; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v4
+; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v8
+; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v7, 0
+; GISEL-NEXT: v_mov_b32_e32 v8, v5
+; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v6, v9, v[8:9]
+; GISEL-NEXT: v_mul_hi_u32 v12, v9, v4
+; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], s6, v7, v[10:11]
+; GISEL-NEXT: v_mul_lo_u32 v10, v9, v4
+; GISEL-NEXT: v_mul_hi_u32 v11, v7, v4
+; GISEL-NEXT: v_mul_lo_u32 v8, v7, v13
+; GISEL-NEXT: v_mul_lo_u32 v4, v9, v13
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v12
-; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v6, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v8, v6
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v5, v4
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v11, 0
-; GISEL-NEXT: v_addc_u32_e32 v12, vcc, v7, v6, vcc
-; GISEL-NEXT: v_mov_b32_e32 v4, v9
-; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s6, v12, v[4:5]
-; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v1
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6
-; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s7, v11, v[9:10]
-; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v6, vcc
-; GISEL-NEXT: v_xor_b32_e32 v10, v0, v6
-; GISEL-NEXT: v_mul_lo_u32 v0, v12, v8
-; GISEL-NEXT: v_mul_lo_u32 v4, v11, v9
-; GISEL-NEXT: v_xor_b32_e32 v13, v1, v6
-; GISEL-NEXT: v_mul_hi_u32 v1, v11, v8
-; GISEL-NEXT: v_mul_hi_u32 v8, v12, v8
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v14, v8
+; GISEL-NEXT: v_mul_hi_u32 v14, v7, v13
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v12
+; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v14
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14
+; GISEL-NEXT: v_mul_hi_u32 v13, v9, v13
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v14, v8
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v13, v8
+; GISEL-NEXT: v_add_i32_e32 v16, vcc, v7, v4
+; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0
+; GISEL-NEXT: v_addc_u32_e32 v17, vcc, v9, v8, vcc
+; GISEL-NEXT: v_mov_b32_e32 v4, v14
+; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5]
+; GISEL-NEXT: v_ashrrev_i32_e32 v8, 31, v1
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v8
+; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], s6, v16, v[14:15]
+; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v8, vcc
+; GISEL-NEXT: v_xor_b32_e32 v15, v0, v8
+; GISEL-NEXT: v_mul_lo_u32 v0, v17, v13
+; GISEL-NEXT: v_mul_lo_u32 v4, v16, v14
+; GISEL-NEXT: v_xor_b32_e32 v18, v1, v8
+; GISEL-NEXT: v_mul_hi_u32 v1, v16, v13
+; GISEL-NEXT: v_mul_hi_u32 v13, v17, v13
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v1, v12, v9
+; GISEL-NEXT: v_mul_lo_u32 v1, v17, v14
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0
-; GISEL-NEXT: v_mul_hi_u32 v4, v11, v9
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v8
-; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT: v_mul_hi_u32 v4, v16, v14
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v13
+; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v4
; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v8, v4
-; GISEL-NEXT: v_mul_hi_u32 v8, v12, v9
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v13, v4
+; GISEL-NEXT: v_mul_hi_u32 v13, v17, v14
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v4, v1
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0
-; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v12, v1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v8, v13, v0
-; GISEL-NEXT: v_mul_lo_u32 v9, v10, v1
-; GISEL-NEXT: v_mul_hi_u32 v11, v10, v0
-; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v16, v0
+; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v17, v1, vcc
+; GISEL-NEXT: v_mul_lo_u32 v13, v18, v0
+; GISEL-NEXT: v_mul_lo_u32 v14, v15, v1
+; GISEL-NEXT: v_mul_hi_u32 v16, v15, v0
+; GISEL-NEXT: v_mul_hi_u32 v0, v18, v0
; GISEL-NEXT: v_mov_b32_e32 v4, 0x12d8fb
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11
-; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v11, v13, v1
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8
-; GISEL-NEXT: v_mul_hi_u32 v9, v10, v1
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v8
-; GISEL-NEXT: v_mul_hi_u32 v11, v13, v1
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v0, 0
-; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v4, v8, v[1:2]
-; GISEL-NEXT: v_sub_i32_e32 v10, vcc, v10, v0
-; GISEL-NEXT: v_subb_u32_e64 v11, s[4:5], v13, v8, vcc
-; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v13, v8
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v4
-; GISEL-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5]
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v11
-; GISEL-NEXT: v_sub_i32_e32 v13, vcc, v10, v4
-; GISEL-NEXT: s_sub_u32 s6, 0, 0x12d8fb
-; GISEL-NEXT: v_cndmask_b32_e64 v12, -1, v1, s[4:5]
-; GISEL-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v0, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v5, 0
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v13, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v14
-; GISEL-NEXT: v_cndmask_b32_e32 v15, -1, v8, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[1:2]
-; GISEL-NEXT: s_subb_u32 s7, 0, 0
-; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v13, v4
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s7, v5, v[8:9]
-; GISEL-NEXT: v_subbrev_u32_e32 v16, vcc, 0, v14, vcc
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15
-; GISEL-NEXT: v_cndmask_b32_e32 v9, v13, v1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v1, v7, v0
-; GISEL-NEXT: v_mul_lo_u32 v13, v5, v8
-; GISEL-NEXT: v_mul_hi_u32 v15, v5, v0
-; GISEL-NEXT: v_cndmask_b32_e32 v14, v14, v16, vcc
-; GISEL-NEXT: v_mul_hi_u32 v0, v7, v0
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v13
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v16
; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v15
-; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v15, v7, v8
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1
-; GISEL-NEXT: v_mul_hi_u32 v13, v5, v8
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT: v_mul_lo_u32 v16, v18, v1
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13
+; GISEL-NEXT: v_mul_hi_u32 v14, v15, v1
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v16, v0
+; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v14
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v14, vcc, v16, v14
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v13
+; GISEL-NEXT: v_mul_hi_u32 v16, v18, v1
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v0, 0
; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v15, v13
-; GISEL-NEXT: v_mul_hi_u32 v8, v7, v8
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v13
+; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v4, v13, v[1:2]
+; GISEL-NEXT: v_sub_i32_e32 v14, vcc, v15, v0
+; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v18, v13
+; GISEL-NEXT: v_subb_u32_e64 v15, s[4:5], v18, v13, vcc
+; GISEL-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
+; GISEL-NEXT: v_sub_i32_e32 v16, vcc, v14, v4
+; GISEL-NEXT: v_subbrev_u32_e32 v17, vcc, 0, v0, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v16, v4
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v14, v4
+; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v17
+; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5]
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v15
+; GISEL-NEXT: v_cndmask_b32_e32 v18, -1, v0, vcc
+; GISEL-NEXT: v_mov_b32_e32 v0, v5
+; GISEL-NEXT: v_cndmask_b32_e64 v13, -1, v1, s[4:5]
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v9, v[0:1]
+; GISEL-NEXT: s_mov_b32 s6, 1
+; GISEL-NEXT: s_cmp_lg_u32 s6, 0
+; GISEL-NEXT: s_subb_u32 s6, 0, 0
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v7, v[0:1]
+; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v16, v4
+; GISEL-NEXT: v_mul_lo_u32 v19, v7, v0
+; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v17, vcc
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18
+; GISEL-NEXT: v_cndmask_b32_e32 v16, v16, v1, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v19
+; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v11
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v0
-; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v7, v1, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v5, 0
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12
-; GISEL-NEXT: v_cndmask_b32_e32 v7, v10, v9, vcc
-; GISEL-NEXT: v_xor_b32_e32 v10, v7, v6
-; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s6, v13, v[1:2]
-; GISEL-NEXT: v_cndmask_b32_e32 v9, v11, v14, vcc
-; GISEL-NEXT: v_xor_b32_e32 v1, v9, v6
-; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s7, v5, v[7:8]
-; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v3
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v9
-; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v9, vcc
-; GISEL-NEXT: v_xor_b32_e32 v8, v2, v9
-; GISEL-NEXT: v_mul_lo_u32 v2, v13, v0
-; GISEL-NEXT: v_mul_lo_u32 v11, v5, v7
-; GISEL-NEXT: v_xor_b32_e32 v12, v3, v9
-; GISEL-NEXT: v_mul_hi_u32 v3, v5, v0
-; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v11
+; GISEL-NEXT: v_mul_lo_u32 v11, v9, v0
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1
+; GISEL-NEXT: v_mul_hi_u32 v10, v7, v0
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
+; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11
+; GISEL-NEXT: v_mul_hi_u32 v0, v9, v0
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1
+; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v1
+; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v0, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v7, 0
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13
+; GISEL-NEXT: v_cndmask_b32_e32 v11, v15, v5, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v9, v[1:2]
+; GISEL-NEXT: v_xor_b32_e32 v1, v11, v8
+; GISEL-NEXT: v_ashrrev_i32_e32 v11, 31, v3
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v7, v[5:6]
+; GISEL-NEXT: v_cndmask_b32_e32 v10, v14, v16, vcc
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v11
+; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v11, vcc
+; GISEL-NEXT: v_xor_b32_e32 v12, v2, v11
+; GISEL-NEXT: v_mul_lo_u32 v2, v9, v0
+; GISEL-NEXT: v_mul_lo_u32 v6, v7, v5
+; GISEL-NEXT: v_xor_b32_e32 v13, v3, v11
+; GISEL-NEXT: v_mul_hi_u32 v3, v7, v0
+; GISEL-NEXT: v_mul_hi_u32 v0, v9, v0
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v6
+; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v3, v13, v7
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v11, v2
-; GISEL-NEXT: v_mul_hi_u32 v11, v5, v7
+; GISEL-NEXT: v_mul_lo_u32 v3, v9, v5
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2
+; GISEL-NEXT: v_mul_hi_u32 v6, v7, v5
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0
; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v11
-; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v11
-; GISEL-NEXT: v_mul_hi_u32 v7, v13, v7
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6
+; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6
+; GISEL-NEXT: v_mul_hi_u32 v5, v9, v5
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7, v2
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v0
-; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v13, v2, vcc
-; GISEL-NEXT: v_mul_lo_u32 v5, v12, v3
-; GISEL-NEXT: v_mul_lo_u32 v7, v8, v2
-; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v10, v6
-; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v6, vcc
-; GISEL-NEXT: v_mul_hi_u32 v6, v8, v3
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7
-; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6
-; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v5, v2
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v7, v0
+; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v9, v2, vcc
+; GISEL-NEXT: v_mul_lo_u32 v5, v13, v3
; GISEL-NEXT: v_mul_lo_u32 v6, v12, v2
-; GISEL-NEXT: v_mul_hi_u32 v3, v12, v3
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5
-; GISEL-NEXT: v_mul_hi_u32 v7, v8, v2
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v6, v3
+; GISEL-NEXT: v_xor_b32_e32 v10, v10, v8
+; GISEL-NEXT: v_mul_hi_u32 v7, v12, v3
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v10, v8
+; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v8, vcc
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7
+; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GISEL-NEXT: v_mul_lo_u32 v7, v13, v2
+; GISEL-NEXT: v_mul_hi_u32 v3, v13, v3
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5
+; GISEL-NEXT: v_mul_hi_u32 v6, v12, v2
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v7, v3
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6
+; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v5
-; GISEL-NEXT: v_mul_hi_u32 v7, v12, v2
+; GISEL-NEXT: v_mul_hi_u32 v7, v13, v2
; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0
; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5
; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v5, v[3:4]
-; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v8, v2
-; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v12, v5, vcc
-; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v12, v5
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v12, v2
+; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v13, v5, vcc
+; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v13, v5
; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v2, v4
; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
@@ -1914,18 +1914,18 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v7, v4
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3
-; GISEL-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v5, vcc
+; GISEL-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v5, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v6, -1, v6, s[4:5]
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
; GISEL-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v10, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; GISEL-NEXT: v_xor_b32_e32 v2, v2, v9
-; GISEL-NEXT: v_xor_b32_e32 v3, v3, v9
-; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v9
-; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc
+; GISEL-NEXT: v_xor_b32_e32 v2, v2, v11
+; GISEL-NEXT: v_xor_b32_e32 v3, v3, v11
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v11
+; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v11, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_srem_v2i64_oddk_denom:
@@ -3194,59 +3194,59 @@ define <2 x i64> @v_srem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
; GISEL-NEXT: v_cndmask_b32_e32 v6, v13, v10, vcc
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9
; GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v7, v8, v6, vcc
; GISEL-NEXT: v_mul_lo_u32 v5, v17, v4
-; GISEL-NEXT: v_mul_lo_u32 v7, v14, v0
-; GISEL-NEXT: v_mul_hi_u32 v10, v14, v4
-; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], 0, v12
-; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v7
-; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v10
-; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5]
-; GISEL-NEXT: v_mul_lo_u32 v10, v17, v0
+; GISEL-NEXT: v_mul_lo_u32 v6, v14, v0
+; GISEL-NEXT: v_mul_hi_u32 v9, v14, v4
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, 0, v12
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6
+; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9
+; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GISEL-NEXT: v_mul_lo_u32 v9, v17, v0
; GISEL-NEXT: v_mul_hi_u32 v4, v17, v4
-; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v7, v5
-; GISEL-NEXT: v_mul_hi_u32 v7, v14, v0
-; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v10, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v4, v7
-; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v7, s[4:5], v10, v7
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5
+; GISEL-NEXT: v_mul_hi_u32 v6, v14, v0
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v9, v4
+; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v6
+; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6
; GISEL-NEXT: v_mul_hi_u32 v0, v17, v0
-; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v4, v5
-; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v7, v5
-; GISEL-NEXT: v_add_i32_e64 v0, s[4:5], v0, v5
-; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v14, v4
-; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v17, v0, s[4:5]
-; GISEL-NEXT: v_mul_lo_u32 v5, v3, v4
-; GISEL-NEXT: v_mul_lo_u32 v7, v9, v0
-; GISEL-NEXT: v_cndmask_b32_e32 v8, v8, v6, vcc
-; GISEL-NEXT: v_mul_hi_u32 v6, v9, v4
-; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7
-; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v6, v3, v0
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5
-; GISEL-NEXT: v_mul_hi_u32 v7, v9, v0
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v5
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v14, v4
+; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v17, v0, vcc
+; GISEL-NEXT: v_mul_lo_u32 v6, v3, v4
+; GISEL-NEXT: v_mul_lo_u32 v9, v8, v5
+; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0, v1
+; GISEL-NEXT: v_mul_hi_u32 v1, v8, v4
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9
+; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v6, v1
+; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GISEL-NEXT: v_mul_lo_u32 v6, v3, v5
+; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1
+; GISEL-NEXT: v_mul_hi_u32 v9, v8, v5
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v6, v4
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7
-; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v4, v5
-; GISEL-NEXT: v_mul_hi_u32 v0, v3, v0
-; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v7, 0
-; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v0, v6
-; GISEL-NEXT: v_mov_b32_e32 v0, v5
-; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v2, v6, v[0:1]
-; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0, v1
-; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v3, v7, v[5:6]
-; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v8, vcc
-; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v9, v4
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9
+; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v4, v1
+; GISEL-NEXT: v_mul_hi_u32 v10, v3, v5
+; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v9, 0
+; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v6, v1
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v10, v1
+; GISEL-NEXT: v_mov_b32_e32 v1, v5
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v2, v6, v[1:2]
+; GISEL-NEXT: v_subrev_i32_e32 v1, vcc, 0, v7
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v3, v9, v[5:6]
+; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v8, v4
; GISEL-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v5, vcc
; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v3, v5
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v3
@@ -3274,7 +3274,7 @@ define <2 x i64> @v_srem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc
; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, 0, v2
-; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, 0, v3
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_srem_v2i64_24bit:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
index a7e5ce3d216199..faad7e93da5d37 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -1095,192 +1095,189 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_mov_b32_e32 v4, 0x12d8fb
-; GISEL-NEXT: v_cvt_f32_u32_e32 v5, 0x12d8fb
-; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v6, 0
-; GISEL-NEXT: s_sub_u32 s4, 0, 0x12d8fb
-; GISEL-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6
+; GISEL-NEXT: v_cvt_f32_u32_e32 v6, 0x12d8fb
+; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v7, 0
+; GISEL-NEXT: s_mov_b32 s4, 1
+; GISEL-NEXT: v_mov_b32_e32 v5, 0xffed2705
+; GISEL-NEXT: s_mov_b32 s5, 1
+; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v7
+; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6
+; GISEL-NEXT: s_cmp_lg_u32 s4, 0
+; GISEL-NEXT: s_subb_u32 s4, 0, 0
+; GISEL-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6
+; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v6
+; GISEL-NEXT: s_cmp_lg_u32 s5, 0
; GISEL-NEXT: s_subb_u32 s5, 0, 0
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5
-; GISEL-NEXT: s_sub_u32 s6, 0, 0x12d8fb
-; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5
-; GISEL-NEXT: s_subb_u32 s7, 0, 0
-; GISEL-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5
-; GISEL-NEXT: v_trunc_f32_e32 v6, v6
-; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6
+; GISEL-NEXT: v_trunc_f32_e32 v7, v7
+; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v7
+; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7
; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
-; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
-; GISEL-NEXT: v_mul_lo_u32 v7, s4, v6
-; GISEL-NEXT: v_mul_lo_u32 v8, s6, v6
-; GISEL-NEXT: v_mul_lo_u32 v9, s4, v5
-; GISEL-NEXT: v_mul_lo_u32 v10, s5, v5
-; GISEL-NEXT: v_mul_hi_u32 v11, s4, v5
-; GISEL-NEXT: v_mul_lo_u32 v12, s6, v5
-; GISEL-NEXT: v_mul_lo_u32 v13, s7, v5
-; GISEL-NEXT: v_mul_hi_u32 v14, s6, v5
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v10, v7
-; GISEL-NEXT: v_mul_lo_u32 v10, v6, v9
-; GISEL-NEXT: v_mul_hi_u32 v15, v5, v9
-; GISEL-NEXT: v_mul_hi_u32 v9, v6, v9
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v13, v8
-; GISEL-NEXT: v_mul_lo_u32 v13, v6, v12
-; GISEL-NEXT: v_mul_hi_u32 v16, v5, v12
-; GISEL-NEXT: v_mul_hi_u32 v12, v6, v12
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v14
-; GISEL-NEXT: v_mul_lo_u32 v11, v5, v7
-; GISEL-NEXT: v_mul_lo_u32 v14, v6, v7
-; GISEL-NEXT: v_mul_hi_u32 v17, v5, v7
-; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7
-; GISEL-NEXT: v_mul_lo_u32 v18, v5, v8
-; GISEL-NEXT: v_mul_lo_u32 v19, v6, v8
-; GISEL-NEXT: v_mul_hi_u32 v20, v5, v8
-; GISEL-NEXT: v_mul_hi_u32 v8, v6, v8
+; GISEL-NEXT: v_mul_lo_u32 v8, v7, v5
+; GISEL-NEXT: v_mul_lo_u32 v9, v6, v5
+; GISEL-NEXT: v_mul_lo_u32 v10, s4, v6
+; GISEL-NEXT: v_mul_hi_u32 v11, v6, v5
+; GISEL-NEXT: v_mul_lo_u32 v12, s5, v6
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v8
+; GISEL-NEXT: v_mul_lo_u32 v13, v7, v9
+; GISEL-NEXT: v_mul_hi_u32 v14, v6, v9
+; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8
; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11
-; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v14, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v18
-; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, v19, v12
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11
+; GISEL-NEXT: v_mul_lo_u32 v11, v6, v10
+; GISEL-NEXT: v_mul_lo_u32 v12, v7, v10
+; GISEL-NEXT: v_mul_hi_u32 v15, v6, v10
+; GISEL-NEXT: v_mul_hi_u32 v10, v7, v10
+; GISEL-NEXT: v_mul_lo_u32 v16, v6, v8
+; GISEL-NEXT: v_mul_lo_u32 v17, v7, v8
+; GISEL-NEXT: v_mul_hi_u32 v18, v6, v8
+; GISEL-NEXT: v_mul_hi_u32 v8, v7, v8
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v11
; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v15
-; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v17
-; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v9
+; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v16
-; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v20
; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v14, v15
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v13
-; GISEL-NEXT: v_add_i32_e32 v14, vcc, v19, v16
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10
-; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v17, v9
+; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v14
+; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15
+; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v14, v13
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v5, v9
-; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v6, v7, vcc
-; GISEL-NEXT: v_mul_lo_u32 v10, s4, v9
-; GISEL-NEXT: v_mul_lo_u32 v11, s5, v9
-; GISEL-NEXT: v_mul_hi_u32 v13, s4, v9
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v12
-; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v8, vcc
-; GISEL-NEXT: v_mul_lo_u32 v8, s6, v5
-; GISEL-NEXT: v_mul_lo_u32 v12, s7, v5
-; GISEL-NEXT: v_mul_hi_u32 v14, s6, v5
-; GISEL-NEXT: v_mul_lo_u32 v15, s4, v7
-; GISEL-NEXT: v_mul_lo_u32 v16, v7, v10
-; GISEL-NEXT: v_mul_hi_u32 v17, v9, v10
-; GISEL-NEXT: v_mul_hi_u32 v10, v7, v10
-; GISEL-NEXT: v_mul_lo_u32 v18, s6, v6
-; GISEL-NEXT: v_mul_lo_u32 v19, v6, v8
-; GISEL-NEXT: v_mul_hi_u32 v20, v5, v8
-; GISEL-NEXT: v_mul_hi_u32 v8, v6, v8
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v15
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v18
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14
-; GISEL-NEXT: v_mul_lo_u32 v13, v9, v11
-; GISEL-NEXT: v_mul_lo_u32 v14, v7, v11
-; GISEL-NEXT: v_mul_hi_u32 v15, v9, v11
-; GISEL-NEXT: v_mul_hi_u32 v11, v7, v11
-; GISEL-NEXT: v_mul_lo_u32 v18, v5, v12
-; GISEL-NEXT: v_mul_lo_u32 v21, v6, v12
-; GISEL-NEXT: v_mul_hi_u32 v22, v5, v12
-; GISEL-NEXT: v_mul_hi_u32 v12, v6, v12
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v18
+; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v19, v11
+; GISEL-NEXT: v_add_i32_e32 v15, vcc, v20, v15
; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v13
-; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v14, v10
+; GISEL-NEXT: v_add_i32_e32 v14, vcc, v17, v14
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11
+; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13
+; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v12
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v13
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v6, v11
+; GISEL-NEXT: v_addc_u32_e32 v10, vcc, v7, v10, vcc
+; GISEL-NEXT: v_mul_lo_u32 v12, v11, v5
+; GISEL-NEXT: v_mul_lo_u32 v13, s4, v11
+; GISEL-NEXT: v_mul_hi_u32 v14, v11, v5
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9
+; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc
+; GISEL-NEXT: v_mul_lo_u32 v8, v6, v5
+; GISEL-NEXT: v_mul_lo_u32 v9, s5, v6
+; GISEL-NEXT: v_mul_hi_u32 v15, v6, v5
+; GISEL-NEXT: v_mul_lo_u32 v16, v10, v5
+; GISEL-NEXT: v_mul_lo_u32 v17, v10, v12
+; GISEL-NEXT: v_mul_hi_u32 v18, v11, v12
+; GISEL-NEXT: v_mul_hi_u32 v12, v10, v12
+; GISEL-NEXT: v_mul_lo_u32 v5, v7, v5
+; GISEL-NEXT: v_mul_lo_u32 v19, v7, v8
+; GISEL-NEXT: v_mul_hi_u32 v20, v6, v8
+; GISEL-NEXT: v_mul_hi_u32 v8, v7, v8
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v16
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v9, v5
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v13, v14
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v15
+; GISEL-NEXT: v_mul_lo_u32 v13, v11, v9
+; GISEL-NEXT: v_mul_lo_u32 v14, v10, v9
+; GISEL-NEXT: v_mul_hi_u32 v15, v11, v9
+; GISEL-NEXT: v_mul_hi_u32 v9, v10, v9
+; GISEL-NEXT: v_mul_lo_u32 v16, v6, v5
+; GISEL-NEXT: v_mul_lo_u32 v21, v7, v5
+; GISEL-NEXT: v_mul_hi_u32 v22, v6, v5
+; GISEL-NEXT: v_mul_hi_u32 v5, v7, v5
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v17, v13
+; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v14, v12
; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v18, vcc, v19, v18
+; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v16
; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v21, v8
; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v18
; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v15
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15
; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v17, vcc, v18, v20
-; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v20
+; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v22
; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v13
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v17, v13
; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15
-; GISEL-NEXT: v_add_i32_e32 v15, vcc, v19, v17
+; GISEL-NEXT: v_add_i32_e32 v15, vcc, v19, v16
; GISEL-NEXT: v_add_i32_e32 v16, vcc, v21, v18
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v13
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13
; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v15
; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13
; GISEL-NEXT: v_add_i32_e32 v14, vcc, v16, v15
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10
-; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v11, vcc
-; GISEL-NEXT: v_mul_lo_u32 v10, v1, v9
-; GISEL-NEXT: v_mul_hi_u32 v11, v0, v9
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v14
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
+; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v10, v9, vcc
+; GISEL-NEXT: v_mul_lo_u32 v10, v1, v11
+; GISEL-NEXT: v_mul_hi_u32 v12, v0, v11
+; GISEL-NEXT: v_mul_hi_u32 v11, v1, v11
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8
+; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v7, v5, vcc
+; GISEL-NEXT: v_mul_lo_u32 v7, v3, v6
+; GISEL-NEXT: v_mul_hi_u32 v8, v2, v6
+; GISEL-NEXT: v_mul_hi_u32 v6, v3, v6
+; GISEL-NEXT: v_mul_lo_u32 v13, v0, v9
+; GISEL-NEXT: v_mul_lo_u32 v14, v1, v9
+; GISEL-NEXT: v_mul_hi_u32 v15, v0, v9
; GISEL-NEXT: v_mul_hi_u32 v9, v1, v9
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8
-; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v12, vcc
-; GISEL-NEXT: v_mul_lo_u32 v8, v3, v5
-; GISEL-NEXT: v_mul_hi_u32 v12, v2, v5
+; GISEL-NEXT: v_mul_lo_u32 v16, v2, v5
+; GISEL-NEXT: v_mul_lo_u32 v17, v3, v5
+; GISEL-NEXT: v_mul_hi_u32 v18, v2, v5
; GISEL-NEXT: v_mul_hi_u32 v5, v3, v5
-; GISEL-NEXT: v_mul_lo_u32 v13, v0, v7
-; GISEL-NEXT: v_mul_lo_u32 v14, v1, v7
-; GISEL-NEXT: v_mul_hi_u32 v15, v0, v7
-; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7
-; GISEL-NEXT: v_mul_lo_u32 v16, v2, v6
-; GISEL-NEXT: v_mul_lo_u32 v17, v3, v6
-; GISEL-NEXT: v_mul_hi_u32 v18, v2, v6
-; GISEL-NEXT: v_mul_hi_u32 v6, v3, v6
; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v13
; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v14, v9
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v14, v11
; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v16
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v16
; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v17, v5
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v17, v6
; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15
-; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12
-; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v18
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v15
; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v14, v11
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v16, v8
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, v17, v12
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10
-; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8
+; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v18
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10
+; GISEL-NEXT: v_add_i32_e32 v12, vcc, v14, v12
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v16, v7
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v17, v8
; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
-; GISEL-NEXT: v_mul_lo_u32 v11, v9, v4
-; GISEL-NEXT: v_mul_hi_u32 v9, v9, v4
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8
-; GISEL-NEXT: v_mul_lo_u32 v12, v5, v4
-; GISEL-NEXT: v_mul_hi_u32 v5, v5, v4
+; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7
+; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11
+; GISEL-NEXT: v_mul_lo_u32 v12, v10, v4
+; GISEL-NEXT: v_mul_hi_u32 v10, v10, v4
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7
+; GISEL-NEXT: v_mul_lo_u32 v8, v6, v4
+; GISEL-NEXT: v_mul_hi_u32 v6, v6, v4
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7
+; GISEL-NEXT: v_mul_lo_u32 v7, v9, v4
+; GISEL-NEXT: v_mul_lo_u32 v5, v5, v4
; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8
-; GISEL-NEXT: v_mul_lo_u32 v7, v7, v4
-; GISEL-NEXT: v_mul_lo_u32 v6, v6, v4
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5
-; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v11
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6
+; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v12
; GISEL-NEXT: v_subb_u32_e64 v6, vcc, v1, v7, s[4:5]
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc
-; GISEL-NEXT: v_sub_i32_e64 v2, s[6:7], v2, v12
+; GISEL-NEXT: v_sub_i32_e64 v2, s[6:7], v2, v8
; GISEL-NEXT: v_subb_u32_e64 v8, vcc, v3, v5, s[6:7]
; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v5
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4
>From 8a72eaf30e1e6225e87ab00b12cd832ffa8fa555 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Sat, 16 Nov 2024 19:20:34 +0100
Subject: [PATCH 2/3] address review comments
---
.../include/llvm/Target/GlobalISel/Combine.td | 4 +-
.../GlobalISel/CombinerHelperArtifacts.cpp | 23 ++++----
.../AArch64/GlobalISel/combine-overflow.mir | 55 +++++++++++++++++++
3 files changed, 70 insertions(+), 12 deletions(-)
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index b4f1551e965b14..8c2f23b2faf2e1 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1385,7 +1385,7 @@ def match_addos : GICombineRule<
[{ return Helper.matchAddOverflow(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
-def match_subos : GICombineRule<
+def match_subo_no_overflow : GICombineRule<
(defs root:$root, build_fn_matchinfo:$matchinfo),
(match (wip_match_opcode G_SSUBO, G_USUBO):$root,
[{ return Helper.matchSuboCarryOut(*${root}, ${matchinfo}); }]),
@@ -1911,7 +1911,7 @@ def cmp_combines: GICombineGroup<[
def artifact_combines: GICombineGroup<[
merge_combines,
match_addos,
- match_subos
+ match_subo_no_overflow
]>;
// FIXME: These should use the custom predicate feature once it lands.
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperArtifacts.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperArtifacts.cpp
index 85c56ee6863a95..2caf4bbfa62cdd 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperArtifacts.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperArtifacts.cpp
@@ -101,12 +101,15 @@ bool CombinerHelper::matchSuboCarryOut(const MachineInstr &MI,
!isConstantLegalOrBeforeLegalizer(CarryTy))
return false;
+ ConstantRange KBLHS =
+ ConstantRange::fromKnownBits(KB->getKnownBits(LHS),
+ /* IsSigned=*/Subo->isSigned());
+ ConstantRange KBRHS =
+ ConstantRange::fromKnownBits(KB->getKnownBits(RHS),
+ /* IsSigned=*/Subo->isSigned());
+
if (Subo->isSigned()) {
// G_SSUBO
- ConstantRange KBLHS = ConstantRange::fromKnownBits(KB->getKnownBits(LHS),
- /* IsSigned= */ true);
- ConstantRange KBRHS = ConstantRange::fromKnownBits(KB->getKnownBits(RHS),
- /* IsSigned= */ true);
switch (KBLHS.signedSubMayOverflow(KBRHS)) {
case ConstantRange::OverflowResult::MayOverflow:
return false;
@@ -121,7 +124,9 @@ bool CombinerHelper::matchSuboCarryOut(const MachineInstr &MI,
case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
MatchInfo = [=](MachineIRBuilder &B) {
B.buildSub(Dst, LHS, RHS);
- B.buildConstant(Carry, 1);
+ B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
+ /*isVector=*/CarryTy.isVector(),
+ /*isFP=*/false));
};
return true;
}
@@ -130,10 +135,6 @@ bool CombinerHelper::matchSuboCarryOut(const MachineInstr &MI,
}
// G_USUBO
- ConstantRange KBLHS = ConstantRange::fromKnownBits(KB->getKnownBits(LHS),
- /* IsSigned= */ false);
- ConstantRange KBRHS = ConstantRange::fromKnownBits(KB->getKnownBits(RHS),
- /* IsSigned= */ false);
switch (KBLHS.unsignedSubMayOverflow(KBRHS)) {
case ConstantRange::OverflowResult::MayOverflow:
return false;
@@ -148,7 +149,9 @@ bool CombinerHelper::matchSuboCarryOut(const MachineInstr &MI,
case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
MatchInfo = [=](MachineIRBuilder &B) {
B.buildSub(Dst, LHS, RHS);
- B.buildConstant(Carry, 1);
+ B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
+ /*isVector=*/CarryTy.isVector(),
+ /*isFP=*/false));
};
return true;
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-overflow.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-overflow.mir
index 87b30f558539c8..20cba54923548e 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-overflow.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-overflow.mir
@@ -222,3 +222,58 @@ body: |
$w1 = COPY %o_wide
RET_ReallyLR implicit $w0
...
+---
+name: usub_may_carry_s11
+body: |
+ bb.0:
+ liveins: $w0, $w1
+ ; CHECK-LABEL: name: usub_may_carry_s11
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: %const:_(s32) = G_CONSTANT i32 512
+ ; CHECK-NEXT: %sub:_(s32), %o:_(s11) = G_USUBO [[COPY]], %const
+ ; CHECK-NEXT: %o_wide:_(s32) = G_ZEXT %o(s11)
+ ; CHECK-NEXT: $w0 = COPY %sub(s32)
+ ; CHECK-NEXT: $w1 = COPY %o_wide(s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:_(s32) = COPY $w0
+ %const:_(s32) = G_CONSTANT i32 512
+ %sub:_(s32), %o:_(s11) = G_USUBO %0, %const
+ %o_wide:_(s32) = G_ZEXT %o(s11)
+ $w0 = COPY %sub(s32)
+ $w1 = COPY %o_wide
+ RET_ReallyLR implicit $w0
+...
+---
+name: usub_may_carry_s11_vector
+body: |
+ bb.0:
+ liveins: $w0, $w1
+ ; CHECK-LABEL: name: usub_may_carry_s11_vector
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: %const:_(s32) = G_CONSTANT i32 512
+ ; CHECK-NEXT: %bv:_(<4 x s32>) = G_BUILD_VECTOR %const(s32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+ ; CHECK-NEXT: %bv1:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), %const(s32)
+ ; CHECK-NEXT: %sub:_(<4 x s32>), %o:_(<4 x s11>) = G_USUBO %bv, %bv1
+ ; CHECK-NEXT: %o_wide:_(<4 x s32>) = G_ZEXT %o(<4 x s11>)
+ ; CHECK-NEXT: $q0 = COPY %sub(<4 x s32>)
+ ; CHECK-NEXT: $q1 = COPY %o_wide(<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w0
+ %2:_(s32) = COPY $w0
+ %3:_(s32) = COPY $w0
+ %const:_(s32) = G_CONSTANT i32 512
+ %bv:_(<4 x s32>) = G_BUILD_VECTOR %const(s32), %0(s32), %1(s32), %2(s32)
+ %bv1:_(<4 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %const(s32)
+ %sub:_(<4 x s32>), %o:_(<4 x s11>) = G_USUBO %bv, %bv1
+ %o_wide:_(<4 x s32>) = G_ZEXT %o(<4 x s11>)
+ $q0 = COPY %sub(<4 x s32>)
+ $q1 = COPY %o_wide
+ RET_ReallyLR implicit $w0
+...
>From 7e4209201f8f72ce2b9a7be0fc8e7a3532ef199f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Sun, 17 Nov 2024 07:34:01 +0100
Subject: [PATCH 3/3] moving and renaming
---
.../include/llvm/Target/GlobalISel/Combine.td | 4 +-
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 75 +++++++++++++++++++
.../GlobalISel/CombinerHelperArtifacts.cpp | 75 -------------------
llvm/lib/Target/AArch64/AArch64Combine.td | 2 +-
4 files changed, 78 insertions(+), 78 deletions(-)
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 8c2f23b2faf2e1..230f7648fabc10 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1908,7 +1908,7 @@ def cmp_combines: GICombineGroup<[
]>;
-def artifact_combines: GICombineGroup<[
+def post_legalizer_combines: GICombineGroup<[
merge_combines,
match_addos,
match_subo_no_overflow
@@ -1999,7 +1999,7 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
combine_concat_vector,
sext_trunc, zext_trunc, prefer_sign_combines, shuffle_combines,
- combine_use_vector_truncate, artifact_combines]>;
+ combine_use_vector_truncate, post_legalizer_combines]>;
// A combine group used to for prelegalizer combiners at -O0. The combines in
// this group have been selected based on experiments to balance code size and
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 83d78c0bde399e..d95fc8cfbcf558 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -7790,3 +7790,78 @@ bool CombinerHelper::matchShuffleDisjointMask(MachineInstr &MI,
return true;
}
+
+bool CombinerHelper::matchSuboCarryOut(const MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ const GSubCarryOut *Subo = cast<GSubCarryOut>(&MI);
+ // Fold [S,U]SUBO with a known overflow result into G_SUB + constant carry.
+ Register Dst = Subo->getReg(0);
+ Register LHS = Subo->getLHSReg();
+ Register RHS = Subo->getRHSReg();
+ Register Carry = Subo->getCarryOutReg();
+ LLT DstTy = MRI.getType(Dst);
+ LLT CarryTy = MRI.getType(Carry);
+
+ // Check that G_SUB and the carry constant are legal before using known bits.
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy}}) ||
+ !isConstantLegalOrBeforeLegalizer(CarryTy))
+ return false;
+
+ ConstantRange KBLHS =
+ ConstantRange::fromKnownBits(KB->getKnownBits(LHS),
+ /* IsSigned=*/Subo->isSigned());
+ ConstantRange KBRHS =
+ ConstantRange::fromKnownBits(KB->getKnownBits(RHS),
+ /* IsSigned=*/Subo->isSigned());
+
+ if (Subo->isSigned()) {
+ // G_SSUBO: fold only when the signed overflow outcome is decided.
+ switch (KBLHS.signedSubMayOverflow(KBRHS)) {
+ case ConstantRange::OverflowResult::MayOverflow:
+ return false; // Outcome not decided by the operand ranges.
+ case ConstantRange::OverflowResult::NeverOverflows: {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
+ B.buildConstant(Carry, 0); // Carry-out is known false.
+ };
+ return true;
+ }
+ case ConstantRange::OverflowResult::AlwaysOverflowsLow:
+ case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildSub(Dst, LHS, RHS);
+ B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
+ /*isVector=*/CarryTy.isVector(),
+ /*isFP=*/false));
+ };
+ return true;
+ }
+ }
+ return false;
+ }
+
+ // G_USUBO: fold only when the unsigned overflow outcome is decided.
+ switch (KBLHS.unsignedSubMayOverflow(KBRHS)) {
+ case ConstantRange::OverflowResult::MayOverflow:
+ return false; // Outcome not decided by the operand ranges.
+ case ConstantRange::OverflowResult::NeverOverflows: {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
+ B.buildConstant(Carry, 0); // Carry-out is known false.
+ };
+ return true;
+ }
+ case ConstantRange::OverflowResult::AlwaysOverflowsLow:
+ case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildSub(Dst, LHS, RHS);
+ B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
+ /*isVector=*/CarryTy.isVector(),
+ /*isFP=*/false));
+ };
+ return true;
+ }
+ }
+
+ return false;
+}
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperArtifacts.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperArtifacts.cpp
index 2caf4bbfa62cdd..797a1e84e21e35 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperArtifacts.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperArtifacts.cpp
@@ -84,78 +84,3 @@ bool CombinerHelper::matchMergeXAndZero(const MachineInstr &MI,
};
return true;
}
-
-bool CombinerHelper::matchSuboCarryOut(const MachineInstr &MI,
- BuildFnTy &MatchInfo) {
- const GSubCarryOut *Subo = cast<GSubCarryOut>(&MI);
-
- Register Dst = Subo->getReg(0);
- Register LHS = Subo->getLHSReg();
- Register RHS = Subo->getRHSReg();
- Register Carry = Subo->getCarryOutReg();
- LLT DstTy = MRI.getType(Dst);
- LLT CarryTy = MRI.getType(Carry);
-
- // Check legality before known bits.
- if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy}}) ||
- !isConstantLegalOrBeforeLegalizer(CarryTy))
- return false;
-
- ConstantRange KBLHS =
- ConstantRange::fromKnownBits(KB->getKnownBits(LHS),
- /* IsSigned=*/Subo->isSigned());
- ConstantRange KBRHS =
- ConstantRange::fromKnownBits(KB->getKnownBits(RHS),
- /* IsSigned=*/Subo->isSigned());
-
- if (Subo->isSigned()) {
- // G_SSUBO
- switch (KBLHS.signedSubMayOverflow(KBRHS)) {
- case ConstantRange::OverflowResult::MayOverflow:
- return false;
- case ConstantRange::OverflowResult::NeverOverflows: {
- MatchInfo = [=](MachineIRBuilder &B) {
- B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
- B.buildConstant(Carry, 0);
- };
- return true;
- }
- case ConstantRange::OverflowResult::AlwaysOverflowsLow:
- case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
- MatchInfo = [=](MachineIRBuilder &B) {
- B.buildSub(Dst, LHS, RHS);
- B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
- /*isVector=*/CarryTy.isVector(),
- /*isFP=*/false));
- };
- return true;
- }
- }
- return false;
- }
-
- // G_USUBO
- switch (KBLHS.unsignedSubMayOverflow(KBRHS)) {
- case ConstantRange::OverflowResult::MayOverflow:
- return false;
- case ConstantRange::OverflowResult::NeverOverflows: {
- MatchInfo = [=](MachineIRBuilder &B) {
- B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
- B.buildConstant(Carry, 0);
- };
- return true;
- }
- case ConstantRange::OverflowResult::AlwaysOverflowsLow:
- case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
- MatchInfo = [=](MachineIRBuilder &B) {
- B.buildSub(Dst, LHS, RHS);
- B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
- /*isVector=*/CarryTy.isVector(),
- /*isFP=*/false));
- };
- return true;
- }
- }
-
- return false;
-}
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 23563bf9b7881f..f4a3d7e6371951 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -320,7 +320,7 @@ def AArch64PostLegalizerCombiner
hoist_logic_op_with_same_opcode_hands,
redundant_and, xor_of_and_with_same_reg,
extractvecelt_pairwise_add, redundant_or,
- mul_const, redundant_sext_inreg, artifact_combines,
+ mul_const, redundant_sext_inreg, post_legalizer_combines,
form_bitfield_extract, rotate_out_of_range,
icmp_to_true_false_known_bits,
select_combines, fold_merge_to_zext,
More information about the llvm-commits mailing list