[llvm] GlobalISel: Use G_UADDE when narrowing G_UMULH (PR #97194)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 2 08:13:46 PDT 2024


https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/97194

From e10d4e81ac6a2136f203252f765a1421a8ddb798 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Tue, 2 Jul 2024 17:10:38 +0200
Subject: [PATCH 1/2] AArch64/GlobalISel: Add IR test for scalar multiplies

I didn't see an existing simple test covering mul, and there appeared
to be no wide multiply tests.
---
 llvm/test/CodeGen/AArch64/GlobalISel/mul.ll | 232 ++++++++++++++++++++
 1 file changed, 232 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/mul.ll

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/mul.ll b/llvm/test/CodeGen/AArch64/GlobalISel/mul.ll
new file mode 100644
index 0000000000000..20827e6bec2c6
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/mul.ll
@@ -0,0 +1,232 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel -mtriple=aarch64-linux-gnu < %s | FileCheck %s
+
+define i8 @mul_i8(i8 %x, i8 %y) {
+; CHECK-LABEL: mul_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mul w0, w0, w1
+; CHECK-NEXT:    ret
+  %mul = mul i8 %x, %y
+  ret i8 %mul
+}
+
+define i16 @mul_i16(i16 %x, i16 %y) {
+; CHECK-LABEL: mul_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mul w0, w0, w1
+; CHECK-NEXT:    ret
+  %mul = mul i16 %x, %y
+  ret i16 %mul
+}
+
+define i32 @mul_i32(i32 %x, i32 %y) {
+; CHECK-LABEL: mul_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mul w0, w0, w1
+; CHECK-NEXT:    ret
+  %mul = mul i32 %x, %y
+  ret i32 %mul
+}
+
+define i64 @mul_i64(i64 %x, i64 %y) {
+; CHECK-LABEL: mul_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mul x0, x0, x1
+; CHECK-NEXT:    ret
+  %mul = mul i64 %x, %y
+  ret i64 %mul
+}
+
+define i96 @mul_i96(i96 %x, i96 %y) {
+; CHECK-LABEL: mul_i96:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mul x9, x0, x3
+; CHECK-NEXT:    mul x8, x0, x2
+; CHECK-NEXT:    umulh x10, x0, x2
+; CHECK-NEXT:    madd x9, x1, x2, x9
+; CHECK-NEXT:    mov x0, x8
+; CHECK-NEXT:    add x1, x9, x10
+; CHECK-NEXT:    ret
+  %mul = mul i96 %x, %y
+  ret i96 %mul
+}
+
+define i128 @mul_i128(i128 %x, i128 %y) {
+; CHECK-LABEL: mul_i128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mul x9, x0, x3
+; CHECK-NEXT:    mul x8, x0, x2
+; CHECK-NEXT:    umulh x10, x0, x2
+; CHECK-NEXT:    madd x9, x1, x2, x9
+; CHECK-NEXT:    mov x0, x8
+; CHECK-NEXT:    add x1, x9, x10
+; CHECK-NEXT:    ret
+  %mul = mul i128 %x, %y
+  ret i128 %mul
+}
+
+define i160 @mul_i160(i160 %x, i160 %y) {
+; CHECK-LABEL: mul_i160:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mul x9, x1, x4
+; CHECK-NEXT:    mul x10, x1, x5
+; CHECK-NEXT:    mul x11, x0, x5
+; CHECK-NEXT:    umulh x12, x0, x4
+; CHECK-NEXT:    madd x10, x2, x4, x10
+; CHECK-NEXT:    adds x9, x9, x11
+; CHECK-NEXT:    umulh x13, x1, x4
+; CHECK-NEXT:    cset w11, hs
+; CHECK-NEXT:    adds x1, x9, x12
+; CHECK-NEXT:    and x11, x11, #0x1
+; CHECK-NEXT:    umulh x14, x0, x5
+; CHECK-NEXT:    mul x8, x0, x4
+; CHECK-NEXT:    madd x9, x0, x6, x10
+; CHECK-NEXT:    cset w10, hs
+; CHECK-NEXT:    and x10, x10, #0x1
+; CHECK-NEXT:    add x10, x11, x10
+; CHECK-NEXT:    add x11, x13, x14
+; CHECK-NEXT:    add x10, x11, x10
+; CHECK-NEXT:    mov x0, x8
+; CHECK-NEXT:    add x2, x9, x10
+; CHECK-NEXT:    ret
+  %mul = mul i160 %x, %y
+  ret i160 %mul
+}
+
+define i192 @mul_i192(i192 %x, i192 %y) {
+; CHECK-LABEL: mul_i192:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mul x9, x1, x4
+; CHECK-NEXT:    mul x10, x1, x5
+; CHECK-NEXT:    mul x11, x0, x5
+; CHECK-NEXT:    umulh x12, x0, x4
+; CHECK-NEXT:    madd x10, x2, x4, x10
+; CHECK-NEXT:    adds x9, x9, x11
+; CHECK-NEXT:    umulh x13, x1, x4
+; CHECK-NEXT:    cset w11, hs
+; CHECK-NEXT:    adds x1, x9, x12
+; CHECK-NEXT:    and x11, x11, #0x1
+; CHECK-NEXT:    umulh x14, x0, x5
+; CHECK-NEXT:    mul x8, x0, x4
+; CHECK-NEXT:    madd x9, x0, x6, x10
+; CHECK-NEXT:    cset w10, hs
+; CHECK-NEXT:    and x10, x10, #0x1
+; CHECK-NEXT:    add x10, x11, x10
+; CHECK-NEXT:    add x11, x13, x14
+; CHECK-NEXT:    add x10, x11, x10
+; CHECK-NEXT:    mov x0, x8
+; CHECK-NEXT:    add x2, x9, x10
+; CHECK-NEXT:    ret
+  %mul = mul i192 %x, %y
+  ret i192 %mul
+}
+
+define i224 @mul_i224(i224 %x, i224 %y) {
+; CHECK-LABEL: mul_i224:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mul x8, x1, x4
+; CHECK-NEXT:    mul x9, x0, x5
+; CHECK-NEXT:    umulh x10, x0, x4
+; CHECK-NEXT:    mul x14, x2, x5
+; CHECK-NEXT:    adds x8, x8, x9
+; CHECK-NEXT:    mul x11, x2, x4
+; CHECK-NEXT:    cset w9, hs
+; CHECK-NEXT:    adds x8, x8, x10
+; CHECK-NEXT:    and x9, x9, #0x1
+; CHECK-NEXT:    mul x12, x1, x5
+; CHECK-NEXT:    cset w10, hs
+; CHECK-NEXT:    and x10, x10, #0x1
+; CHECK-NEXT:    mul x13, x0, x6
+; CHECK-NEXT:    add x9, x9, x10
+; CHECK-NEXT:    umulh x15, x1, x4
+; CHECK-NEXT:    adds x11, x11, x12
+; CHECK-NEXT:    madd x14, x3, x4, x14
+; CHECK-NEXT:    umulh x16, x0, x5
+; CHECK-NEXT:    madd x12, x1, x6, x14
+; CHECK-NEXT:    cset w14, hs
+; CHECK-NEXT:    adds x11, x11, x13
+; CHECK-NEXT:    cset w13, hs
+; CHECK-NEXT:    adds x11, x11, x15
+; CHECK-NEXT:    and x14, x14, #0x1
+; CHECK-NEXT:    umulh x17, x2, x4
+; CHECK-NEXT:    and x13, x13, #0x1
+; CHECK-NEXT:    add x13, x14, x13
+; CHECK-NEXT:    umulh x18, x1, x5
+; CHECK-NEXT:    cset w1, hs
+; CHECK-NEXT:    adds x11, x11, x16
+; CHECK-NEXT:    and x14, x1, #0x1
+; CHECK-NEXT:    mov x1, x8
+; CHECK-NEXT:    umulh x15, x0, x6
+; CHECK-NEXT:    add x10, x13, x14
+; CHECK-NEXT:    cset w13, hs
+; CHECK-NEXT:    adds x2, x11, x9
+; CHECK-NEXT:    and x9, x13, #0x1
+; CHECK-NEXT:    madd x12, x0, x7, x12
+; CHECK-NEXT:    cset w11, hs
+; CHECK-NEXT:    add x9, x10, x9
+; CHECK-NEXT:    add x13, x17, x18
+; CHECK-NEXT:    and x10, x11, #0x1
+; CHECK-NEXT:    mul x0, x0, x4
+; CHECK-NEXT:    add x9, x9, x10
+; CHECK-NEXT:    add x11, x13, x15
+; CHECK-NEXT:    add x10, x12, x11
+; CHECK-NEXT:    add x3, x10, x9
+; CHECK-NEXT:    ret
+  %mul = mul i224 %x, %y
+  ret i224 %mul
+}
+
+define i256 @mul_i256(i256 %x, i256 %y) {
+; CHECK-LABEL: mul_i256:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mul x8, x1, x4
+; CHECK-NEXT:    mul x9, x0, x5
+; CHECK-NEXT:    umulh x10, x0, x4
+; CHECK-NEXT:    mul x14, x2, x5
+; CHECK-NEXT:    adds x8, x8, x9
+; CHECK-NEXT:    mul x11, x2, x4
+; CHECK-NEXT:    cset w9, hs
+; CHECK-NEXT:    adds x8, x8, x10
+; CHECK-NEXT:    and x9, x9, #0x1
+; CHECK-NEXT:    mul x12, x1, x5
+; CHECK-NEXT:    cset w10, hs
+; CHECK-NEXT:    and x10, x10, #0x1
+; CHECK-NEXT:    mul x13, x0, x6
+; CHECK-NEXT:    add x9, x9, x10
+; CHECK-NEXT:    umulh x15, x1, x4
+; CHECK-NEXT:    adds x11, x11, x12
+; CHECK-NEXT:    madd x14, x3, x4, x14
+; CHECK-NEXT:    umulh x16, x0, x5
+; CHECK-NEXT:    madd x12, x1, x6, x14
+; CHECK-NEXT:    cset w14, hs
+; CHECK-NEXT:    adds x11, x11, x13
+; CHECK-NEXT:    cset w13, hs
+; CHECK-NEXT:    adds x11, x11, x15
+; CHECK-NEXT:    and x14, x14, #0x1
+; CHECK-NEXT:    umulh x17, x2, x4
+; CHECK-NEXT:    and x13, x13, #0x1
+; CHECK-NEXT:    add x13, x14, x13
+; CHECK-NEXT:    umulh x18, x1, x5
+; CHECK-NEXT:    cset w1, hs
+; CHECK-NEXT:    adds x11, x11, x16
+; CHECK-NEXT:    and x14, x1, #0x1
+; CHECK-NEXT:    mov x1, x8
+; CHECK-NEXT:    umulh x15, x0, x6
+; CHECK-NEXT:    add x10, x13, x14
+; CHECK-NEXT:    cset w13, hs
+; CHECK-NEXT:    adds x2, x11, x9
+; CHECK-NEXT:    and x9, x13, #0x1
+; CHECK-NEXT:    madd x12, x0, x7, x12
+; CHECK-NEXT:    cset w11, hs
+; CHECK-NEXT:    add x9, x10, x9
+; CHECK-NEXT:    add x13, x17, x18
+; CHECK-NEXT:    and x10, x11, #0x1
+; CHECK-NEXT:    mul x0, x0, x4
+; CHECK-NEXT:    add x9, x9, x10
+; CHECK-NEXT:    add x11, x13, x15
+; CHECK-NEXT:    add x10, x12, x11
+; CHECK-NEXT:    add x3, x10, x9
+; CHECK-NEXT:    ret
+  %mul = mul i256 %x, %y
+  ret i256 %mul
+}

From 26052805305f1f9dcbf3ab4102eb6b2372eb8dce Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 26 Feb 2020 23:13:59 -0500
Subject: [PATCH 2/2] GlobalISel: Use G_UADDE when narrowing G_UMULH

This greatly shrinks the AMDGPU div64 expansion.

Instead of adding a zext of the carry output, materialize a zero and
feed the carry into G_UADDE as its carry-in. This is closer to how the
DAG expansion using umulh does it, and it seems more natural to keep
the boolean carry output as a boolean carry input. We should have a
combine to form G_UADDE from the old zext pattern, but the legalizer
shouldn't create extra work for the combiner if it can help it.
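
A minimal sketch of the new carry accumulation, assuming the usual
GlobalISel includes (MachineIRBuilder.h and friends); the helper name
is made up and it just mirrors the loop in the patch below:

  // Hypothetical helper illustrating the change in
  // LegalizerHelper::multiplyRegisters: the carry stays an s1 and is
  // consumed as the G_UADDE carry-in, and CarrySum accumulates carries
  // as 0 + 0 + carry instead of zext + add.
  static std::pair<Register, Register>
  sumFactors(MachineIRBuilder &B, LLT NarrowTy, ArrayRef<Register> Factors) {
    const LLT S1 = LLT::scalar(1);
    const Register Zero = B.buildConstant(NarrowTy, 0).getReg(0);
    auto Uaddo = B.buildUAddo(NarrowTy, S1, Factors[0], Factors[1]);
    Register FactorSum = Uaddo.getReg(0);
    Register CarrySum = Zero;
    for (unsigned i = 2; i < Factors.size(); ++i) {
      // Old form: a fresh G_UADDO, G_ZEXT of the carry, G_ADD into CarrySum.
      auto Uadde =
          B.buildUAdde(NarrowTy, S1, FactorSum, Factors[i], Uaddo.getReg(1));
      FactorSum = Uadde.getReg(0);
      CarrySum =
          B.buildUAdde(NarrowTy, S1, CarrySum, Zero, Uadde.getReg(1)).getReg(0);
    }
    return {FactorSum, CarrySum};
  }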

The Mips cases are regressions, but the DAG lowering for mul i128 seems
not to use the MULHU/MULHS-based expansion at all. The DAG output is
radically different from the GlobalISel output as-is, so it seems like
Mips should be using a different legalization strategy here to begin with.

The RISCV legalizer tests look worse for the mul i96 case, but those
tests didn't exist when I originally wrote this patch four years ago
(and then forgot about it), so I haven't really looked into why. We've
entered the age where most tests should just be using IR, so I don't
know whether this matters (the IR mul test doesn't seem to cover i96).
---
 .../CodeGen/GlobalISel/LegalizerHelper.cpp    |   17 +-
 llvm/test/CodeGen/AArch64/GlobalISel/mul.ll   |  178 +-
 .../AMDGPU/GlobalISel/legalize-mul.mir        |   19 +-
 .../AMDGPU/GlobalISel/legalize-sdiv.mir       | 2307 +++++------
 .../AMDGPU/GlobalISel/legalize-srem.mir       | 2083 +++++-----
 .../AMDGPU/GlobalISel/legalize-udiv.mir       | 2232 +++++------
 .../AMDGPU/GlobalISel/legalize-umulh.mir      |  146 +-
 .../AMDGPU/GlobalISel/legalize-umulo.mir      |  198 +-
 .../AMDGPU/GlobalISel/legalize-urem.mir       | 2008 ++++------
 .../CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll     | 2988 +++++++-------
 .../test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll | 1655 ++++----
 .../CodeGen/AMDGPU/GlobalISel/srem.i64.ll     | 3566 ++++++++---------
 .../CodeGen/AMDGPU/GlobalISel/udiv.i64.ll     | 1772 ++++----
 .../test/CodeGen/AMDGPU/GlobalISel/udivrem.ll | 1409 +++----
 .../CodeGen/AMDGPU/GlobalISel/urem.i64.ll     | 1666 ++++----
 .../CodeGen/Mips/GlobalISel/legalizer/mul.mir |  144 +-
 .../CodeGen/Mips/GlobalISel/llvm-ir/mul.ll    |   71 +-
 .../legalizer/legalize-mul-ext-rv32.mir       |   38 +-
 .../legalizer/legalize-mul-ext-rv64.mir       |   44 +-
 19 files changed, 10181 insertions(+), 12360 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 86de1f3be9047..6a0175ca283b2 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -5719,6 +5719,7 @@ void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
                                         ArrayRef<Register> Src1Regs,
                                         ArrayRef<Register> Src2Regs,
                                         LLT NarrowTy) {
+  const LLT S1 = LLT::scalar(1);
   MachineIRBuilder &B = MIRBuilder;
   unsigned SrcParts = Src1Regs.size();
   unsigned DstParts = DstRegs.size();
@@ -5731,6 +5732,8 @@ void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
   unsigned CarrySumPrevDstIdx;
   SmallVector<Register, 4> Factors;
 
+  const Register Zero = B.buildConstant(NarrowTy, 0).getReg(0);
+
   for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
     // Collect low parts of muls for DstIdx.
     for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
@@ -5755,15 +5758,15 @@ void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
     // Add all factors and accumulate all carries into CarrySum.
     if (DstIdx != DstParts - 1) {
       MachineInstrBuilder Uaddo =
-          B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
+          B.buildUAddo(NarrowTy, S1, Factors[0], Factors[1]);
       FactorSum = Uaddo.getReg(0);
-      CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
+      CarrySum = Zero;
       for (unsigned i = 2; i < Factors.size(); ++i) {
-        MachineInstrBuilder Uaddo =
-            B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
-        FactorSum = Uaddo.getReg(0);
-        MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
-        CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
+        auto Uadde =
+            B.buildUAdde(NarrowTy, S1, FactorSum, Factors[i], Uaddo.getReg(1));
+        FactorSum = Uadde.getReg(0);
+        CarrySum = B.buildUAdde(NarrowTy, S1, CarrySum, Zero, Uadde.getReg(1))
+                       .getReg(0);
       }
     } else {
       // Since value for the next index is not calculated, neither is CarrySum.
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/mul.ll b/llvm/test/CodeGen/AArch64/GlobalISel/mul.ll
index 20827e6bec2c6..5d8546c72d570 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/mul.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/mul.ll
@@ -68,26 +68,25 @@ define i128 @mul_i128(i128 %x, i128 %y) {
 define i160 @mul_i160(i160 %x, i160 %y) {
 ; CHECK-LABEL: mul_i160:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mul x9, x1, x4
-; CHECK-NEXT:    mul x10, x1, x5
-; CHECK-NEXT:    mul x11, x0, x5
-; CHECK-NEXT:    umulh x12, x0, x4
-; CHECK-NEXT:    madd x10, x2, x4, x10
-; CHECK-NEXT:    adds x9, x9, x11
-; CHECK-NEXT:    umulh x13, x1, x4
+; CHECK-NEXT:    mul x8, x1, x4
+; CHECK-NEXT:    mul x9, x0, x5
+; CHECK-NEXT:    umulh x10, x0, x4
+; CHECK-NEXT:    mul x11, x2, x4
+; CHECK-NEXT:    adds x8, x8, x9
+; CHECK-NEXT:    mul x12, x1, x5
+; CHECK-NEXT:    mul x13, x0, x6
+; CHECK-NEXT:    umulh x14, x1, x4
+; CHECK-NEXT:    adcs x1, x8, x10
+; CHECK-NEXT:    adc x9, xzr, xzr
+; CHECK-NEXT:    adds x10, x11, x12
+; CHECK-NEXT:    umulh x8, x0, x5
 ; CHECK-NEXT:    cset w11, hs
-; CHECK-NEXT:    adds x1, x9, x12
-; CHECK-NEXT:    and x11, x11, #0x1
-; CHECK-NEXT:    umulh x14, x0, x5
-; CHECK-NEXT:    mul x8, x0, x4
-; CHECK-NEXT:    madd x9, x0, x6, x10
-; CHECK-NEXT:    cset w10, hs
-; CHECK-NEXT:    and x10, x10, #0x1
-; CHECK-NEXT:    add x10, x11, x10
-; CHECK-NEXT:    add x11, x13, x14
-; CHECK-NEXT:    add x10, x11, x10
-; CHECK-NEXT:    mov x0, x8
-; CHECK-NEXT:    add x2, x9, x10
+; CHECK-NEXT:    adc x10, x10, x13
+; CHECK-NEXT:    cmp w11, #1
+; CHECK-NEXT:    mul x0, x0, x4
+; CHECK-NEXT:    adc x10, x10, x14
+; CHECK-NEXT:    adc x8, x10, x8
+; CHECK-NEXT:    adc x2, x8, x9
 ; CHECK-NEXT:    ret
   %mul = mul i160 %x, %y
   ret i160 %mul
@@ -96,26 +95,25 @@ define i160 @mul_i160(i160 %x, i160 %y) {
 define i192 @mul_i192(i192 %x, i192 %y) {
 ; CHECK-LABEL: mul_i192:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mul x9, x1, x4
-; CHECK-NEXT:    mul x10, x1, x5
-; CHECK-NEXT:    mul x11, x0, x5
-; CHECK-NEXT:    umulh x12, x0, x4
-; CHECK-NEXT:    madd x10, x2, x4, x10
-; CHECK-NEXT:    adds x9, x9, x11
-; CHECK-NEXT:    umulh x13, x1, x4
+; CHECK-NEXT:    mul x8, x1, x4
+; CHECK-NEXT:    mul x9, x0, x5
+; CHECK-NEXT:    umulh x10, x0, x4
+; CHECK-NEXT:    mul x11, x2, x4
+; CHECK-NEXT:    adds x8, x8, x9
+; CHECK-NEXT:    mul x12, x1, x5
+; CHECK-NEXT:    mul x13, x0, x6
+; CHECK-NEXT:    umulh x14, x1, x4
+; CHECK-NEXT:    adcs x1, x8, x10
+; CHECK-NEXT:    adc x9, xzr, xzr
+; CHECK-NEXT:    adds x10, x11, x12
+; CHECK-NEXT:    umulh x8, x0, x5
 ; CHECK-NEXT:    cset w11, hs
-; CHECK-NEXT:    adds x1, x9, x12
-; CHECK-NEXT:    and x11, x11, #0x1
-; CHECK-NEXT:    umulh x14, x0, x5
-; CHECK-NEXT:    mul x8, x0, x4
-; CHECK-NEXT:    madd x9, x0, x6, x10
-; CHECK-NEXT:    cset w10, hs
-; CHECK-NEXT:    and x10, x10, #0x1
-; CHECK-NEXT:    add x10, x11, x10
-; CHECK-NEXT:    add x11, x13, x14
-; CHECK-NEXT:    add x10, x11, x10
-; CHECK-NEXT:    mov x0, x8
-; CHECK-NEXT:    add x2, x9, x10
+; CHECK-NEXT:    adc x10, x10, x13
+; CHECK-NEXT:    cmp w11, #1
+; CHECK-NEXT:    mul x0, x0, x4
+; CHECK-NEXT:    adc x10, x10, x14
+; CHECK-NEXT:    adc x8, x10, x8
+; CHECK-NEXT:    adc x2, x8, x9
 ; CHECK-NEXT:    ret
   %mul = mul i192 %x, %y
   ret i192 %mul
@@ -127,50 +125,40 @@ define i224 @mul_i224(i224 %x, i224 %y) {
 ; CHECK-NEXT:    mul x8, x1, x4
 ; CHECK-NEXT:    mul x9, x0, x5
 ; CHECK-NEXT:    umulh x10, x0, x4
-; CHECK-NEXT:    mul x14, x2, x5
-; CHECK-NEXT:    adds x8, x8, x9
 ; CHECK-NEXT:    mul x11, x2, x4
-; CHECK-NEXT:    cset w9, hs
-; CHECK-NEXT:    adds x8, x8, x10
-; CHECK-NEXT:    and x9, x9, #0x1
+; CHECK-NEXT:    adds x8, x8, x9
 ; CHECK-NEXT:    mul x12, x1, x5
-; CHECK-NEXT:    cset w10, hs
-; CHECK-NEXT:    and x10, x10, #0x1
+; CHECK-NEXT:    adcs x8, x8, x10
+; CHECK-NEXT:    mul x14, x2, x5
+; CHECK-NEXT:    adc x10, xzr, xzr
 ; CHECK-NEXT:    mul x13, x0, x6
-; CHECK-NEXT:    add x9, x9, x10
-; CHECK-NEXT:    umulh x15, x1, x4
 ; CHECK-NEXT:    adds x11, x11, x12
+; CHECK-NEXT:    umulh x15, x1, x4
 ; CHECK-NEXT:    madd x14, x3, x4, x14
 ; CHECK-NEXT:    umulh x16, x0, x5
 ; CHECK-NEXT:    madd x12, x1, x6, x14
 ; CHECK-NEXT:    cset w14, hs
-; CHECK-NEXT:    adds x11, x11, x13
-; CHECK-NEXT:    cset w13, hs
-; CHECK-NEXT:    adds x11, x11, x15
-; CHECK-NEXT:    and x14, x14, #0x1
+; CHECK-NEXT:    adcs x11, x11, x13
+; CHECK-NEXT:    adc x13, xzr, xzr
+; CHECK-NEXT:    cmp w14, #1
 ; CHECK-NEXT:    umulh x17, x2, x4
-; CHECK-NEXT:    and x13, x13, #0x1
-; CHECK-NEXT:    add x13, x14, x13
-; CHECK-NEXT:    umulh x18, x1, x5
-; CHECK-NEXT:    cset w1, hs
-; CHECK-NEXT:    adds x11, x11, x16
-; CHECK-NEXT:    and x14, x1, #0x1
+; CHECK-NEXT:    adcs x11, x11, x15
+; CHECK-NEXT:    adc x13, x13, xzr
+; CHECK-NEXT:    cmp w14, #1
+; CHECK-NEXT:    umulh x9, x1, x5
+; CHECK-NEXT:    adcs x11, x11, x16
 ; CHECK-NEXT:    mov x1, x8
-; CHECK-NEXT:    umulh x15, x0, x6
-; CHECK-NEXT:    add x10, x13, x14
-; CHECK-NEXT:    cset w13, hs
-; CHECK-NEXT:    adds x2, x11, x9
-; CHECK-NEXT:    and x9, x13, #0x1
+; CHECK-NEXT:    adc x13, x13, xzr
+; CHECK-NEXT:    cmp w14, #1
+; CHECK-NEXT:    umulh x18, x0, x6
+; CHECK-NEXT:    adcs x2, x11, x10
+; CHECK-NEXT:    adc x10, x13, xzr
 ; CHECK-NEXT:    madd x12, x0, x7, x12
-; CHECK-NEXT:    cset w11, hs
-; CHECK-NEXT:    add x9, x10, x9
-; CHECK-NEXT:    add x13, x17, x18
-; CHECK-NEXT:    and x10, x11, #0x1
+; CHECK-NEXT:    add x9, x17, x9
 ; CHECK-NEXT:    mul x0, x0, x4
+; CHECK-NEXT:    add x9, x9, x18
 ; CHECK-NEXT:    add x9, x9, x10
-; CHECK-NEXT:    add x11, x13, x15
-; CHECK-NEXT:    add x10, x12, x11
-; CHECK-NEXT:    add x3, x10, x9
+; CHECK-NEXT:    add x3, x12, x9
 ; CHECK-NEXT:    ret
   %mul = mul i224 %x, %y
   ret i224 %mul
@@ -182,50 +170,40 @@ define i256 @mul_i256(i256 %x, i256 %y) {
 ; CHECK-NEXT:    mul x8, x1, x4
 ; CHECK-NEXT:    mul x9, x0, x5
 ; CHECK-NEXT:    umulh x10, x0, x4
-; CHECK-NEXT:    mul x14, x2, x5
-; CHECK-NEXT:    adds x8, x8, x9
 ; CHECK-NEXT:    mul x11, x2, x4
-; CHECK-NEXT:    cset w9, hs
-; CHECK-NEXT:    adds x8, x8, x10
-; CHECK-NEXT:    and x9, x9, #0x1
+; CHECK-NEXT:    adds x8, x8, x9
 ; CHECK-NEXT:    mul x12, x1, x5
-; CHECK-NEXT:    cset w10, hs
-; CHECK-NEXT:    and x10, x10, #0x1
+; CHECK-NEXT:    adcs x8, x8, x10
+; CHECK-NEXT:    mul x14, x2, x5
+; CHECK-NEXT:    adc x10, xzr, xzr
 ; CHECK-NEXT:    mul x13, x0, x6
-; CHECK-NEXT:    add x9, x9, x10
-; CHECK-NEXT:    umulh x15, x1, x4
 ; CHECK-NEXT:    adds x11, x11, x12
+; CHECK-NEXT:    umulh x15, x1, x4
 ; CHECK-NEXT:    madd x14, x3, x4, x14
 ; CHECK-NEXT:    umulh x16, x0, x5
 ; CHECK-NEXT:    madd x12, x1, x6, x14
 ; CHECK-NEXT:    cset w14, hs
-; CHECK-NEXT:    adds x11, x11, x13
-; CHECK-NEXT:    cset w13, hs
-; CHECK-NEXT:    adds x11, x11, x15
-; CHECK-NEXT:    and x14, x14, #0x1
+; CHECK-NEXT:    adcs x11, x11, x13
+; CHECK-NEXT:    adc x13, xzr, xzr
+; CHECK-NEXT:    cmp w14, #1
 ; CHECK-NEXT:    umulh x17, x2, x4
-; CHECK-NEXT:    and x13, x13, #0x1
-; CHECK-NEXT:    add x13, x14, x13
-; CHECK-NEXT:    umulh x18, x1, x5
-; CHECK-NEXT:    cset w1, hs
-; CHECK-NEXT:    adds x11, x11, x16
-; CHECK-NEXT:    and x14, x1, #0x1
+; CHECK-NEXT:    adcs x11, x11, x15
+; CHECK-NEXT:    adc x13, x13, xzr
+; CHECK-NEXT:    cmp w14, #1
+; CHECK-NEXT:    umulh x9, x1, x5
+; CHECK-NEXT:    adcs x11, x11, x16
 ; CHECK-NEXT:    mov x1, x8
-; CHECK-NEXT:    umulh x15, x0, x6
-; CHECK-NEXT:    add x10, x13, x14
-; CHECK-NEXT:    cset w13, hs
-; CHECK-NEXT:    adds x2, x11, x9
-; CHECK-NEXT:    and x9, x13, #0x1
+; CHECK-NEXT:    adc x13, x13, xzr
+; CHECK-NEXT:    cmp w14, #1
+; CHECK-NEXT:    umulh x18, x0, x6
+; CHECK-NEXT:    adcs x2, x11, x10
+; CHECK-NEXT:    adc x10, x13, xzr
 ; CHECK-NEXT:    madd x12, x0, x7, x12
-; CHECK-NEXT:    cset w11, hs
-; CHECK-NEXT:    add x9, x10, x9
-; CHECK-NEXT:    add x13, x17, x18
-; CHECK-NEXT:    and x10, x11, #0x1
+; CHECK-NEXT:    add x9, x17, x9
 ; CHECK-NEXT:    mul x0, x0, x4
+; CHECK-NEXT:    add x9, x9, x18
 ; CHECK-NEXT:    add x9, x9, x10
-; CHECK-NEXT:    add x11, x13, x15
-; CHECK-NEXT:    add x10, x12, x11
-; CHECK-NEXT:    add x3, x10, x9
+; CHECK-NEXT:    add x3, x12, x9
 ; CHECK-NEXT:    ret
   %mul = mul i256 %x, %y
   ret i256 %mul
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir
index 2bf8649e76242..a79f1db9b8cb2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir
@@ -619,25 +619,24 @@ body: |
     ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96)
     ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96)
     ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]]
+    ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]]
     ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV4]]
     ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]]
     ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL1]], [[MUL2]]
-    ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
-    ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]]
+    ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE1]]
     ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV2]], [[UV3]]
     ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV4]]
     ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV5]]
     ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]]
     ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV4]]
-    ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]]
-    ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[MUL5]]
-    ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[UMULH1]]
-    ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH2]]
-    ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[ADD]]
-    ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MUL]](s32), [[UADDO2]](s32), [[ADD5]](s32)
+    ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]]
+    ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL5]]
+    ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[UMULH1]]
+    ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[UMULH2]]
+    ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UADDE2]]
+    ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MUL]](s32), [[UADDE]](s32), [[ADD4]](s32)
     ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96)
     ;
     ; GFX89-LABEL: name: test_mul_s96
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir
index f9ec3bca78931..81e13b6cf6745 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir
@@ -49,6 +49,7 @@ body: |
     ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
     ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
     ; GFX6-NEXT: $vgpr0 = COPY [[SUB3]](s32)
+    ;
     ; GFX8-LABEL: name: test_sdiv_s32
     ; GFX8: liveins: $vgpr0, $vgpr1
     ; GFX8-NEXT: {{  $}}
@@ -87,6 +88,7 @@ body: |
     ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
     ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
     ; GFX8-NEXT: $vgpr0 = COPY [[SUB3]](s32)
+    ;
     ; GFX9-LABEL: name: test_sdiv_s32
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9-NEXT: {{  $}}
@@ -125,6 +127,7 @@ body: |
     ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
     ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
     ; GFX9-NEXT: $vgpr0 = COPY [[SUB3]](s32)
+    ;
     ; GFX10-LABEL: name: test_sdiv_s32
     ; GFX10: liveins: $vgpr0, $vgpr1
     ; GFX10-NEXT: {{  $}}
@@ -244,6 +247,7 @@ body: |
     ; GFX6-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]]
     ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB3]](s32), [[SUB7]](s32)
     ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ;
     ; GFX8-LABEL: name: test_sdiv_v2s32
     ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; GFX8-NEXT: {{  $}}
@@ -313,6 +317,7 @@ body: |
     ; GFX8-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]]
     ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB3]](s32), [[SUB7]](s32)
     ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ;
     ; GFX9-LABEL: name: test_sdiv_v2s32
     ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; GFX9-NEXT: {{  $}}
@@ -382,6 +387,7 @@ body: |
     ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]]
     ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB3]](s32), [[SUB7]](s32)
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ;
     ; GFX10-LABEL: name: test_sdiv_v2s32
     ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; GFX10-NEXT: {{  $}}
@@ -506,6 +512,7 @@ body: |
     ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]]
     ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]]
     ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
     ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
     ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
@@ -515,89 +522,73 @@ body: |
     ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
     ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
     ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]]
-    ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH1]], [[UADDO5]]
+    ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]]
     ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
     ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
     ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
-    ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
-    ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]]
-    ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
-    ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
-    ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH3]], [[UADDO7]]
+    ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]]
+    ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]]
+    ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]]
     ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
-    ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
-    ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
-    ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
-    ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]]
-    ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
-    ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
-    ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]]
-    ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
-    ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]]
-    ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]]
-    ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]]
-    ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]]
-    ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
-    ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
-    ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]]
-    ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]]
-    ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]]
-    ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
-    ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]]
-    ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
-    ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]]
-    ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
-    ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]]
-    ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]]
-    ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
-    ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]]
+    ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE14]]
+    ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]]
+    ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]]
+    ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO8]]
+    ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]]
+    ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]]
+    ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO8]]
+    ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]]
+    ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[MUL6]]
+    ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]]
+    ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[MUL6]]
+    ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH6]], [[UADDO11]]
+    ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]]
+    ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]]
+    ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[MUL6]]
+    ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]]
+    ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]]
+    ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]]
+    ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]]
+    ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]]
+    ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]]
+    ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE28]]
+    ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]]
+    ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]]
     ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
     ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
-    ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]]
-    ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]]
-    ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]]
-    ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
-    ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
-    ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]]
-    ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]]
-    ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]]
-    ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
-    ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]]
-    ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
-    ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]]
-    ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
-    ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
-    ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]]
-    ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]]
-    ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32)
+    ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO14]]
+    ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE30]]
+    ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO14]]
+    ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH10]], [[UADDO17]]
+    ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]]
+    ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE30]]
+    ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO14]]
+    ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE30]]
+    ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH12]], [[UADDO19]]
+    ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]]
+    ; GFX6-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]]
+    ; GFX6-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]]
+    ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE30]]
+    ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE42]]
+    ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD6]](s32)
     ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
-    ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]]
-    ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]]
-    ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]]
-    ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]]
-    ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
-    ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]]
+    ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDE40]]
+    ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDE40]]
+    ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD6]]
+    ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDE40]]
+    ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]]
     ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]]
-    ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]]
-    ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]]
+    ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD8]], [[USUBO3]]
+    ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD8]]
     ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
     ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]]
     ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
@@ -610,9 +601,9 @@ body: |
     ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
     ; GFX6-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; GFX6-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV22]]
-    ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV23]], [[UADDO39]]
-    ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32)
+    ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV22]]
+    ; GFX6-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV23]], [[UADDO21]]
+    ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32)
     ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]]
     ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
     ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]]
@@ -620,9 +611,9 @@ body: |
     ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]]
     ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
     ; GFX6-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX6-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV24]]
-    ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV25]], [[UADDO41]]
-    ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32)
+    ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV24]]
+    ; GFX6-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV25]], [[UADDO23]]
+    ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32)
     ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
     ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
     ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
@@ -635,6 +626,7 @@ body: |
     ; GFX6-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO7]]
     ; GFX6-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32)
     ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64)
+    ;
     ; GFX8-LABEL: name: test_sdiv_s64
     ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; GFX8-NEXT: {{  $}}
@@ -683,89 +675,74 @@ body: |
     ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]]
     ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]]
     ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
+    ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]]
     ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]]
     ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]]
     ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]]
-    ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]]
+    ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]]
     ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]]
     ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]]
     ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]]
-    ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]]
-    ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
-    ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]]
-    ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]]
+    ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]]
+    ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]]
+    ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]]
     ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]]
-    ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
-    ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]]
+    ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]]
+    ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]]
+    ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]]
     ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64)
     ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]]
     ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64)
-    ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]]
-    ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]]
-    ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]]
-    ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]]
-    ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]]
-    ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]]
-    ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]]
-    ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
-    ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]]
-    ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
-    ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]]
-    ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]]
-    ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]]
-    ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]]
-    ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
-    ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]]
+    ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV18]]
+    ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV20]]
+    ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV18]]
+    ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]]
+    ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]]
+    ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV20]]
+    ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV18]]
+    ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV20]]
+    ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
+    ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]]
+    ; GFX8-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]]
+    ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]]
+    ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]]
+    ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV20]]
+    ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]]
+    ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]]
+    ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]]
     ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
     ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
-    ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]]
-    ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]]
-    ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]]
-    ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
-    ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]]
-    ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]]
-    ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]]
-    ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]]
-    ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
-    ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]]
-    ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
-    ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]]
-    ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
-    ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]]
-    ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]]
-    ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]]
-    ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD11]](s32)
+    ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO14]]
+    ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE30]]
+    ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO14]]
+    ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
+    ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]]
+    ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]]
+    ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE30]]
+    ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO14]]
+    ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE30]]
+    ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
+    ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]]
+    ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]]
+    ; GFX8-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]]
+    ; GFX8-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]]
+    ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE30]]
+    ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]]
+    ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD2]](s32)
     ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDE40]], [[C5]]
     ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64)
     ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD2]], [[ANYEXT2]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]]
     ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64)
     ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]]
     ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]]
@@ -782,9 +759,9 @@ body: |
     ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
     ; GFX8-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV34]]
-    ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV35]], [[UADDO39]]
-    ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32)
+    ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV34]]
+    ; GFX8-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV35]], [[UADDO21]]
+    ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32)
     ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV33]]
     ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
     ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV32]]
@@ -792,9 +769,9 @@ body: |
     ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV33]]
     ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
     ; GFX8-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV36]]
-    ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV37]], [[UADDO41]]
-    ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32)
+    ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV36]]
+    ; GFX8-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV37]], [[UADDO23]]
+    ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32)
     ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
     ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
     ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
@@ -807,6 +784,7 @@ body: |
     ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV39]], [[UV41]], [[USUBO7]]
     ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32)
     ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64)
+    ;
     ; GFX9-LABEL: name: test_sdiv_s64
     ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; GFX9-NEXT: {{  $}}
@@ -855,89 +833,74 @@ body: |
     ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]]
     ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]]
     ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]]
     ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]]
     ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]]
     ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]]
-    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]]
+    ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]]
     ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]]
     ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]]
     ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]]
-    ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]]
-    ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
-    ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]]
-    ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]]
+    ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]]
+    ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]]
+    ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]]
     ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]]
-    ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
-    ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]]
+    ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]]
+    ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]]
+    ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]]
     ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64)
     ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]]
     ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64)
-    ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]]
-    ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]]
-    ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]]
-    ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]]
-    ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]]
-    ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]]
-    ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]]
-    ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
-    ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]]
-    ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
-    ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]]
-    ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]]
-    ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]]
-    ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]]
-    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
-    ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]]
+    ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV18]]
+    ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV20]]
+    ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV18]]
+    ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]]
+    ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]]
+    ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV20]]
+    ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV18]]
+    ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV20]]
+    ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
+    ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]]
+    ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]]
+    ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]]
+    ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]]
+    ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV20]]
+    ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]]
+    ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]]
+    ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]]
     ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
     ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
-    ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]]
-    ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]]
-    ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]]
-    ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
-    ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]]
-    ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]]
-    ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]]
-    ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]]
-    ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
-    ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]]
-    ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
-    ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]]
-    ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
-    ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]]
-    ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]]
-    ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]]
-    ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD11]](s32)
+    ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO14]]
+    ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE30]]
+    ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO14]]
+    ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
+    ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]]
+    ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]]
+    ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE30]]
+    ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO14]]
+    ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE30]]
+    ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
+    ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]]
+    ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]]
+    ; GFX9-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]]
+    ; GFX9-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]]
+    ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE30]]
+    ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]]
+    ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD2]](s32)
     ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDE40]], [[C5]]
     ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64)
     ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD2]], [[ANYEXT2]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]]
     ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64)
     ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]]
     ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]]
@@ -954,9 +917,9 @@ body: |
     ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
     ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV34]]
-    ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV35]], [[UADDO39]]
-    ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32)
+    ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV34]]
+    ; GFX9-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV35]], [[UADDO21]]
+    ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32)
     ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV33]]
     ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
     ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV32]]
@@ -964,9 +927,9 @@ body: |
     ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV33]]
     ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
     ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV36]]
-    ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV37]], [[UADDO41]]
-    ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32)
+    ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV36]]
+    ; GFX9-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV37]], [[UADDO23]]
+    ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32)
     ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
     ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
     ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
@@ -979,6 +942,7 @@ body: |
     ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV39]], [[UV41]], [[USUBO7]]
     ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32)
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64)
+    ;
     ; GFX10-LABEL: name: test_sdiv_s64
     ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; GFX10-NEXT: {{  $}}
@@ -1027,93 +991,78 @@ body: |
     ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[MUL]]
     ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
     ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]]
+    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]]
     ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
     ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]]
     ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]]
-    ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]]
-    ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]]
+    ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]]
     ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
     ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]]
     ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
-    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]]
-    ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]]
-    ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
-    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
-    ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]]
+    ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]]
+    ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]]
+    ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]]
+    ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]]
     ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
-    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]]
-    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
-    ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
-    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]]
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]]
+    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]]
+    ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]]
+    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]]
     ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64)
-    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
-    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL5]]
-    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
-    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]]
-    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV16]]
-    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]]
-    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV16]]
-    ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]]
-    ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]]
-    ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]]
-    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV16]]
-    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]]
-    ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]]
-    ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]]
-    ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
-    ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]]
-    ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
-    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]]
-    ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]]
-    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
-    ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]]
+    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]]
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL5]]
+    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]]
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]]
+    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV16]]
+    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]]
+    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV16]]
+    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]]
+    ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]]
+    ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]]
+    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]]
+    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV16]]
+    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]]
+    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]]
+    ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]]
+    ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]]
+    ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]]
+    ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]]
+    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]]
+    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]]
+    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]]
+    ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]]
     ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
     ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
-    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]]
-    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]]
-    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]]
-    ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]]
-    ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]]
-    ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]]
-    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]]
-    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]]
-    ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]]
-    ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]]
-    ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
-    ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]]
-    ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
-    ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
-    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]]
-    ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]]
-    ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32)
+    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO14]]
+    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE30]]
+    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO14]]
+    ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]]
+    ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]]
+    ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]]
+    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE30]]
+    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO14]]
+    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE30]]
+    ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]]
+    ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]]
+    ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]]
+    ; GFX10-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]]
+    ; GFX10-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]]
+    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE30]]
+    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]]
+    ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD6]](s32)
     ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
-    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO36]], [[C5]]
+    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDE40]], [[C5]]
     ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
-    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]]
-    ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[MUL13]]
-    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]]
-    ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]]
+    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD6]]
+    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[MUL13]]
+    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDE40]]
+    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]]
     ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]]
-    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]]
-    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]]
+    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD8]], [[USUBO3]]
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD8]]
     ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
     ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV27]]
     ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
@@ -1126,9 +1075,9 @@ body: |
     ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
     ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV28]]
-    ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV29]], [[UADDO39]]
-    ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32)
+    ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV28]]
+    ; GFX10-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV29]], [[UADDO21]]
+    ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32)
     ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV27]]
     ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
     ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV26]]
@@ -1136,9 +1085,9 @@ body: |
     ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV27]]
     ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
     ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV30]]
-    ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV31]], [[UADDO41]]
-    ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32)
+    ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV30]]
+    ; GFX10-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV31]], [[UADDO23]]
+    ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32)
     ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
     ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
     ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
@@ -1208,6 +1157,7 @@ body: |
     ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]]
     ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]]
     ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
     ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
     ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
@@ -1217,89 +1167,73 @@ body: |
     ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
     ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
     ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]]
-    ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH1]], [[UADDO5]]
+    ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]]
     ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
     ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
     ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
-    ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
-    ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]]
-    ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
-    ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
-    ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH3]], [[UADDO7]]
+    ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]]
+    ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]]
+    ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]]
     ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
-    ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
-    ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
-    ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
-    ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]]
-    ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
-    ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
-    ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]]
-    ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
-    ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]]
-    ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]]
-    ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]]
-    ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]]
-    ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
-    ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
-    ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]]
-    ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]]
-    ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]]
-    ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
-    ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]]
-    ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
-    ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]]
-    ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
-    ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]]
-    ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]]
-    ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
-    ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]]
+    ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE14]]
+    ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]]
+    ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]]
+    ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO8]]
+    ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]]
+    ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]]
+    ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO8]]
+    ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]]
+    ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[MUL6]]
+    ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]]
+    ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[MUL6]]
+    ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH6]], [[UADDO11]]
+    ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]]
+    ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]]
+    ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[MUL6]]
+    ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]]
+    ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]]
+    ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]]
+    ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]]
+    ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]]
+    ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]]
+    ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE28]]
+    ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]]
+    ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]]
     ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
     ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
-    ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]]
-    ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]]
-    ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]]
-    ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
-    ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
-    ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]]
-    ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]]
-    ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]]
-    ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
-    ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]]
-    ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
-    ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]]
-    ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
-    ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
-    ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]]
-    ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]]
-    ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32)
+    ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO14]]
+    ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE30]]
+    ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO14]]
+    ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH10]], [[UADDO17]]
+    ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]]
+    ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE30]]
+    ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO14]]
+    ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE30]]
+    ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH12]], [[UADDO19]]
+    ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]]
+    ; GFX6-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]]
+    ; GFX6-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]]
+    ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE30]]
+    ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE42]]
+    ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD6]](s32)
     ; GFX6-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
-    ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDO36]]
-    ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]]
-    ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]]
-    ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDO36]]
-    ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
-    ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]]
+    ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDE40]]
+    ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDE40]]
+    ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD6]]
+    ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDE40]]
+    ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]]
     ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[MUL15]]
-    ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]]
-    ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]]
+    ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD8]], [[USUBO3]]
+    ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD8]]
     ; GFX6-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
     ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]]
     ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
@@ -1312,9 +1246,9 @@ body: |
     ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
     ; GFX6-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; GFX6-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV26]]
-    ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV27]], [[UADDO39]]
-    ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32)
+    ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV26]]
+    ; GFX6-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV27]], [[UADDO21]]
+    ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32)
     ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]]
     ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
     ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]]
@@ -1322,9 +1256,9 @@ body: |
     ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]]
     ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
     ; GFX6-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX6-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV28]]
-    ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV29]], [[UADDO41]]
-    ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32)
+    ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV28]]
+    ; GFX6-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV29]], [[UADDO23]]
+    ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32)
     ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
     ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
     ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
@@ -1340,14 +1274,14 @@ body: |
     ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32)
     ; GFX6-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
     ; GFX6-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64)
-    ; GFX6-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]]
-    ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO43]]
-    ; GFX6-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO42]](s32), [[UADDE12]](s32)
+    ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]]
+    ; GFX6-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO25]]
+    ; GFX6-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO24]](s32), [[UADDE48]](s32)
     ; GFX6-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
     ; GFX6-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64)
-    ; GFX6-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UV38]], [[UV40]]
-    ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UV39]], [[UV41]], [[UADDO45]]
-    ; GFX6-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE14]](s32)
+    ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UV38]], [[UV40]]
+    ; GFX6-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[UV39]], [[UV41]], [[UADDO27]]
+    ; GFX6-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO26]](s32), [[UADDE50]](s32)
     ; GFX6-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]]
     ; GFX6-NEXT: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]]
     ; GFX6-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64)
@@ -1371,94 +1305,79 @@ body: |
     ; GFX6-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[FPTOUI2]]
     ; GFX6-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]]
     ; GFX6-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[FPTOUI2]]
-    ; GFX6-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]]
-    ; GFX6-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]]
+    ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]]
+    ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[UMULH15]]
     ; GFX6-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]]
-    ; GFX6-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]]
+    ; GFX6-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD10]]
     ; GFX6-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]]
-    ; GFX6-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]]
-    ; GFX6-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1)
-    ; GFX6-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH16]]
-    ; GFX6-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1)
-    ; GFX6-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]]
-    ; GFX6-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]]
+    ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]]
+    ; GFX6-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH16]], [[UADDO29]]
+    ; GFX6-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE53]]
+    ; GFX6-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD10]]
     ; GFX6-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]]
-    ; GFX6-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]]
-    ; GFX6-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]]
-    ; GFX6-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1)
-    ; GFX6-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH18]]
-    ; GFX6-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1)
-    ; GFX6-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]]
-    ; GFX6-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD20]]
-    ; GFX6-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1)
-    ; GFX6-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]]
-    ; GFX6-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]]
-    ; GFX6-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]]
-    ; GFX6-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO54]]
-    ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO57]]
-    ; GFX6-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO56]]
-    ; GFX6-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[UADDO56]]
-    ; GFX6-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE16]]
-    ; GFX6-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO56]]
-    ; GFX6-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]]
-    ; GFX6-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]]
-    ; GFX6-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[MUL24]]
-    ; GFX6-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO56]], [[ADD25]]
-    ; GFX6-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[MUL24]]
-    ; GFX6-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]]
-    ; GFX6-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1)
-    ; GFX6-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH21]]
-    ; GFX6-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1)
-    ; GFX6-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]]
-    ; GFX6-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD25]]
-    ; GFX6-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[MUL24]]
-    ; GFX6-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[ADD25]]
-    ; GFX6-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]]
-    ; GFX6-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1)
-    ; GFX6-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH23]]
-    ; GFX6-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1)
-    ; GFX6-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]]
-    ; GFX6-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD26]]
-    ; GFX6-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1)
-    ; GFX6-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]]
-    ; GFX6-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD25]]
-    ; GFX6-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]]
-    ; GFX6-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[UADDO66]]
-    ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD29]], [[UADDO69]]
+    ; GFX6-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD10]]
+    ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]]
+    ; GFX6-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH18]], [[UADDO31]]
+    ; GFX6-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE57]]
+    ; GFX6-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[UADDE56]], [[UADDE54]], [[UADDO31]]
+    ; GFX6-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDE58]], [[C6]], [[UADDE61]]
+    ; GFX6-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD10]]
+    ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE62]]
+    ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE60]]
+    ; GFX6-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD11]], [[UADDO33]]
+    ; GFX6-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO32]]
+    ; GFX6-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[UADDO32]]
+    ; GFX6-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE64]]
+    ; GFX6-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO32]]
+    ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]]
+    ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[UMULH20]]
+    ; GFX6-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE64]], [[MUL24]]
+    ; GFX6-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO32]], [[ADD13]]
+    ; GFX6-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO32]], [[MUL24]]
+    ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]]
+    ; GFX6-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDO34]], [[UMULH21]], [[UADDO35]]
+    ; GFX6-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE67]]
+    ; GFX6-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE64]], [[ADD13]]
+    ; GFX6-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE64]], [[MUL24]]
+    ; GFX6-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO32]], [[ADD13]]
+    ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]]
+    ; GFX6-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDO36]], [[UMULH23]], [[UADDO37]]
+    ; GFX6-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE71]]
+    ; GFX6-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[UADDE70]], [[UADDE68]], [[UADDO37]]
+    ; GFX6-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDE72]], [[C6]], [[UADDE75]]
+    ; GFX6-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE64]], [[ADD13]]
+    ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[UADDE76]]
+    ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UADDE74]]
+    ; GFX6-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[UADDE64]], [[ADD14]], [[UADDO39]]
     ; GFX6-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
     ; GFX6-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
-    ; GFX6-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO68]]
-    ; GFX6-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE18]]
-    ; GFX6-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO68]]
-    ; GFX6-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]]
-    ; GFX6-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1)
-    ; GFX6-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH25]]
-    ; GFX6-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1)
-    ; GFX6-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]]
-    ; GFX6-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE18]]
-    ; GFX6-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO68]]
-    ; GFX6-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE18]]
-    ; GFX6-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]]
-    ; GFX6-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1)
-    ; GFX6-NEXT: [[UADDO76:%[0-9]+]]:_(s32), [[UADDO77:%[0-9]+]]:_(s1) = G_UADDO [[UADDO74]], [[UMULH27]]
-    ; GFX6-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO77]](s1)
-    ; GFX6-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]]
-    ; GFX6-NEXT: [[UADDO78:%[0-9]+]]:_(s32), [[UADDO79:%[0-9]+]]:_(s1) = G_UADDO [[UADDO76]], [[ADD30]]
-    ; GFX6-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO79]](s1)
-    ; GFX6-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]]
-    ; GFX6-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE18]]
-    ; GFX6-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]]
-    ; GFX6-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO78]](s32), [[ADD33]](s32)
+    ; GFX6-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO38]]
+    ; GFX6-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE78]]
+    ; GFX6-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO38]]
+    ; GFX6-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]]
+    ; GFX6-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDO40]], [[UMULH25]], [[UADDO41]]
+    ; GFX6-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE81]]
+    ; GFX6-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE78]]
+    ; GFX6-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO38]]
+    ; GFX6-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE78]]
+    ; GFX6-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]]
+    ; GFX6-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[UADDO42]], [[UMULH27]], [[UADDO43]]
+    ; GFX6-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE85]]
+    ; GFX6-NEXT: [[UADDE88:%[0-9]+]]:_(s32), [[UADDE89:%[0-9]+]]:_(s1) = G_UADDE [[UADDE84]], [[UADDE82]], [[UADDO43]]
+    ; GFX6-NEXT: [[UADDE90:%[0-9]+]]:_(s32), [[UADDE91:%[0-9]+]]:_(s1) = G_UADDE [[UADDE86]], [[C6]], [[UADDE89]]
+    ; GFX6-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE78]]
+    ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[UADDE90]]
+    ; GFX6-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE88]](s32), [[ADD15]](s32)
     ; GFX6-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64)
-    ; GFX6-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[UADDO78]]
-    ; GFX6-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV53]], [[UADDO78]]
-    ; GFX6-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[ADD33]]
-    ; GFX6-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV52]], [[UADDO78]]
-    ; GFX6-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]]
-    ; GFX6-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]]
+    ; GFX6-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[UADDE88]]
+    ; GFX6-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV53]], [[UADDE88]]
+    ; GFX6-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[ADD15]]
+    ; GFX6-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV52]], [[UADDE88]]
+    ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]]
+    ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH29]]
     ; GFX6-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV48]], [[MUL33]]
-    ; GFX6-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV49]], [[ADD35]], [[USUBO11]]
-    ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV49]], [[ADD35]]
+    ; GFX6-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV49]], [[ADD17]], [[USUBO11]]
+    ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV49]], [[ADD17]]
     ; GFX6-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64)
     ; GFX6-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE12]](s32), [[UV55]]
     ; GFX6-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1)
@@ -1470,9 +1389,9 @@ body: |
     ; GFX6-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV55]], [[USUBO11]]
     ; GFX6-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[USUBE14]], [[C6]], [[USUBO13]]
     ; GFX6-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX6-NEXT: [[UADDO80:%[0-9]+]]:_(s32), [[UADDO81:%[0-9]+]]:_(s1) = G_UADDO [[UADDO78]], [[UV56]]
-    ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[ADD33]], [[UV57]], [[UADDO81]]
-    ; GFX6-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO80]](s32), [[UADDE20]](s32)
+    ; GFX6-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDE88]], [[UV56]]
+    ; GFX6-NEXT: [[UADDE92:%[0-9]+]]:_(s32), [[UADDE93:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV57]], [[UADDO45]]
+    ; GFX6-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE92]](s32)
     ; GFX6-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV55]]
     ; GFX6-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1)
     ; GFX6-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV54]]
@@ -1480,9 +1399,9 @@ body: |
     ; GFX6-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV55]]
     ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]]
     ; GFX6-NEXT: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX6-NEXT: [[UADDO82:%[0-9]+]]:_(s32), [[UADDO83:%[0-9]+]]:_(s1) = G_UADDO [[UADDO80]], [[UV58]]
-    ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[UV59]], [[UADDO83]]
-    ; GFX6-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO82]](s32), [[UADDE22]](s32)
+    ; GFX6-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UV58]]
+    ; GFX6-NEXT: [[UADDE94:%[0-9]+]]:_(s32), [[UADDE95:%[0-9]+]]:_(s1) = G_UADDE [[UADDE92]], [[UV59]], [[UADDO47]]
+    ; GFX6-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO46]](s32), [[UADDE94]](s32)
     ; GFX6-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]]
     ; GFX6-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]]
     ; GFX6-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]]
@@ -1496,6 +1415,7 @@ body: |
     ; GFX6-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE18]](s32)
     ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64)
     ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ;
     ; GFX8-LABEL: name: test_sdiv_v2s64
     ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX8-NEXT: {{  $}}
@@ -1546,89 +1466,74 @@ body: |
     ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]]
     ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]]
     ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
+    ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV18]]
     ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV20]]
     ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV18]]
     ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]]
-    ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]]
+    ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]]
     ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV20]]
     ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV18]]
     ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV20]]
-    ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]]
-    ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
-    ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]]
-    ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]]
+    ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]]
+    ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]]
+    ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]]
     ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV20]]
-    ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
-    ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]]
+    ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]]
+    ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]]
+    ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]]
     ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64)
     ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV23]](s32)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]]
     ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64)
-    ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV22]]
-    ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV24]]
-    ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV22]]
-    ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]]
-    ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV24]]
-    ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV22]]
-    ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV24]]
-    ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
-    ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]]
-    ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
-    ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]]
-    ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]]
-    ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV24]]
-    ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]]
-    ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
-    ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]]
+    ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV22]]
+    ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV24]]
+    ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV22]]
+    ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]]
+    ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]]
+    ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV24]]
+    ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV22]]
+    ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV24]]
+    ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
+    ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]]
+    ; GFX8-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]]
+    ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]]
+    ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]]
+    ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV24]]
+    ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]]
+    ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]]
+    ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]]
     ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
     ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
-    ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO26]]
-    ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE6]]
-    ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO26]]
-    ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
-    ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]]
-    ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE6]]
-    ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDO26]]
-    ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE6]]
-    ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
-    ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]]
-    ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
-    ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]]
-    ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
-    ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]]
-    ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDE6]]
-    ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]]
-    ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD11]](s32)
+    ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO14]]
+    ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE30]]
+    ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO14]]
+    ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
+    ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]]
+    ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]]
+    ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE30]]
+    ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDO14]]
+    ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE30]]
+    ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
+    ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]]
+    ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]]
+    ; GFX8-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]]
+    ; GFX8-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]]
+    ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDE30]]
+    ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]]
+    ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD2]](s32)
     ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[UADDO36]], [[C5]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[UADDE40]], [[C5]]
     ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64)
     ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV33]](s32)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[ADD11]], [[ANYEXT2]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV31]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[ADD2]], [[ANYEXT2]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV31]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]]
     ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64)
     ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV32]]
     ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV34]], [[USUBO3]]
@@ -1645,9 +1550,9 @@ body: |
     ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
     ; GFX8-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; GFX8-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV38]]
-    ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV39]], [[UADDO39]]
-    ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32)
+    ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV38]]
+    ; GFX8-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV39]], [[UADDO21]]
+    ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32)
     ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV37]]
     ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
     ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV36]]
@@ -1655,9 +1560,9 @@ body: |
     ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV37]]
     ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
     ; GFX8-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV40]]
-    ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV41]], [[UADDO41]]
-    ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32)
+    ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV40]]
+    ; GFX8-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV41]], [[UADDO23]]
+    ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32)
     ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
     ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
     ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
@@ -1673,14 +1578,14 @@ body: |
     ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32)
     ; GFX8-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
     ; GFX8-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64)
-    ; GFX8-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UV46]], [[UV48]]
-    ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV47]], [[UV49]], [[UADDO43]]
-    ; GFX8-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO42]](s32), [[UADDE12]](s32)
+    ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UV46]], [[UV48]]
+    ; GFX8-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UV47]], [[UV49]], [[UADDO25]]
+    ; GFX8-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO24]](s32), [[UADDE48]](s32)
     ; GFX8-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
     ; GFX8-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64)
-    ; GFX8-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UV50]], [[UV52]]
-    ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UV51]], [[UV53]], [[UADDO45]]
-    ; GFX8-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE14]](s32)
+    ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UV50]], [[UV52]]
+    ; GFX8-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[UV51]], [[UV53]], [[UADDO27]]
+    ; GFX8-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO26]](s32), [[UADDE50]](s32)
     ; GFX8-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]]
     ; GFX8-NEXT: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]]
     ; GFX8-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64)
@@ -1709,85 +1614,70 @@ body: |
     ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV60]]
     ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV62]]
     ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV60]]
-    ; GFX8-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
-    ; GFX8-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1)
-    ; GFX8-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH12]]
-    ; GFX8-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1)
-    ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]]
+    ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX8-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH12]], [[UADDO29]]
+    ; GFX8-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE53]]
     ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV62]]
     ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV60]]
     ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV62]]
-    ; GFX8-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]]
-    ; GFX8-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1)
-    ; GFX8-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH14]]
-    ; GFX8-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1)
-    ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]]
-    ; GFX8-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD12]]
-    ; GFX8-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1)
-    ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]]
+    ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]]
+    ; GFX8-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH14]], [[UADDO31]]
+    ; GFX8-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE57]]
+    ; GFX8-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[UADDE56]], [[UADDE54]], [[UADDO31]]
+    ; GFX8-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDE58]], [[C6]], [[UADDE61]]
     ; GFX8-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV62]]
-    ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]]
-    ; GFX8-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO54]]
-    ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO57]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO56]], [[C5]]
+    ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE62]]
+    ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE60]]
+    ; GFX8-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD3]], [[UADDO33]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO32]], [[C5]]
     ; GFX8-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64)
     ; GFX8-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV65]](s32)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDE16]], [[ANYEXT4]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE10]](s32), [[UADDO56]], [[AMDGPU_MAD_U64_U32_26]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDE64]], [[ANYEXT4]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE10]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_26]]
     ; GFX8-NEXT: [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64)
-    ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV64]]
-    ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO56]], [[UV66]]
-    ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[UV64]]
-    ; GFX8-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
-    ; GFX8-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1)
-    ; GFX8-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH16]]
-    ; GFX8-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1)
-    ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]]
-    ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV66]]
-    ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV64]]
-    ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[UV66]]
-    ; GFX8-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]]
-    ; GFX8-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1)
-    ; GFX8-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH18]]
-    ; GFX8-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1)
-    ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]]
-    ; GFX8-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD16]]
-    ; GFX8-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1)
-    ; GFX8-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]]
-    ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV66]]
-    ; GFX8-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]]
-    ; GFX8-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[UADDO66]]
-    ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD19]], [[UADDO69]]
+    ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE64]], [[UV64]]
+    ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO32]], [[UV66]]
+    ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO32]], [[UV64]]
+    ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX8-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDO34]], [[UMULH16]], [[UADDO35]]
+    ; GFX8-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE67]]
+    ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE64]], [[UV66]]
+    ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE64]], [[UV64]]
+    ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO32]], [[UV66]]
+    ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]]
+    ; GFX8-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDO36]], [[UMULH18]], [[UADDO37]]
+    ; GFX8-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE71]]
+    ; GFX8-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[UADDE70]], [[UADDE68]], [[UADDO37]]
+    ; GFX8-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDE72]], [[C6]], [[UADDE75]]
+    ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE64]], [[UV66]]
+    ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE76]]
+    ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UADDE74]]
+    ; GFX8-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[UADDE64]], [[ADD4]], [[UADDO39]]
     ; GFX8-NEXT: [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
     ; GFX8-NEXT: [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
-    ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV71]], [[UADDO68]]
-    ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV70]], [[UADDE18]]
-    ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV70]], [[UADDO68]]
-    ; GFX8-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]]
-    ; GFX8-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1)
-    ; GFX8-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH20]]
-    ; GFX8-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1)
-    ; GFX8-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]]
-    ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV71]], [[UADDE18]]
-    ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV71]], [[UADDO68]]
-    ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV70]], [[UADDE18]]
-    ; GFX8-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]]
-    ; GFX8-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1)
-    ; GFX8-NEXT: [[UADDO76:%[0-9]+]]:_(s32), [[UADDO77:%[0-9]+]]:_(s1) = G_UADDO [[UADDO74]], [[UMULH22]]
-    ; GFX8-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO77]](s1)
-    ; GFX8-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]]
-    ; GFX8-NEXT: [[UADDO78:%[0-9]+]]:_(s32), [[UADDO79:%[0-9]+]]:_(s1) = G_UADDO [[UADDO76]], [[ADD20]]
-    ; GFX8-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO79]](s1)
-    ; GFX8-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]]
-    ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV71]], [[UADDE18]]
-    ; GFX8-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]]
-    ; GFX8-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO78]](s32), [[ADD23]](s32)
+    ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV71]], [[UADDO38]]
+    ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV70]], [[UADDE78]]
+    ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV70]], [[UADDO38]]
+    ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]]
+    ; GFX8-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDO40]], [[UMULH20]], [[UADDO41]]
+    ; GFX8-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE81]]
+    ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV71]], [[UADDE78]]
+    ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV71]], [[UADDO38]]
+    ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV70]], [[UADDE78]]
+    ; GFX8-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]]
+    ; GFX8-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[UADDO42]], [[UMULH22]], [[UADDO43]]
+    ; GFX8-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE85]]
+    ; GFX8-NEXT: [[UADDE88:%[0-9]+]]:_(s32), [[UADDE89:%[0-9]+]]:_(s1) = G_UADDE [[UADDE84]], [[UADDE82]], [[UADDO43]]
+    ; GFX8-NEXT: [[UADDE90:%[0-9]+]]:_(s32), [[UADDE91:%[0-9]+]]:_(s1) = G_UADDE [[UADDE86]], [[C6]], [[UADDE89]]
+    ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV71]], [[UADDE78]]
+    ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE90]]
+    ; GFX8-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE88]](s32), [[ADD5]](s32)
     ; GFX8-NEXT: [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV72]](s32), [[UADDO78]], [[C5]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV72]](s32), [[UADDE88]], [[C5]]
     ; GFX8-NEXT: [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64)
     ; GFX8-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV75]](s32)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV72]](s32), [[ADD23]], [[ANYEXT5]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV73]](s32), [[UADDO78]], [[AMDGPU_MAD_U64_U32_32]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV72]](s32), [[ADD5]], [[ANYEXT5]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV73]](s32), [[UADDE88]], [[AMDGPU_MAD_U64_U32_32]]
     ; GFX8-NEXT: [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64)
     ; GFX8-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV68]], [[UV74]]
     ; GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV69]], [[UV76]], [[USUBO11]]
@@ -1803,9 +1693,9 @@ body: |
     ; GFX8-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV79]], [[USUBO11]]
     ; GFX8-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[USUBE14]], [[C6]], [[USUBO13]]
     ; GFX8-NEXT: [[UV80:%[0-9]+]]:_(s32), [[UV81:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX8-NEXT: [[UADDO80:%[0-9]+]]:_(s32), [[UADDO81:%[0-9]+]]:_(s1) = G_UADDO [[UADDO78]], [[UV80]]
-    ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[ADD23]], [[UV81]], [[UADDO81]]
-    ; GFX8-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO80]](s32), [[UADDE20]](s32)
+    ; GFX8-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDE88]], [[UV80]]
+    ; GFX8-NEXT: [[UADDE92:%[0-9]+]]:_(s32), [[UADDE93:%[0-9]+]]:_(s1) = G_UADDE [[ADD5]], [[UV81]], [[UADDO45]]
+    ; GFX8-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE92]](s32)
     ; GFX8-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV79]]
     ; GFX8-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1)
     ; GFX8-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV78]]
@@ -1813,9 +1703,9 @@ body: |
     ; GFX8-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV79]]
     ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]]
     ; GFX8-NEXT: [[UV82:%[0-9]+]]:_(s32), [[UV83:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX8-NEXT: [[UADDO82:%[0-9]+]]:_(s32), [[UADDO83:%[0-9]+]]:_(s1) = G_UADDO [[UADDO80]], [[UV82]]
-    ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[UV83]], [[UADDO83]]
-    ; GFX8-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO82]](s32), [[UADDE22]](s32)
+    ; GFX8-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UV82]]
+    ; GFX8-NEXT: [[UADDE94:%[0-9]+]]:_(s32), [[UADDE95:%[0-9]+]]:_(s1) = G_UADDE [[UADDE92]], [[UV83]], [[UADDO47]]
+    ; GFX8-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO46]](s32), [[UADDE94]](s32)
     ; GFX8-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]]
     ; GFX8-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]]
     ; GFX8-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]]
@@ -1829,6 +1719,7 @@ body: |
     ; GFX8-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE18]](s32)
     ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64)
     ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ;
     ; GFX9-LABEL: name: test_sdiv_v2s64
     ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX9-NEXT: {{  $}}
@@ -1879,89 +1770,74 @@ body: |
     ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]]
     ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]]
     ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV18]]
     ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV20]]
     ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV18]]
     ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]]
-    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]]
+    ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]]
     ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV20]]
     ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV18]]
     ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV20]]
-    ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]]
-    ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
-    ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]]
-    ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]]
+    ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]]
+    ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]]
+    ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]]
     ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV20]]
-    ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
-    ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]]
+    ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]]
+    ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]]
+    ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]]
     ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64)
     ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV23]](s32)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]]
     ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64)
-    ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV22]]
-    ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV24]]
-    ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV22]]
-    ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]]
-    ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV24]]
-    ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV22]]
-    ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV24]]
-    ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
-    ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]]
-    ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
-    ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]]
-    ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]]
-    ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV24]]
-    ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]]
-    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
-    ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]]
+    ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV22]]
+    ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV24]]
+    ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV22]]
+    ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]]
+    ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]]
+    ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV24]]
+    ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV22]]
+    ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV24]]
+    ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
+    ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]]
+    ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]]
+    ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]]
+    ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]]
+    ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV24]]
+    ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]]
+    ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]]
+    ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]]
     ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
     ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
-    ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO26]]
-    ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE6]]
-    ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO26]]
-    ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
-    ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]]
-    ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE6]]
-    ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDO26]]
-    ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE6]]
-    ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
-    ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]]
-    ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
-    ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]]
-    ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
-    ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]]
-    ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDE6]]
-    ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]]
-    ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD11]](s32)
+    ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO14]]
+    ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE30]]
+    ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO14]]
+    ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
+    ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]]
+    ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]]
+    ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE30]]
+    ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDO14]]
+    ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE30]]
+    ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
+    ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]]
+    ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]]
+    ; GFX9-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]]
+    ; GFX9-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]]
+    ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDE30]]
+    ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]]
+    ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD2]](s32)
     ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[UADDO36]], [[C5]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[UADDE40]], [[C5]]
     ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64)
     ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV33]](s32)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[ADD11]], [[ANYEXT2]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV31]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[ADD2]], [[ANYEXT2]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV31]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]]
     ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64)
     ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV32]]
     ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV34]], [[USUBO3]]
@@ -1978,9 +1854,9 @@ body: |
     ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
     ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; GFX9-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV38]]
-    ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV39]], [[UADDO39]]
-    ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32)
+    ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV38]]
+    ; GFX9-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV39]], [[UADDO21]]
+    ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32)
     ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV37]]
     ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
     ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV36]]
@@ -1988,9 +1864,9 @@ body: |
     ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV37]]
     ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
     ; GFX9-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV40]]
-    ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV41]], [[UADDO41]]
-    ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32)
+    ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV40]]
+    ; GFX9-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV41]], [[UADDO23]]
+    ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32)
     ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
     ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
     ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
@@ -2006,14 +1882,14 @@ body: |
     ; GFX9-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32)
     ; GFX9-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
     ; GFX9-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64)
-    ; GFX9-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UV46]], [[UV48]]
-    ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV47]], [[UV49]], [[UADDO43]]
-    ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO42]](s32), [[UADDE12]](s32)
+    ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UV46]], [[UV48]]
+    ; GFX9-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UV47]], [[UV49]], [[UADDO25]]
+    ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO24]](s32), [[UADDE48]](s32)
     ; GFX9-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
     ; GFX9-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64)
-    ; GFX9-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UV50]], [[UV52]]
-    ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UV51]], [[UV53]], [[UADDO45]]
-    ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE14]](s32)
+    ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UV50]], [[UV52]]
+    ; GFX9-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[UV51]], [[UV53]], [[UADDO27]]
+    ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO26]](s32), [[UADDE50]](s32)
     ; GFX9-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]]
     ; GFX9-NEXT: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]]
     ; GFX9-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64)
@@ -2042,85 +1918,70 @@ body: |
     ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV60]]
     ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV62]]
     ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV60]]
-    ; GFX9-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
-    ; GFX9-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1)
-    ; GFX9-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH12]]
-    ; GFX9-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1)
-    ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]]
+    ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX9-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH12]], [[UADDO29]]
+    ; GFX9-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE53]]
     ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV62]]
     ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV60]]
     ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV62]]
-    ; GFX9-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]]
-    ; GFX9-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1)
-    ; GFX9-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH14]]
-    ; GFX9-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1)
-    ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]]
-    ; GFX9-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD12]]
-    ; GFX9-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1)
-    ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]]
+    ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]]
+    ; GFX9-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH14]], [[UADDO31]]
+    ; GFX9-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE57]]
+    ; GFX9-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[UADDE56]], [[UADDE54]], [[UADDO31]]
+    ; GFX9-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDE58]], [[C6]], [[UADDE61]]
     ; GFX9-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV62]]
-    ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]]
-    ; GFX9-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO54]]
-    ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO57]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO56]], [[C5]]
+    ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE62]]
+    ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE60]]
+    ; GFX9-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD3]], [[UADDO33]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO32]], [[C5]]
     ; GFX9-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64)
     ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV65]](s32)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDE16]], [[ANYEXT4]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE10]](s32), [[UADDO56]], [[AMDGPU_MAD_U64_U32_26]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDE64]], [[ANYEXT4]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE10]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_26]]
     ; GFX9-NEXT: [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64)
-    ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV64]]
-    ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO56]], [[UV66]]
-    ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[UV64]]
-    ; GFX9-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
-    ; GFX9-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1)
-    ; GFX9-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH16]]
-    ; GFX9-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1)
-    ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]]
-    ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV66]]
-    ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV64]]
-    ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[UV66]]
-    ; GFX9-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]]
-    ; GFX9-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1)
-    ; GFX9-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH18]]
-    ; GFX9-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1)
-    ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]]
-    ; GFX9-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD16]]
-    ; GFX9-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1)
-    ; GFX9-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]]
-    ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV66]]
-    ; GFX9-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]]
-    ; GFX9-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[UADDO66]]
-    ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD19]], [[UADDO69]]
+    ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE64]], [[UV64]]
+    ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO32]], [[UV66]]
+    ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO32]], [[UV64]]
+    ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX9-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDO34]], [[UMULH16]], [[UADDO35]]
+    ; GFX9-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE67]]
+    ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE64]], [[UV66]]
+    ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE64]], [[UV64]]
+    ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO32]], [[UV66]]
+    ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]]
+    ; GFX9-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDO36]], [[UMULH18]], [[UADDO37]]
+    ; GFX9-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE71]]
+    ; GFX9-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[UADDE70]], [[UADDE68]], [[UADDO37]]
+    ; GFX9-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDE72]], [[C6]], [[UADDE75]]
+    ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE64]], [[UV66]]
+    ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE76]]
+    ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UADDE74]]
+    ; GFX9-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[UADDE64]], [[ADD4]], [[UADDO39]]
     ; GFX9-NEXT: [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
     ; GFX9-NEXT: [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
-    ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV71]], [[UADDO68]]
-    ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV70]], [[UADDE18]]
-    ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV70]], [[UADDO68]]
-    ; GFX9-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]]
-    ; GFX9-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1)
-    ; GFX9-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH20]]
-    ; GFX9-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1)
-    ; GFX9-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]]
-    ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV71]], [[UADDE18]]
-    ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV71]], [[UADDO68]]
-    ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV70]], [[UADDE18]]
-    ; GFX9-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]]
-    ; GFX9-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1)
-    ; GFX9-NEXT: [[UADDO76:%[0-9]+]]:_(s32), [[UADDO77:%[0-9]+]]:_(s1) = G_UADDO [[UADDO74]], [[UMULH22]]
-    ; GFX9-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO77]](s1)
-    ; GFX9-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]]
-    ; GFX9-NEXT: [[UADDO78:%[0-9]+]]:_(s32), [[UADDO79:%[0-9]+]]:_(s1) = G_UADDO [[UADDO76]], [[ADD20]]
-    ; GFX9-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO79]](s1)
-    ; GFX9-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]]
-    ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV71]], [[UADDE18]]
-    ; GFX9-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]]
-    ; GFX9-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO78]](s32), [[ADD23]](s32)
+    ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV71]], [[UADDO38]]
+    ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV70]], [[UADDE78]]
+    ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV70]], [[UADDO38]]
+    ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]]
+    ; GFX9-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDO40]], [[UMULH20]], [[UADDO41]]
+    ; GFX9-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE81]]
+    ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV71]], [[UADDE78]]
+    ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV71]], [[UADDO38]]
+    ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV70]], [[UADDE78]]
+    ; GFX9-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]]
+    ; GFX9-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[UADDO42]], [[UMULH22]], [[UADDO43]]
+    ; GFX9-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE85]]
+    ; GFX9-NEXT: [[UADDE88:%[0-9]+]]:_(s32), [[UADDE89:%[0-9]+]]:_(s1) = G_UADDE [[UADDE84]], [[UADDE82]], [[UADDO43]]
+    ; GFX9-NEXT: [[UADDE90:%[0-9]+]]:_(s32), [[UADDE91:%[0-9]+]]:_(s1) = G_UADDE [[UADDE86]], [[C6]], [[UADDE89]]
+    ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV71]], [[UADDE78]]
+    ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE90]]
+    ; GFX9-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE88]](s32), [[ADD5]](s32)
     ; GFX9-NEXT: [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV72]](s32), [[UADDO78]], [[C5]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV72]](s32), [[UADDE88]], [[C5]]
     ; GFX9-NEXT: [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64)
     ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV75]](s32)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV72]](s32), [[ADD23]], [[ANYEXT5]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV73]](s32), [[UADDO78]], [[AMDGPU_MAD_U64_U32_32]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV72]](s32), [[ADD5]], [[ANYEXT5]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV73]](s32), [[UADDE88]], [[AMDGPU_MAD_U64_U32_32]]
     ; GFX9-NEXT: [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64)
     ; GFX9-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV68]], [[UV74]]
     ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV69]], [[UV76]], [[USUBO11]]
@@ -2136,9 +1997,9 @@ body: |
     ; GFX9-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV79]], [[USUBO11]]
     ; GFX9-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[USUBE14]], [[C6]], [[USUBO13]]
     ; GFX9-NEXT: [[UV80:%[0-9]+]]:_(s32), [[UV81:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX9-NEXT: [[UADDO80:%[0-9]+]]:_(s32), [[UADDO81:%[0-9]+]]:_(s1) = G_UADDO [[UADDO78]], [[UV80]]
-    ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[ADD23]], [[UV81]], [[UADDO81]]
-    ; GFX9-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO80]](s32), [[UADDE20]](s32)
+    ; GFX9-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDE88]], [[UV80]]
+    ; GFX9-NEXT: [[UADDE92:%[0-9]+]]:_(s32), [[UADDE93:%[0-9]+]]:_(s1) = G_UADDE [[ADD5]], [[UV81]], [[UADDO45]]
+    ; GFX9-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE92]](s32)
     ; GFX9-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV79]]
     ; GFX9-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1)
     ; GFX9-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV78]]
@@ -2146,9 +2007,9 @@ body: |
     ; GFX9-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV79]]
     ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]]
     ; GFX9-NEXT: [[UV82:%[0-9]+]]:_(s32), [[UV83:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX9-NEXT: [[UADDO82:%[0-9]+]]:_(s32), [[UADDO83:%[0-9]+]]:_(s1) = G_UADDO [[UADDO80]], [[UV82]]
-    ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[UV83]], [[UADDO83]]
-    ; GFX9-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO82]](s32), [[UADDE22]](s32)
+    ; GFX9-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UV82]]
+    ; GFX9-NEXT: [[UADDE94:%[0-9]+]]:_(s32), [[UADDE95:%[0-9]+]]:_(s1) = G_UADDE [[UADDE92]], [[UV83]], [[UADDO47]]
+    ; GFX9-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO46]](s32), [[UADDE94]](s32)
     ; GFX9-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]]
     ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]]
     ; GFX9-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]]
@@ -2162,6 +2023,7 @@ body: |
     ; GFX9-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE18]](s32)
     ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64)
     ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ;
     ; GFX10-LABEL: name: test_sdiv_v2s64
     ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX10-NEXT: {{  $}}
@@ -2212,93 +2074,78 @@ body: |
     ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV19]], [[MUL]]
     ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
     ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]]
+    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV18]]
     ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
     ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV18]]
     ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]]
-    ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]]
-    ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]]
+    ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]]
     ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
     ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV18]]
     ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
-    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]]
-    ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]]
-    ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
-    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
-    ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]]
+    ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]]
+    ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]]
+    ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]]
+    ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]]
     ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
-    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]]
-    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
-    ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
-    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]]
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]]
+    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]]
+    ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]]
+    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]]
     ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64)
-    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
-    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[MUL5]]
-    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
-    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]]
-    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]]
-    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]]
-    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]]
-    ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]]
-    ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]]
-    ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]]
-    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]]
-    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]]
-    ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]]
-    ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]]
-    ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
-    ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]]
-    ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
-    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]]
-    ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]]
-    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
-    ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]]
+    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]]
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[MUL5]]
+    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]]
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]]
+    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV20]]
+    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]]
+    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV20]]
+    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]]
+    ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]]
+    ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]]
+    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]]
+    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV20]]
+    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]]
+    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]]
+    ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]]
+    ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]]
+    ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]]
+    ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]]
+    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]]
+    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]]
+    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]]
+    ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]]
     ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
     ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
-    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]]
-    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]]
-    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]]
-    ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]]
-    ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]]
-    ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]]
-    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]]
-    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]]
-    ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]]
-    ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]]
-    ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
-    ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]]
-    ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
-    ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
-    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]]
-    ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]]
-    ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32)
+    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO14]]
+    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE30]]
+    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO14]]
+    ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]]
+    ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]]
+    ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]]
+    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE30]]
+    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO14]]
+    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE30]]
+    ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]]
+    ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]]
+    ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]]
+    ; GFX10-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]]
+    ; GFX10-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]]
+    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE30]]
+    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]]
+    ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD6]](s32)
     ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
-    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]]
+    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDE40]], [[C5]]
     ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
-    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[ADD15]]
-    ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[MUL13]]
-    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDO36]]
-    ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]]
+    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[ADD6]]
+    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[MUL13]]
+    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDE40]]
+    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]]
     ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]]
-    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[ADD17]], [[USUBO3]]
-    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV23]], [[ADD17]]
+    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[ADD8]], [[USUBO3]]
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV23]], [[ADD8]]
     ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
     ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV31]]
     ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
@@ -2311,9 +2158,9 @@ body: |
     ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
     ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV32]]
-    ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV33]], [[UADDO39]]
-    ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32)
+    ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV32]]
+    ; GFX10-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV33]], [[UADDO21]]
+    ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32)
     ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV31]]
     ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
     ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV30]]
@@ -2321,9 +2168,9 @@ body: |
     ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV31]]
     ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
     ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV34]]
-    ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV35]], [[UADDO41]]
-    ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32)
+    ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV34]]
+    ; GFX10-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV35]], [[UADDO23]]
+    ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32)
     ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
     ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
     ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
@@ -2339,14 +2186,14 @@ body: |
     ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32)
     ; GFX10-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
     ; GFX10-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64)
-    ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UV40]], [[UV42]]
-    ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV41]], [[UV43]], [[UADDO43]]
-    ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO42]](s32), [[UADDE12]](s32)
+    ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UV40]], [[UV42]]
+    ; GFX10-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UV41]], [[UV43]], [[UADDO25]]
+    ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO24]](s32), [[UADDE48]](s32)
     ; GFX10-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
     ; GFX10-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64)
-    ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UV44]], [[UV46]]
-    ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UV45]], [[UV47]], [[UADDO45]]
-    ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE14]](s32)
+    ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UV44]], [[UV46]]
+    ; GFX10-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[UV45]], [[UV47]], [[UADDO27]]
+    ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO26]](s32), [[UADDE50]](s32)
     ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]]
     ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]]
     ; GFX10-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64)
@@ -2369,95 +2216,80 @@ body: |
     ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[FPTOUI2]], [[C5]]
     ; GFX10-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64)
     ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]]
-    ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV55]], [[MUL15]]
+    ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV55]], [[MUL15]]
     ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[FPTOUI2]]
-    ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[MUL16]]
+    ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[MUL16]]
     ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV54]]
-    ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]]
+    ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD10]]
     ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV54]]
-    ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[MUL18]]
-    ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1)
-    ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH12]]
-    ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1)
-    ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]]
-    ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]]
+    ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[MUL18]]
+    ; GFX10-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH12]], [[UADDO29]]
+    ; GFX10-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE53]]
+    ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD10]]
     ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV54]]
-    ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]]
-    ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL19]], [[UMULH13]]
-    ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1)
-    ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH14]]
-    ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1)
-    ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]]
-    ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD20]]
-    ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1)
-    ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]]
-    ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]]
-    ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD22]]
-    ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO54]]
-    ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO57]]
-    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO56]], [[C5]]
+    ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD10]]
+    ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL19]], [[UMULH13]]
+    ; GFX10-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH14]], [[UADDO31]]
+    ; GFX10-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE57]]
+    ; GFX10-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[UADDE56]], [[UADDE54]], [[UADDO31]]
+    ; GFX10-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDE58]], [[C6]], [[UADDE61]]
+    ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD10]]
+    ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE62]]
+    ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE60]]
+    ; GFX10-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD11]], [[UADDO33]]
+    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO32]], [[C5]]
     ; GFX10-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_8]](s64)
-    ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE16]]
-    ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV57]], [[MUL20]]
-    ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[UADDO56]]
-    ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[MUL21]]
-    ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV56]]
-    ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[UADDO56]], [[ADD25]]
-    ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[UV56]]
-    ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL22]], [[MUL23]]
-    ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1)
-    ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH16]]
-    ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1)
-    ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]]
-    ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD25]]
-    ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV56]]
-    ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[ADD25]]
-    ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL24]], [[UMULH17]]
-    ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1)
-    ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH18]]
-    ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1)
-    ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]]
-    ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD26]]
-    ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1)
-    ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]]
-    ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD25]]
-    ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD28]]
-    ; GFX10-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[UADDO66]]
-    ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD29]], [[UADDO69]]
+    ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE64]]
+    ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV57]], [[MUL20]]
+    ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[UADDO32]]
+    ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[MUL21]]
+    ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[UADDE64]], [[UV56]]
+    ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[UADDO32]], [[ADD13]]
+    ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO32]], [[UV56]]
+    ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL22]], [[MUL23]]
+    ; GFX10-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDO34]], [[UMULH16]], [[UADDO35]]
+    ; GFX10-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE67]]
+    ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[UADDE64]], [[ADD13]]
+    ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE64]], [[UV56]]
+    ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO32]], [[ADD13]]
+    ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[MUL24]], [[UMULH17]]
+    ; GFX10-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDO36]], [[UMULH18]], [[UADDO37]]
+    ; GFX10-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE71]]
+    ; GFX10-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[UADDE70]], [[UADDE68]], [[UADDO37]]
+    ; GFX10-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDE72]], [[C6]], [[UADDE75]]
+    ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE64]], [[ADD13]]
+    ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE76]]
+    ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UADDE74]]
+    ; GFX10-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[UADDE64]], [[ADD14]], [[UADDO39]]
     ; GFX10-NEXT: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
     ; GFX10-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
-    ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[UV61]], [[UADDO68]]
-    ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[UV60]], [[UADDE18]]
-    ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV60]], [[UADDO68]]
-    ; GFX10-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL25]], [[MUL26]]
-    ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1)
-    ; GFX10-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH20]]
-    ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1)
-    ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]]
-    ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UV61]], [[UADDE18]]
-    ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV61]], [[UADDO68]]
-    ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV60]], [[UADDE18]]
-    ; GFX10-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[UMULH21]]
-    ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1)
-    ; GFX10-NEXT: [[UADDO76:%[0-9]+]]:_(s32), [[UADDO77:%[0-9]+]]:_(s1) = G_UADDO [[UADDO74]], [[UMULH22]]
-    ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO77]](s1)
-    ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]]
-    ; GFX10-NEXT: [[UADDO78:%[0-9]+]]:_(s32), [[UADDO79:%[0-9]+]]:_(s1) = G_UADDO [[UADDO76]], [[ADD30]]
-    ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO79]](s1)
-    ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]]
-    ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV61]], [[UADDE18]]
-    ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD32]]
-    ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO78]](s32), [[ADD33]](s32)
+    ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[UV61]], [[UADDO38]]
+    ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[UV60]], [[UADDE78]]
+    ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV60]], [[UADDO38]]
+    ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[MUL25]], [[MUL26]]
+    ; GFX10-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDO40]], [[UMULH20]], [[UADDO41]]
+    ; GFX10-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE81]]
+    ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UV61]], [[UADDE78]]
+    ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV61]], [[UADDO38]]
+    ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV60]], [[UADDE78]]
+    ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[UMULH21]]
+    ; GFX10-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[UADDO42]], [[UMULH22]], [[UADDO43]]
+    ; GFX10-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE85]]
+    ; GFX10-NEXT: [[UADDE88:%[0-9]+]]:_(s32), [[UADDE89:%[0-9]+]]:_(s1) = G_UADDE [[UADDE84]], [[UADDE82]], [[UADDO43]]
+    ; GFX10-NEXT: [[UADDE90:%[0-9]+]]:_(s32), [[UADDE91:%[0-9]+]]:_(s1) = G_UADDE [[UADDE86]], [[C6]], [[UADDE89]]
+    ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV61]], [[UADDE78]]
+    ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE90]]
+    ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE88]](s32), [[ADD15]](s32)
     ; GFX10-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64)
-    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV62]](s32), [[UADDO78]], [[C5]]
+    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV62]](s32), [[UADDE88]], [[C5]]
     ; GFX10-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64)
-    ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UV62]], [[ADD33]]
-    ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[UV65]], [[MUL28]]
-    ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UV63]], [[UADDO78]]
-    ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[MUL29]]
+    ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UV62]], [[ADD15]]
+    ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV65]], [[MUL28]]
+    ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UV63]], [[UADDE88]]
+    ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL29]]
     ; GFX10-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV58]], [[UV64]]
-    ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV59]], [[ADD35]], [[USUBO11]]
-    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV59]], [[ADD35]]
+    ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV59]], [[ADD17]], [[USUBO11]]
+    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV59]], [[ADD17]]
     ; GFX10-NEXT: [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64)
     ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE12]](s32), [[UV67]]
     ; GFX10-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1)
@@ -2469,9 +2301,9 @@ body: |
     ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV67]], [[USUBO11]]
     ; GFX10-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[USUBE14]], [[C6]], [[USUBO13]]
     ; GFX10-NEXT: [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX10-NEXT: [[UADDO80:%[0-9]+]]:_(s32), [[UADDO81:%[0-9]+]]:_(s1) = G_UADDO [[UADDO78]], [[UV68]]
-    ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[ADD33]], [[UV69]], [[UADDO81]]
-    ; GFX10-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO80]](s32), [[UADDE20]](s32)
+    ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDE88]], [[UV68]]
+    ; GFX10-NEXT: [[UADDE92:%[0-9]+]]:_(s32), [[UADDE93:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV69]], [[UADDO45]]
+    ; GFX10-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE92]](s32)
     ; GFX10-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV67]]
     ; GFX10-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1)
     ; GFX10-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV66]]
@@ -2479,9 +2311,9 @@ body: |
     ; GFX10-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV67]]
     ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]]
     ; GFX10-NEXT: [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX10-NEXT: [[UADDO82:%[0-9]+]]:_(s32), [[UADDO83:%[0-9]+]]:_(s1) = G_UADDO [[UADDO80]], [[UV70]]
-    ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[UV71]], [[UADDO83]]
-    ; GFX10-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO82]](s32), [[UADDE22]](s32)
+    ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UV70]]
+    ; GFX10-NEXT: [[UADDE94:%[0-9]+]]:_(s32), [[UADDE95:%[0-9]+]]:_(s1) = G_UADDE [[UADDE92]], [[UV71]], [[UADDO47]]
+    ; GFX10-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO46]](s32), [[UADDE94]](s32)
     ; GFX10-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]]
     ; GFX10-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]]
     ; GFX10-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]]
@@ -2547,6 +2379,7 @@ body: |
     ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
     ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
     ; GFX6-NEXT: $vgpr0 = COPY [[SUB3]](s32)
+    ;
     ; GFX8-LABEL: name: test_sdiv_s16
     ; GFX8: liveins: $vgpr0, $vgpr1
     ; GFX8-NEXT: {{  $}}
@@ -2587,6 +2420,7 @@ body: |
     ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
     ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
     ; GFX8-NEXT: $vgpr0 = COPY [[SUB3]](s32)
+    ;
     ; GFX9-LABEL: name: test_sdiv_s16
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9-NEXT: {{  $}}
@@ -2627,6 +2461,7 @@ body: |
     ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
     ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
     ; GFX9-NEXT: $vgpr0 = COPY [[SUB3]](s32)
+    ;
     ; GFX10-LABEL: name: test_sdiv_s16
     ; GFX10: liveins: $vgpr0, $vgpr1
     ; GFX10-NEXT: {{  $}}
@@ -2763,6 +2598,7 @@ body: |
     ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+    ;
     ; GFX8-LABEL: name: test_sdiv_v2s16
     ; GFX8: liveins: $vgpr0, $vgpr1
     ; GFX8-NEXT: {{  $}}
@@ -2844,6 +2680,7 @@ body: |
     ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+    ;
     ; GFX9-LABEL: name: test_sdiv_v2s16
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9-NEXT: {{  $}}
@@ -2922,6 +2759,7 @@ body: |
     ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SUB7]](s32)
     ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
     ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+    ;
     ; GFX10-LABEL: name: test_sdiv_v2s16
     ; GFX10: liveins: $vgpr0, $vgpr1
     ; GFX10-NEXT: {{  $}}
@@ -3052,6 +2890,7 @@ body: |
     ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
     ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
     ; GFX6-NEXT: $vgpr0 = COPY [[SUB3]](s32)
+    ;
     ; GFX8-LABEL: name: test_sdiv_s7
     ; GFX8: liveins: $vgpr0, $vgpr1
     ; GFX8-NEXT: {{  $}}
@@ -3092,6 +2931,7 @@ body: |
     ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
     ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
     ; GFX8-NEXT: $vgpr0 = COPY [[SUB3]](s32)
+    ;
     ; GFX9-LABEL: name: test_sdiv_s7
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9-NEXT: {{  $}}
@@ -3132,6 +2972,7 @@ body: |
     ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
     ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
     ; GFX9-NEXT: $vgpr0 = COPY [[SUB3]](s32)
+    ;
     ; GFX10-LABEL: name: test_sdiv_s7
     ; GFX10: liveins: $vgpr0, $vgpr1
     ; GFX10-NEXT: {{  $}}
@@ -3227,6 +3068,7 @@ body: |
     ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
     ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
     ; GFX6-NEXT: $vgpr0 = COPY [[SUB3]](s32)
+    ;
     ; GFX8-LABEL: name: test_sdiv_s17
     ; GFX8: liveins: $vgpr0, $vgpr1
     ; GFX8-NEXT: {{  $}}
@@ -3267,6 +3109,7 @@ body: |
     ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
     ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
     ; GFX8-NEXT: $vgpr0 = COPY [[SUB3]](s32)
+    ;
     ; GFX9-LABEL: name: test_sdiv_s17
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9-NEXT: {{  $}}
@@ -3307,6 +3150,7 @@ body: |
     ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
     ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
     ; GFX9-NEXT: $vgpr0 = COPY [[SUB3]](s32)
+    ;
     ; GFX10-LABEL: name: test_sdiv_s17
     ; GFX10: liveins: $vgpr0, $vgpr1
     ; GFX10-NEXT: {{  $}}
@@ -3407,6 +3251,7 @@ body: |
     ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]]
     ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]]
     ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
     ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
     ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
@@ -3416,89 +3261,73 @@ body: |
     ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
     ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
     ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]]
-    ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH1]], [[UADDO5]]
+    ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]]
     ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
     ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
     ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
-    ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
-    ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]]
-    ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
-    ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
-    ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH3]], [[UADDO7]]
+    ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]]
+    ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]]
+    ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]]
     ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
-    ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
-    ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
-    ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
-    ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]]
-    ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
-    ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
-    ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]]
-    ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
-    ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]]
-    ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]]
-    ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]]
-    ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]]
-    ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
-    ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
-    ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]]
-    ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]]
-    ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]]
-    ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
-    ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]]
-    ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
-    ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]]
-    ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
-    ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]]
-    ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]]
-    ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
-    ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]]
+    ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE14]]
+    ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]]
+    ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]]
+    ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO8]]
+    ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]]
+    ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]]
+    ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO8]]
+    ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]]
+    ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[MUL6]]
+    ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]]
+    ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[MUL6]]
+    ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH6]], [[UADDO11]]
+    ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]]
+    ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]]
+    ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[MUL6]]
+    ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]]
+    ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]]
+    ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]]
+    ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]]
+    ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]]
+    ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]]
+    ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE28]]
+    ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]]
+    ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]]
     ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
     ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
-    ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]]
-    ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]]
-    ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]]
-    ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
-    ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
-    ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]]
-    ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]]
-    ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]]
-    ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
-    ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]]
-    ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
-    ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]]
-    ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
-    ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
-    ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]]
-    ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]]
-    ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32)
+    ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO14]]
+    ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE30]]
+    ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO14]]
+    ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH10]], [[UADDO17]]
+    ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]]
+    ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE30]]
+    ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO14]]
+    ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE30]]
+    ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH12]], [[UADDO19]]
+    ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]]
+    ; GFX6-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]]
+    ; GFX6-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]]
+    ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE30]]
+    ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE42]]
+    ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD6]](s32)
     ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
-    ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]]
-    ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]]
-    ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]]
-    ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]]
-    ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
-    ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]]
+    ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDE40]]
+    ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDE40]]
+    ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD6]]
+    ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDE40]]
+    ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]]
     ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]]
-    ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]]
-    ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]]
+    ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD8]], [[USUBO3]]
+    ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD8]]
     ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
     ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]]
     ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
@@ -3511,9 +3340,9 @@ body: |
     ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
     ; GFX6-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; GFX6-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV22]]
-    ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV23]], [[UADDO39]]
-    ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32)
+    ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV22]]
+    ; GFX6-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV23]], [[UADDO21]]
+    ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32)
     ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]]
     ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
     ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]]
@@ -3521,9 +3350,9 @@ body: |
     ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]]
     ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
     ; GFX6-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX6-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV24]]
-    ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV25]], [[UADDO41]]
-    ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32)
+    ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV24]]
+    ; GFX6-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV25]], [[UADDO23]]
+    ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32)
     ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
     ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
     ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
@@ -3536,6 +3365,7 @@ body: |
     ; GFX6-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO7]]
     ; GFX6-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32)
     ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64)
+    ;
     ; GFX8-LABEL: name: test_sdiv_s33
     ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; GFX8-NEXT: {{  $}}
@@ -3586,89 +3416,74 @@ body: |
     ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]]
     ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]]
     ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
+    ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]]
     ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]]
     ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]]
     ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]]
-    ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]]
+    ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]]
     ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]]
     ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]]
     ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]]
-    ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]]
-    ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
-    ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]]
-    ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]]
+    ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]]
+    ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]]
+    ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]]
     ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]]
-    ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
-    ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]]
+    ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]]
+    ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]]
+    ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]]
     ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64)
     ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]]
     ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64)
-    ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]]
-    ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]]
-    ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]]
-    ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]]
-    ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]]
-    ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]]
-    ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]]
-    ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
-    ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]]
-    ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
-    ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]]
-    ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]]
-    ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]]
-    ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]]
-    ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
-    ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]]
+    ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV18]]
+    ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV20]]
+    ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV18]]
+    ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]]
+    ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]]
+    ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV20]]
+    ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV18]]
+    ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV20]]
+    ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
+    ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]]
+    ; GFX8-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]]
+    ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]]
+    ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]]
+    ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV20]]
+    ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]]
+    ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]]
+    ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]]
     ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
     ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
-    ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]]
-    ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]]
-    ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]]
-    ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
-    ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]]
-    ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]]
-    ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]]
-    ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]]
-    ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
-    ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]]
-    ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
-    ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]]
-    ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
-    ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]]
-    ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]]
-    ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]]
-    ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD11]](s32)
+    ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO14]]
+    ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE30]]
+    ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO14]]
+    ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
+    ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]]
+    ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]]
+    ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE30]]
+    ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO14]]
+    ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE30]]
+    ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
+    ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]]
+    ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]]
+    ; GFX8-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]]
+    ; GFX8-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]]
+    ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE30]]
+    ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]]
+    ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD2]](s32)
     ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDE40]], [[C5]]
     ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64)
     ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD2]], [[ANYEXT2]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]]
     ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64)
     ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]]
     ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]]
@@ -3685,9 +3500,9 @@ body: |
     ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
     ; GFX8-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV34]]
-    ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV35]], [[UADDO39]]
-    ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32)
+    ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV34]]
+    ; GFX8-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV35]], [[UADDO21]]
+    ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32)
     ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV33]]
     ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
     ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV32]]
@@ -3695,9 +3510,9 @@ body: |
     ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV33]]
     ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
     ; GFX8-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV36]]
-    ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV37]], [[UADDO41]]
-    ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32)
+    ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV36]]
+    ; GFX8-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV37]], [[UADDO23]]
+    ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32)
     ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
     ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
     ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
@@ -3710,6 +3525,7 @@ body: |
     ; GFX8-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV39]], [[UV41]], [[USUBO7]]
     ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32)
     ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64)
+    ;
     ; GFX9-LABEL: name: test_sdiv_s33
     ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; GFX9-NEXT: {{  $}}
@@ -3760,89 +3576,74 @@ body: |
     ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]]
     ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]]
     ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]]
     ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]]
     ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]]
     ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]]
-    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]]
+    ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]]
     ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]]
     ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]]
     ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]]
-    ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]]
-    ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
-    ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]]
-    ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]]
+    ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]]
+    ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]]
+    ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]]
     ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]]
-    ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
-    ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]]
+    ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]]
+    ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]]
+    ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]]
     ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64)
     ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]]
     ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64)
-    ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]]
-    ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]]
-    ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]]
-    ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]]
-    ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]]
-    ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]]
-    ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]]
-    ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
-    ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]]
-    ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
-    ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]]
-    ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]]
-    ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]]
-    ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]]
-    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
-    ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]]
+    ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV18]]
+    ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV20]]
+    ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV18]]
+    ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]]
+    ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]]
+    ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV20]]
+    ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV18]]
+    ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV20]]
+    ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
+    ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]]
+    ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]]
+    ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]]
+    ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]]
+    ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV20]]
+    ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]]
+    ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]]
+    ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]]
     ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
     ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
-    ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]]
-    ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]]
-    ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]]
-    ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
-    ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]]
-    ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]]
-    ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]]
-    ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]]
-    ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
-    ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]]
-    ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
-    ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]]
-    ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
-    ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]]
-    ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]]
-    ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]]
-    ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD11]](s32)
+    ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO14]]
+    ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE30]]
+    ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO14]]
+    ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
+    ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]]
+    ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]]
+    ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE30]]
+    ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO14]]
+    ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE30]]
+    ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
+    ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]]
+    ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]]
+    ; GFX9-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]]
+    ; GFX9-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]]
+    ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE30]]
+    ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]]
+    ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD2]](s32)
     ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDE40]], [[C5]]
     ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64)
     ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD2]], [[ANYEXT2]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]]
     ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64)
     ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]]
     ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]]
@@ -3859,9 +3660,9 @@ body: |
     ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
     ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV34]]
-    ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV35]], [[UADDO39]]
-    ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32)
+    ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV34]]
+    ; GFX9-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV35]], [[UADDO21]]
+    ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32)
     ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV33]]
     ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
     ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV32]]
@@ -3869,9 +3670,9 @@ body: |
     ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV33]]
     ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
     ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV36]]
-    ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV37]], [[UADDO41]]
-    ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32)
+    ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV36]]
+    ; GFX9-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV37]], [[UADDO23]]
+    ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32)
     ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
     ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
     ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
@@ -3884,6 +3685,7 @@ body: |
     ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV39]], [[UV41]], [[USUBO7]]
     ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32)
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64)
+    ;
     ; GFX10-LABEL: name: test_sdiv_s33
     ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; GFX10-NEXT: {{  $}}
@@ -3934,93 +3736,78 @@ body: |
     ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[MUL]]
     ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
     ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]]
+    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]]
     ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
     ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]]
     ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]]
-    ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]]
-    ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]]
+    ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]]
     ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
     ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]]
     ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
-    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]]
-    ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]]
-    ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
-    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
-    ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]]
+    ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]]
+    ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]]
+    ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]]
+    ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]]
     ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
-    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]]
-    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
-    ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
-    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]]
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]]
+    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]]
+    ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]]
+    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]]
     ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64)
-    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
-    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL5]]
-    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
-    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]]
-    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV16]]
-    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]]
-    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV16]]
-    ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]]
-    ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]]
-    ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]]
-    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV16]]
-    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]]
-    ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]]
-    ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]]
-    ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
-    ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]]
-    ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
-    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]]
-    ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]]
-    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
-    ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]]
+    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]]
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL5]]
+    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]]
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]]
+    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV16]]
+    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]]
+    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV16]]
+    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]]
+    ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]]
+    ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]]
+    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]]
+    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV16]]
+    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]]
+    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]]
+    ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]]
+    ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]]
+    ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]]
+    ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]]
+    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]]
+    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]]
+    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]]
+    ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]]
     ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
     ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
-    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]]
-    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]]
-    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]]
-    ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]]
-    ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]]
-    ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]]
-    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]]
-    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]]
-    ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]]
-    ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]]
-    ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
-    ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]]
-    ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
-    ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
-    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]]
-    ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]]
-    ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32)
+    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO14]]
+    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE30]]
+    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO14]]
+    ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]]
+    ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]]
+    ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]]
+    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE30]]
+    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO14]]
+    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE30]]
+    ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]]
+    ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]]
+    ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]]
+    ; GFX10-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]]
+    ; GFX10-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]]
+    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE30]]
+    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]]
+    ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD6]](s32)
     ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
-    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO36]], [[C5]]
+    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDE40]], [[C5]]
     ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
-    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]]
-    ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[MUL13]]
-    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]]
-    ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]]
+    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD6]]
+    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[MUL13]]
+    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDE40]]
+    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]]
     ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]]
-    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]]
-    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]]
+    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD8]], [[USUBO3]]
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD8]]
     ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
     ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV27]]
     ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
@@ -4033,9 +3820,9 @@ body: |
     ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
     ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV28]]
-    ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV29]], [[UADDO39]]
-    ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32)
+    ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDE40]], [[UV28]]
+    ; GFX10-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV29]], [[UADDO21]]
+    ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32)
     ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV27]]
     ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
     ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV26]]
@@ -4043,9 +3830,9 @@ body: |
     ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV27]]
     ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
     ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV30]]
-    ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV31]], [[UADDO41]]
-    ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32)
+    ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UV30]]
+    ; GFX10-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UV31]], [[UADDO23]]
+    ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32)
     ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
     ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
     ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir
index 08bb589b6ded2..bb2b47d6ee663 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir
@@ -45,6 +45,7 @@ body: |
     ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
     ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
     ; GFX6-NEXT: $vgpr0 = COPY [[SUB4]](s32)
+    ;
     ; GFX8-LABEL: name: test_srem_s32
     ; GFX8: liveins: $vgpr0, $vgpr1
     ; GFX8-NEXT: {{  $}}
@@ -79,6 +80,7 @@ body: |
     ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
     ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
     ; GFX8-NEXT: $vgpr0 = COPY [[SUB4]](s32)
+    ;
     ; GFX9-LABEL: name: test_srem_s32
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9-NEXT: {{  $}}
@@ -113,6 +115,7 @@ body: |
     ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
     ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
     ; GFX9-NEXT: $vgpr0 = COPY [[SUB4]](s32)
+    ;
     ; GFX10-LABEL: name: test_srem_s32
     ; GFX10: liveins: $vgpr0, $vgpr1
     ; GFX10-NEXT: {{  $}}
@@ -221,6 +224,7 @@ body: |
     ; GFX6-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]]
     ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32)
     ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ;
     ; GFX8-LABEL: name: test_srem_v2s32
     ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; GFX8-NEXT: {{  $}}
@@ -283,6 +287,7 @@ body: |
     ; GFX8-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]]
     ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32)
     ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ;
     ; GFX9-LABEL: name: test_srem_v2s32
     ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; GFX9-NEXT: {{  $}}
@@ -345,6 +350,7 @@ body: |
     ; GFX9-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]]
     ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32)
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ;
     ; GFX10-LABEL: name: test_srem_v2s32
     ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; GFX10-NEXT: {{  $}}
@@ -462,6 +468,7 @@ body: |
     ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]]
     ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]]
     ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
     ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
     ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
@@ -471,88 +478,72 @@ body: |
     ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
     ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
     ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]]
-    ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH1]], [[UADDO5]]
+    ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]]
     ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
     ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
     ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
-    ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
-    ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]]
-    ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
-    ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
-    ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH3]], [[UADDO7]]
+    ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]]
+    ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]]
+    ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]]
     ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
-    ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
-    ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
-    ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
-    ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]]
-    ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
-    ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
-    ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]]
-    ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
-    ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]]
-    ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]]
-    ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]]
-    ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]]
-    ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
-    ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
-    ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]]
-    ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]]
-    ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]]
-    ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
-    ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]]
-    ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
-    ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]]
-    ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
-    ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]]
-    ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]]
-    ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
-    ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]]
+    ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE14]]
+    ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]]
+    ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]]
+    ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO8]]
+    ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]]
+    ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]]
+    ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO8]]
+    ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]]
+    ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[MUL6]]
+    ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]]
+    ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[MUL6]]
+    ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH6]], [[UADDO11]]
+    ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]]
+    ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]]
+    ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[MUL6]]
+    ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]]
+    ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]]
+    ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]]
+    ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]]
+    ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]]
+    ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]]
+    ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE28]]
+    ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]]
+    ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]]
     ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
     ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
-    ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]]
-    ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]]
-    ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]]
-    ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
-    ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
-    ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]]
-    ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]]
-    ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]]
-    ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
-    ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]]
-    ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
-    ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]]
-    ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
-    ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
-    ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]]
-    ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]]
+    ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO14]]
+    ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE30]]
+    ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO14]]
+    ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH10]], [[UADDO17]]
+    ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]]
+    ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE30]]
+    ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO14]]
+    ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE30]]
+    ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH12]], [[UADDO19]]
+    ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]]
+    ; GFX6-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]]
+    ; GFX6-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]]
+    ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE30]]
+    ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE42]]
     ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
-    ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]]
-    ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]]
-    ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]]
-    ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]]
-    ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
-    ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]]
+    ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDE40]]
+    ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDE40]]
+    ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD6]]
+    ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDE40]]
+    ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]]
     ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]]
-    ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]]
-    ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]]
+    ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD8]], [[USUBO3]]
+    ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD8]]
     ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
     ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
     ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]]
@@ -586,6 +577,7 @@ body: |
     ; GFX6-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]]
     ; GFX6-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32)
     ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64)
+    ;
     ; GFX8-LABEL: name: test_srem_s64
     ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; GFX8-NEXT: {{  $}}
@@ -634,88 +626,73 @@ body: |
     ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]]
     ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]]
     ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
+    ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]]
     ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]]
     ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]]
     ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]]
-    ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]]
+    ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]]
     ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]]
     ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]]
     ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]]
-    ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]]
-    ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
-    ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]]
-    ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]]
+    ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]]
+    ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]]
+    ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]]
     ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]]
-    ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
-    ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]]
+    ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]]
+    ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]]
+    ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]]
     ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64)
     ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]]
     ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64)
-    ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]]
-    ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]]
-    ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]]
-    ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]]
-    ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]]
-    ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]]
-    ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]]
-    ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
-    ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]]
-    ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
-    ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]]
-    ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]]
-    ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]]
-    ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]]
-    ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
-    ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]]
+    ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV18]]
+    ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV20]]
+    ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV18]]
+    ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]]
+    ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]]
+    ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV20]]
+    ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV18]]
+    ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV20]]
+    ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
+    ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]]
+    ; GFX8-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]]
+    ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]]
+    ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]]
+    ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV20]]
+    ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]]
+    ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]]
+    ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]]
     ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
     ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
-    ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]]
-    ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]]
-    ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]]
-    ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
-    ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]]
-    ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]]
-    ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]]
-    ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]]
-    ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
-    ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]]
-    ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
-    ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]]
-    ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
-    ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]]
-    ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]]
-    ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]]
+    ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO14]]
+    ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE30]]
+    ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO14]]
+    ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
+    ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]]
+    ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]]
+    ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE30]]
+    ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO14]]
+    ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE30]]
+    ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
+    ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]]
+    ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]]
+    ; GFX8-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]]
+    ; GFX8-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]]
+    ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE30]]
+    ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]]
     ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDE40]], [[C5]]
     ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64)
     ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD2]], [[ANYEXT2]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]]
     ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64)
     ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]]
     ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]]
@@ -753,6 +730,7 @@ body: |
     ; GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV35]], [[UV37]], [[USUBO9]]
     ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32)
     ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64)
+    ;
     ; GFX9-LABEL: name: test_srem_s64
     ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; GFX9-NEXT: {{  $}}
@@ -801,88 +779,73 @@ body: |
     ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]]
     ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]]
     ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]]
     ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]]
     ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]]
     ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]]
-    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]]
+    ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]]
     ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]]
     ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]]
     ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]]
-    ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]]
-    ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
-    ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]]
-    ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]]
+    ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]]
+    ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]]
+    ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]]
     ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]]
-    ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
-    ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]]
+    ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]]
+    ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]]
+    ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]]
     ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64)
     ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]]
     ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64)
-    ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]]
-    ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]]
-    ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]]
-    ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]]
-    ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]]
-    ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]]
-    ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]]
-    ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
-    ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]]
-    ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
-    ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]]
-    ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]]
-    ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]]
-    ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]]
-    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
-    ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]]
+    ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV18]]
+    ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV20]]
+    ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV18]]
+    ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]]
+    ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]]
+    ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV20]]
+    ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV18]]
+    ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV20]]
+    ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
+    ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]]
+    ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]]
+    ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]]
+    ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]]
+    ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV20]]
+    ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]]
+    ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]]
+    ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]]
     ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
     ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
-    ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]]
-    ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]]
-    ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]]
-    ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
-    ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]]
-    ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]]
-    ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]]
-    ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]]
-    ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
-    ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]]
-    ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
-    ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]]
-    ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
-    ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]]
-    ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]]
-    ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]]
+    ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO14]]
+    ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE30]]
+    ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO14]]
+    ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
+    ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]]
+    ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]]
+    ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE30]]
+    ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO14]]
+    ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE30]]
+    ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
+    ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]]
+    ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]]
+    ; GFX9-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]]
+    ; GFX9-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]]
+    ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE30]]
+    ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]]
     ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDE40]], [[C5]]
     ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64)
     ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD2]], [[ANYEXT2]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]]
     ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64)
     ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]]
     ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]]
@@ -920,6 +883,7 @@ body: |
     ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV35]], [[UV37]], [[USUBO9]]
     ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32)
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64)
+    ;
     ; GFX10-LABEL: name: test_srem_s64
     ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; GFX10-NEXT: {{  $}}
@@ -968,92 +932,77 @@ body: |
     ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[MUL]]
     ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
     ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]]
+    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]]
     ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
     ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]]
     ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]]
-    ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]]
-    ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]]
+    ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]]
     ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
     ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]]
     ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
-    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]]
-    ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]]
-    ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
-    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
-    ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]]
+    ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]]
+    ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]]
+    ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]]
+    ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]]
     ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
-    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]]
-    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
-    ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
-    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]]
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]]
+    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]]
+    ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]]
+    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]]
     ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64)
-    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
-    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL5]]
-    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
-    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]]
-    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV16]]
-    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]]
-    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV16]]
-    ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]]
-    ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]]
-    ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]]
-    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV16]]
-    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]]
-    ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]]
-    ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]]
-    ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
-    ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]]
-    ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
-    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]]
-    ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]]
-    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
-    ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]]
+    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]]
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL5]]
+    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]]
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]]
+    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV16]]
+    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]]
+    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV16]]
+    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]]
+    ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]]
+    ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]]
+    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]]
+    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV16]]
+    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]]
+    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]]
+    ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]]
+    ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]]
+    ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]]
+    ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]]
+    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]]
+    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]]
+    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]]
+    ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]]
     ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
     ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
-    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]]
-    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]]
-    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]]
-    ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]]
-    ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]]
-    ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]]
-    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]]
-    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]]
-    ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]]
-    ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]]
-    ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
-    ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]]
-    ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
-    ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
-    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]]
-    ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]]
+    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO14]]
+    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE30]]
+    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO14]]
+    ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]]
+    ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]]
+    ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]]
+    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE30]]
+    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO14]]
+    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE30]]
+    ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]]
+    ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]]
+    ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]]
+    ; GFX10-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]]
+    ; GFX10-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]]
+    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE30]]
+    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]]
     ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
-    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO36]], [[C5]]
+    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDE40]], [[C5]]
     ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
-    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]]
-    ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[MUL13]]
-    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]]
-    ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]]
+    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD6]]
+    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[MUL13]]
+    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDE40]]
+    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]]
     ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]]
-    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]]
-    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]]
+    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD8]], [[USUBO3]]
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD8]]
     ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
     ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
     ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV27]]
@@ -1144,6 +1093,7 @@ body: |
     ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]]
     ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]]
     ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
     ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
     ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
@@ -1153,88 +1103,72 @@ body: |
     ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
     ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
     ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]]
-    ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH1]], [[UADDO5]]
+    ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]]
     ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
     ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
     ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
-    ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
-    ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]]
-    ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
-    ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
-    ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH3]], [[UADDO7]]
+    ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]]
+    ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]]
+    ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]]
     ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
-    ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
-    ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
-    ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
-    ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]]
-    ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
-    ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
-    ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]]
-    ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
-    ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]]
-    ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]]
-    ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]]
-    ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]]
-    ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
-    ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
-    ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]]
-    ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]]
-    ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]]
-    ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
-    ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]]
-    ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
-    ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]]
-    ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
-    ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]]
-    ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]]
-    ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
-    ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]]
+    ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE14]]
+    ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]]
+    ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]]
+    ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO8]]
+    ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]]
+    ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]]
+    ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO8]]
+    ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]]
+    ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[MUL6]]
+    ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]]
+    ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[MUL6]]
+    ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH6]], [[UADDO11]]
+    ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]]
+    ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]]
+    ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[MUL6]]
+    ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]]
+    ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]]
+    ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]]
+    ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]]
+    ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]]
+    ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]]
+    ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE28]]
+    ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]]
+    ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]]
     ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
     ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
-    ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]]
-    ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]]
-    ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]]
-    ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
-    ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
-    ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]]
-    ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]]
-    ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]]
-    ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
-    ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]]
-    ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
-    ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]]
-    ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
-    ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
-    ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]]
-    ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]]
+    ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO14]]
+    ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE30]]
+    ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO14]]
+    ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH10]], [[UADDO17]]
+    ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]]
+    ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE30]]
+    ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO14]]
+    ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE30]]
+    ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH12]], [[UADDO19]]
+    ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]]
+    ; GFX6-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]]
+    ; GFX6-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]]
+    ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE30]]
+    ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE42]]
     ; GFX6-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
-    ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDO36]]
-    ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]]
-    ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]]
-    ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDO36]]
-    ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
-    ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]]
+    ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDE40]]
+    ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDE40]]
+    ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD6]]
+    ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDE40]]
+    ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]]
     ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[MUL15]]
-    ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]]
-    ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]]
+    ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD8]], [[USUBO3]]
+    ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD8]]
     ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
     ; GFX6-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
     ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]]
@@ -1271,14 +1205,14 @@ body: |
     ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32)
     ; GFX6-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
     ; GFX6-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64)
-    ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UV30]], [[UV32]]
-    ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UV31]], [[UV33]], [[UADDO39]]
-    ; GFX6-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32)
+    ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UV30]], [[UV32]]
+    ; GFX6-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UV31]], [[UV33]], [[UADDO21]]
+    ; GFX6-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32)
     ; GFX6-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
     ; GFX6-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64)
-    ; GFX6-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]]
-    ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO41]]
-    ; GFX6-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32)
+    ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]]
+    ; GFX6-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO23]]
+    ; GFX6-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32)
     ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]]
     ; GFX6-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]]
     ; GFX6-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
@@ -1302,93 +1236,78 @@ body: |
     ; GFX6-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[FPTOUI2]]
     ; GFX6-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI3]]
     ; GFX6-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[FPTOUI2]]
-    ; GFX6-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]]
-    ; GFX6-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]]
+    ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]]
+    ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[UMULH15]]
     ; GFX6-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]]
-    ; GFX6-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]]
+    ; GFX6-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD10]]
     ; GFX6-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]]
-    ; GFX6-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]]
-    ; GFX6-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1)
-    ; GFX6-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH16]]
-    ; GFX6-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1)
-    ; GFX6-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]]
-    ; GFX6-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]]
+    ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]]
+    ; GFX6-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDO24]], [[UMULH16]], [[UADDO25]]
+    ; GFX6-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE49]]
+    ; GFX6-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD10]]
     ; GFX6-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]]
-    ; GFX6-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]]
-    ; GFX6-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]]
-    ; GFX6-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1)
-    ; GFX6-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH18]]
-    ; GFX6-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1)
-    ; GFX6-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]]
-    ; GFX6-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[ADD20]]
-    ; GFX6-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1)
-    ; GFX6-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]]
-    ; GFX6-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]]
-    ; GFX6-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]]
-    ; GFX6-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO50]]
-    ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO53]]
-    ; GFX6-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDO52]]
-    ; GFX6-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[UADDO52]]
-    ; GFX6-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDE12]]
-    ; GFX6-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[UADDO52]]
-    ; GFX6-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]]
-    ; GFX6-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]]
-    ; GFX6-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[MUL24]]
-    ; GFX6-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO52]], [[ADD25]]
-    ; GFX6-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[MUL24]]
-    ; GFX6-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]]
-    ; GFX6-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1)
-    ; GFX6-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH21]]
-    ; GFX6-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1)
-    ; GFX6-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]]
-    ; GFX6-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD25]]
-    ; GFX6-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[MUL24]]
-    ; GFX6-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[ADD25]]
-    ; GFX6-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]]
-    ; GFX6-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1)
-    ; GFX6-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH23]]
-    ; GFX6-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1)
-    ; GFX6-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]]
-    ; GFX6-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[UADDO60]], [[ADD26]]
-    ; GFX6-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1)
-    ; GFX6-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]]
-    ; GFX6-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD25]]
-    ; GFX6-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]]
-    ; GFX6-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[UADDO62]]
-    ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD29]], [[UADDO65]]
+    ; GFX6-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD10]]
+    ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]]
+    ; GFX6-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDO26]], [[UMULH18]], [[UADDO27]]
+    ; GFX6-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE53]]
+    ; GFX6-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[UADDE52]], [[UADDE50]], [[UADDO27]]
+    ; GFX6-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDE54]], [[C6]], [[UADDE57]]
+    ; GFX6-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD10]]
+    ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE58]]
+    ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE56]]
+    ; GFX6-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD11]], [[UADDO29]]
+    ; GFX6-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDO28]]
+    ; GFX6-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[UADDO28]]
+    ; GFX6-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDE60]]
+    ; GFX6-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[UADDO28]]
+    ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]]
+    ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[UMULH20]]
+    ; GFX6-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE60]], [[MUL24]]
+    ; GFX6-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO28]], [[ADD13]]
+    ; GFX6-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO28]], [[MUL24]]
+    ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]]
+    ; GFX6-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH21]], [[UADDO31]]
+    ; GFX6-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE63]]
+    ; GFX6-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE60]], [[ADD13]]
+    ; GFX6-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE60]], [[MUL24]]
+    ; GFX6-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO28]], [[ADD13]]
+    ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]]
+    ; GFX6-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDO32]], [[UMULH23]], [[UADDO33]]
+    ; GFX6-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE67]]
+    ; GFX6-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDE66]], [[UADDE64]], [[UADDO33]]
+    ; GFX6-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDE68]], [[C6]], [[UADDE71]]
+    ; GFX6-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE60]], [[ADD13]]
+    ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[UADDE72]]
+    ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UADDE70]]
+    ; GFX6-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[UADDE60]], [[ADD14]], [[UADDO35]]
     ; GFX6-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64)
     ; GFX6-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64)
-    ; GFX6-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO64]]
-    ; GFX6-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE14]]
-    ; GFX6-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO64]]
-    ; GFX6-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]]
-    ; GFX6-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1)
-    ; GFX6-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH25]]
-    ; GFX6-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1)
-    ; GFX6-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]]
-    ; GFX6-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE14]]
-    ; GFX6-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO64]]
-    ; GFX6-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE14]]
-    ; GFX6-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]]
-    ; GFX6-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1)
-    ; GFX6-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH27]]
-    ; GFX6-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1)
-    ; GFX6-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]]
-    ; GFX6-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[ADD30]]
-    ; GFX6-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1)
-    ; GFX6-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]]
-    ; GFX6-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDE14]]
-    ; GFX6-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]]
+    ; GFX6-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO34]]
+    ; GFX6-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE74]]
+    ; GFX6-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO34]]
+    ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]]
+    ; GFX6-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDO36]], [[UMULH25]], [[UADDO37]]
+    ; GFX6-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE77]]
+    ; GFX6-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE74]]
+    ; GFX6-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO34]]
+    ; GFX6-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE74]]
+    ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]]
+    ; GFX6-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDO38]], [[UMULH27]], [[UADDO39]]
+    ; GFX6-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE81]]
+    ; GFX6-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[UADDE80]], [[UADDE78]], [[UADDO39]]
+    ; GFX6-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[UADDE82]], [[C6]], [[UADDE85]]
+    ; GFX6-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDE74]]
+    ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[UADDE86]]
     ; GFX6-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
-    ; GFX6-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[UADDO74]]
-    ; GFX6-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV49]], [[UADDO74]]
-    ; GFX6-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[ADD33]]
-    ; GFX6-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV48]], [[UADDO74]]
-    ; GFX6-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]]
-    ; GFX6-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]]
+    ; GFX6-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[UADDE84]]
+    ; GFX6-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV49]], [[UADDE84]]
+    ; GFX6-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[ADD15]]
+    ; GFX6-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV48]], [[UADDE84]]
+    ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]]
+    ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH29]]
     ; GFX6-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[MUL33]]
-    ; GFX6-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[ADD35]], [[USUBO13]]
-    ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV45]], [[ADD35]]
+    ; GFX6-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[ADD17]], [[USUBO13]]
+    ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV45]], [[ADD17]]
     ; GFX6-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE16]](s32)
     ; GFX6-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
     ; GFX6-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV51]]
@@ -1423,6 +1342,7 @@ body: |
     ; GFX6-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO18]](s32), [[USUBE26]](s32)
     ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64)
     ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ;
     ; GFX8-LABEL: name: test_srem_v2s64
     ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX8-NEXT: {{  $}}
@@ -1473,88 +1393,73 @@ body: |
     ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]]
     ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]]
     ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
+    ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV18]]
     ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV20]]
     ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV18]]
     ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]]
-    ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]]
+    ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]]
     ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV20]]
     ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV18]]
     ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV20]]
-    ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]]
-    ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
-    ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]]
-    ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]]
+    ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]]
+    ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]]
+    ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]]
     ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV20]]
-    ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
-    ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]]
+    ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]]
+    ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]]
+    ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]]
     ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64)
     ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV23]](s32)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]]
     ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64)
-    ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV22]]
-    ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV24]]
-    ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV22]]
-    ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]]
-    ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV24]]
-    ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV22]]
-    ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV24]]
-    ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
-    ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]]
-    ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
-    ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]]
-    ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]]
-    ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV24]]
-    ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]]
-    ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
-    ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]]
+    ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV22]]
+    ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV24]]
+    ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV22]]
+    ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]]
+    ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]]
+    ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV24]]
+    ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV22]]
+    ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV24]]
+    ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
+    ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]]
+    ; GFX8-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]]
+    ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]]
+    ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]]
+    ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV24]]
+    ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]]
+    ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]]
+    ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]]
     ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
     ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
-    ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO26]]
-    ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE6]]
-    ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO26]]
-    ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
-    ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]]
-    ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE6]]
-    ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDO26]]
-    ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE6]]
-    ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
-    ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]]
-    ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
-    ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]]
-    ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
-    ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]]
-    ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDE6]]
-    ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]]
+    ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO14]]
+    ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE30]]
+    ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO14]]
+    ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
+    ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]]
+    ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]]
+    ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE30]]
+    ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDO14]]
+    ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE30]]
+    ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
+    ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]]
+    ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]]
+    ; GFX8-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]]
+    ; GFX8-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]]
+    ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDE30]]
+    ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]]
     ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[UADDO36]], [[C5]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[UADDE40]], [[C5]]
     ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64)
     ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV33]](s32)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[ADD11]], [[ANYEXT2]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV31]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[ADD2]], [[ANYEXT2]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV31]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]]
     ; GFX8-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64)
     ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV32]]
     ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV34]], [[USUBO3]]
@@ -1595,14 +1500,14 @@ body: |
     ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32)
     ; GFX8-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
     ; GFX8-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64)
-    ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UV42]], [[UV44]]
-    ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UV43]], [[UV45]], [[UADDO39]]
-    ; GFX8-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32)
+    ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UV42]], [[UV44]]
+    ; GFX8-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UV43]], [[UV45]], [[UADDO21]]
+    ; GFX8-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32)
     ; GFX8-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
     ; GFX8-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64)
-    ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UV46]], [[UV48]]
-    ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV47]], [[UV49]], [[UADDO41]]
-    ; GFX8-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32)
+    ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UV46]], [[UV48]]
+    ; GFX8-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UV47]], [[UV49]], [[UADDO23]]
+    ; GFX8-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32)
     ; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]]
     ; GFX8-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]]
     ; GFX8-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
@@ -1631,84 +1536,69 @@ body: |
     ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV56]]
     ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV58]]
     ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV56]]
-    ; GFX8-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
-    ; GFX8-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1)
-    ; GFX8-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH12]]
-    ; GFX8-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1)
-    ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]]
+    ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX8-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDO24]], [[UMULH12]], [[UADDO25]]
+    ; GFX8-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE49]]
     ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV58]]
     ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV56]]
     ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV58]]
-    ; GFX8-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]]
-    ; GFX8-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1)
-    ; GFX8-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH14]]
-    ; GFX8-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1)
-    ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]]
-    ; GFX8-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[ADD12]]
-    ; GFX8-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1)
-    ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]]
+    ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]]
+    ; GFX8-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDO26]], [[UMULH14]], [[UADDO27]]
+    ; GFX8-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE53]]
+    ; GFX8-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[UADDE52]], [[UADDE50]], [[UADDO27]]
+    ; GFX8-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDE54]], [[C6]], [[UADDE57]]
     ; GFX8-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV58]]
-    ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]]
-    ; GFX8-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO50]]
-    ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO53]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDO52]], [[C5]]
+    ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE58]]
+    ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE56]]
+    ; GFX8-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD3]], [[UADDO29]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDO28]], [[C5]]
     ; GFX8-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64)
     ; GFX8-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV61]](s32)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDE12]], [[ANYEXT4]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE14]](s32), [[UADDO52]], [[AMDGPU_MAD_U64_U32_26]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDE60]], [[ANYEXT4]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE14]](s32), [[UADDO28]], [[AMDGPU_MAD_U64_U32_26]]
     ; GFX8-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64)
-    ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV60]]
-    ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO52]], [[UV62]]
-    ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[UV60]]
-    ; GFX8-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
-    ; GFX8-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1)
-    ; GFX8-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH16]]
-    ; GFX8-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1)
-    ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]]
-    ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV62]]
-    ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV60]]
-    ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[UV62]]
-    ; GFX8-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]]
-    ; GFX8-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1)
-    ; GFX8-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH18]]
-    ; GFX8-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1)
-    ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]]
-    ; GFX8-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[UADDO60]], [[ADD16]]
-    ; GFX8-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1)
-    ; GFX8-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]]
-    ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV62]]
-    ; GFX8-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]]
-    ; GFX8-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[UADDO62]]
-    ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD19]], [[UADDO65]]
+    ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE60]], [[UV60]]
+    ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO28]], [[UV62]]
+    ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO28]], [[UV60]]
+    ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX8-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH16]], [[UADDO31]]
+    ; GFX8-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE63]]
+    ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE60]], [[UV62]]
+    ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE60]], [[UV60]]
+    ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO28]], [[UV62]]
+    ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]]
+    ; GFX8-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDO32]], [[UMULH18]], [[UADDO33]]
+    ; GFX8-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE67]]
+    ; GFX8-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDE66]], [[UADDE64]], [[UADDO33]]
+    ; GFX8-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDE68]], [[C6]], [[UADDE71]]
+    ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE60]], [[UV62]]
+    ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE72]]
+    ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UADDE70]]
+    ; GFX8-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[UADDE60]], [[ADD4]], [[UADDO35]]
     ; GFX8-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64)
     ; GFX8-NEXT: [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64)
-    ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV67]], [[UADDO64]]
-    ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV66]], [[UADDE14]]
-    ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV66]], [[UADDO64]]
-    ; GFX8-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]]
-    ; GFX8-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1)
-    ; GFX8-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH20]]
-    ; GFX8-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1)
-    ; GFX8-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]]
-    ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV67]], [[UADDE14]]
-    ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV67]], [[UADDO64]]
-    ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV66]], [[UADDE14]]
-    ; GFX8-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]]
-    ; GFX8-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1)
-    ; GFX8-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH22]]
-    ; GFX8-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1)
-    ; GFX8-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]]
-    ; GFX8-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[ADD20]]
-    ; GFX8-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1)
-    ; GFX8-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]]
-    ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV67]], [[UADDE14]]
-    ; GFX8-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]]
+    ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV67]], [[UADDO34]]
+    ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV66]], [[UADDE74]]
+    ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV66]], [[UADDO34]]
+    ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]]
+    ; GFX8-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDO36]], [[UMULH20]], [[UADDO37]]
+    ; GFX8-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE77]]
+    ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV67]], [[UADDE74]]
+    ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV67]], [[UADDO34]]
+    ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV66]], [[UADDE74]]
+    ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]]
+    ; GFX8-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDO38]], [[UMULH22]], [[UADDO39]]
+    ; GFX8-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE81]]
+    ; GFX8-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[UADDE80]], [[UADDE78]], [[UADDO39]]
+    ; GFX8-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[UADDE82]], [[C6]], [[UADDE85]]
+    ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV67]], [[UADDE74]]
+    ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE86]]
     ; GFX8-NEXT: [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV68]](s32), [[UADDO74]], [[C5]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV68]](s32), [[UADDE84]], [[C5]]
     ; GFX8-NEXT: [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64)
     ; GFX8-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV71]](s32)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV68]](s32), [[ADD23]], [[ANYEXT5]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV69]](s32), [[UADDO74]], [[AMDGPU_MAD_U64_U32_32]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV68]](s32), [[ADD5]], [[ANYEXT5]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV69]](s32), [[UADDE84]], [[AMDGPU_MAD_U64_U32_32]]
     ; GFX8-NEXT: [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64)
     ; GFX8-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV64]], [[UV70]]
     ; GFX8-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV65]], [[UV72]], [[USUBO13]]
@@ -1747,6 +1637,7 @@ body: |
     ; GFX8-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO18]](s32), [[USUBE26]](s32)
     ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64)
     ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ;
     ; GFX9-LABEL: name: test_srem_v2s64
     ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX9-NEXT: {{  $}}
@@ -1797,88 +1688,73 @@ body: |
     ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]]
     ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]]
     ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV18]]
     ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV20]]
     ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV18]]
     ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]]
-    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]]
+    ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]]
     ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV20]]
     ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV18]]
     ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV20]]
-    ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]]
-    ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
-    ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]]
-    ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]]
+    ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]]
+    ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]]
+    ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]]
     ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV20]]
-    ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
-    ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]]
+    ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]]
+    ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]]
+    ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]]
     ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64)
     ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV23]](s32)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]]
     ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64)
-    ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV22]]
-    ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV24]]
-    ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV22]]
-    ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]]
-    ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV24]]
-    ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV22]]
-    ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV24]]
-    ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
-    ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]]
-    ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
-    ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]]
-    ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]]
-    ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV24]]
-    ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]]
-    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
-    ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]]
+    ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV22]]
+    ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV24]]
+    ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV22]]
+    ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]]
+    ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]]
+    ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV24]]
+    ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV22]]
+    ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV24]]
+    ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
+    ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]]
+    ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]]
+    ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]]
+    ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]]
+    ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV24]]
+    ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]]
+    ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]]
+    ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]]
     ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
     ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
-    ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO26]]
-    ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE6]]
-    ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO26]]
-    ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
-    ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]]
-    ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE6]]
-    ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDO26]]
-    ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE6]]
-    ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
-    ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]]
-    ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
-    ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]]
-    ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
-    ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]]
-    ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDE6]]
-    ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]]
+    ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO14]]
+    ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE30]]
+    ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO14]]
+    ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
+    ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]]
+    ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]]
+    ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE30]]
+    ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDO14]]
+    ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE30]]
+    ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
+    ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]]
+    ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]]
+    ; GFX9-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]]
+    ; GFX9-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]]
+    ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV29]], [[UADDE30]]
+    ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]]
     ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[UADDO36]], [[C5]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[UADDE40]], [[C5]]
     ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64)
     ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV33]](s32)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[ADD11]], [[ANYEXT2]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV31]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV30]](s32), [[ADD2]], [[ANYEXT2]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV31]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]]
     ; GFX9-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64)
     ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV32]]
     ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV34]], [[USUBO3]]
@@ -1919,14 +1795,14 @@ body: |
     ; GFX9-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32)
     ; GFX9-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
     ; GFX9-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64)
-    ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UV42]], [[UV44]]
-    ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UV43]], [[UV45]], [[UADDO39]]
-    ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32)
+    ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UV42]], [[UV44]]
+    ; GFX9-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UV43]], [[UV45]], [[UADDO21]]
+    ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32)
     ; GFX9-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
     ; GFX9-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64)
-    ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UV46]], [[UV48]]
-    ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV47]], [[UV49]], [[UADDO41]]
-    ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32)
+    ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UV46]], [[UV48]]
+    ; GFX9-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UV47]], [[UV49]], [[UADDO23]]
+    ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32)
     ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]]
     ; GFX9-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]]
     ; GFX9-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
@@ -1955,84 +1831,69 @@ body: |
     ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV56]]
     ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV58]]
     ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV56]]
-    ; GFX9-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
-    ; GFX9-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1)
-    ; GFX9-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH12]]
-    ; GFX9-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1)
-    ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]]
+    ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX9-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDO24]], [[UMULH12]], [[UADDO25]]
+    ; GFX9-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE49]]
     ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV58]]
     ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV56]]
     ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV58]]
-    ; GFX9-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]]
-    ; GFX9-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1)
-    ; GFX9-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH14]]
-    ; GFX9-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1)
-    ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]]
-    ; GFX9-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[ADD12]]
-    ; GFX9-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1)
-    ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]]
+    ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]]
+    ; GFX9-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDO26]], [[UMULH14]], [[UADDO27]]
+    ; GFX9-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE53]]
+    ; GFX9-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[UADDE52]], [[UADDE50]], [[UADDO27]]
+    ; GFX9-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDE54]], [[C6]], [[UADDE57]]
     ; GFX9-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV58]]
-    ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]]
-    ; GFX9-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO50]]
-    ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO53]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDO52]], [[C5]]
+    ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE58]]
+    ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE56]]
+    ; GFX9-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD3]], [[UADDO29]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDO28]], [[C5]]
     ; GFX9-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64)
     ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV61]](s32)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDE12]], [[ANYEXT4]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE14]](s32), [[UADDO52]], [[AMDGPU_MAD_U64_U32_26]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDE60]], [[ANYEXT4]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE14]](s32), [[UADDO28]], [[AMDGPU_MAD_U64_U32_26]]
     ; GFX9-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64)
-    ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV60]]
-    ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO52]], [[UV62]]
-    ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[UV60]]
-    ; GFX9-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
-    ; GFX9-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1)
-    ; GFX9-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH16]]
-    ; GFX9-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1)
-    ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]]
-    ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV62]]
-    ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV60]]
-    ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[UV62]]
-    ; GFX9-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]]
-    ; GFX9-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1)
-    ; GFX9-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH18]]
-    ; GFX9-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1)
-    ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]]
-    ; GFX9-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[UADDO60]], [[ADD16]]
-    ; GFX9-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1)
-    ; GFX9-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]]
-    ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV62]]
-    ; GFX9-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]]
-    ; GFX9-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[UADDO62]]
-    ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD19]], [[UADDO65]]
+    ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE60]], [[UV60]]
+    ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO28]], [[UV62]]
+    ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO28]], [[UV60]]
+    ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX9-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH16]], [[UADDO31]]
+    ; GFX9-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE63]]
+    ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE60]], [[UV62]]
+    ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE60]], [[UV60]]
+    ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO28]], [[UV62]]
+    ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]]
+    ; GFX9-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDO32]], [[UMULH18]], [[UADDO33]]
+    ; GFX9-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE67]]
+    ; GFX9-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDE66]], [[UADDE64]], [[UADDO33]]
+    ; GFX9-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDE68]], [[C6]], [[UADDE71]]
+    ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE60]], [[UV62]]
+    ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE72]]
+    ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UADDE70]]
+    ; GFX9-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[UADDE60]], [[ADD4]], [[UADDO35]]
     ; GFX9-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64)
     ; GFX9-NEXT: [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64)
-    ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV67]], [[UADDO64]]
-    ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV66]], [[UADDE14]]
-    ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV66]], [[UADDO64]]
-    ; GFX9-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]]
-    ; GFX9-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1)
-    ; GFX9-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH20]]
-    ; GFX9-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1)
-    ; GFX9-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]]
-    ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV67]], [[UADDE14]]
-    ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV67]], [[UADDO64]]
-    ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV66]], [[UADDE14]]
-    ; GFX9-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]]
-    ; GFX9-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1)
-    ; GFX9-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH22]]
-    ; GFX9-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1)
-    ; GFX9-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]]
-    ; GFX9-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[ADD20]]
-    ; GFX9-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1)
-    ; GFX9-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]]
-    ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV67]], [[UADDE14]]
-    ; GFX9-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]]
+    ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV67]], [[UADDO34]]
+    ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV66]], [[UADDE74]]
+    ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV66]], [[UADDO34]]
+    ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]]
+    ; GFX9-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDO36]], [[UMULH20]], [[UADDO37]]
+    ; GFX9-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE77]]
+    ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV67]], [[UADDE74]]
+    ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV67]], [[UADDO34]]
+    ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV66]], [[UADDE74]]
+    ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]]
+    ; GFX9-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDO38]], [[UMULH22]], [[UADDO39]]
+    ; GFX9-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE81]]
+    ; GFX9-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[UADDE80]], [[UADDE78]], [[UADDO39]]
+    ; GFX9-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[UADDE82]], [[C6]], [[UADDE85]]
+    ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV67]], [[UADDE74]]
+    ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE86]]
     ; GFX9-NEXT: [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV68]](s32), [[UADDO74]], [[C5]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV68]](s32), [[UADDE84]], [[C5]]
     ; GFX9-NEXT: [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64)
     ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV71]](s32)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV68]](s32), [[ADD23]], [[ANYEXT5]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV69]](s32), [[UADDO74]], [[AMDGPU_MAD_U64_U32_32]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV68]](s32), [[ADD5]], [[ANYEXT5]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV69]](s32), [[UADDE84]], [[AMDGPU_MAD_U64_U32_32]]
     ; GFX9-NEXT: [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64)
     ; GFX9-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV64]], [[UV70]]
     ; GFX9-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV65]], [[UV72]], [[USUBO13]]
@@ -2071,6 +1932,7 @@ body: |
     ; GFX9-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO18]](s32), [[USUBE26]](s32)
     ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64)
     ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ;
     ; GFX10-LABEL: name: test_srem_v2s64
     ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX10-NEXT: {{  $}}
@@ -2121,92 +1983,77 @@ body: |
     ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV19]], [[MUL]]
     ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
     ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]]
+    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV18]]
     ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
     ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV18]]
     ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]]
-    ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]]
-    ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]]
+    ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]]
     ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
     ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV18]]
     ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
-    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]]
-    ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]]
-    ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
-    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
-    ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]]
+    ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]]
+    ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]]
+    ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]]
+    ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]]
     ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
-    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]]
-    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
-    ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
-    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]]
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]]
+    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]]
+    ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]]
+    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]]
     ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64)
-    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
-    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[MUL5]]
-    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
-    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]]
-    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]]
-    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]]
-    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]]
-    ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]]
-    ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]]
-    ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]]
-    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]]
-    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]]
-    ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]]
-    ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]]
-    ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
-    ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]]
-    ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
-    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]]
-    ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]]
-    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
-    ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]]
+    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]]
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[MUL5]]
+    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]]
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]]
+    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV20]]
+    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]]
+    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV20]]
+    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]]
+    ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]]
+    ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]]
+    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]]
+    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV20]]
+    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]]
+    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]]
+    ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]]
+    ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]]
+    ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]]
+    ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]]
+    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]]
+    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]]
+    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]]
+    ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]]
     ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
     ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
-    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]]
-    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]]
-    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]]
-    ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]]
-    ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]]
-    ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]]
-    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]]
-    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]]
-    ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]]
-    ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]]
-    ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
-    ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]]
-    ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
-    ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
-    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]]
-    ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]]
+    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO14]]
+    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE30]]
+    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO14]]
+    ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]]
+    ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]]
+    ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]]
+    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE30]]
+    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO14]]
+    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE30]]
+    ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]]
+    ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]]
+    ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]]
+    ; GFX10-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]]
+    ; GFX10-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]]
+    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE30]]
+    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]]
     ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
-    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]]
+    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDE40]], [[C5]]
     ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
-    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[ADD15]]
-    ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[MUL13]]
-    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDO36]]
-    ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]]
+    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[ADD6]]
+    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[MUL13]]
+    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDE40]]
+    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]]
     ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]]
-    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[ADD17]], [[USUBO3]]
-    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV23]], [[ADD17]]
+    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[ADD8]], [[USUBO3]]
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV23]], [[ADD8]]
     ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
     ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
     ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV31]]
@@ -2243,14 +2090,14 @@ body: |
     ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32)
     ; GFX10-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
     ; GFX10-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64)
-    ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UV36]], [[UV38]]
-    ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UV37]], [[UV39]], [[UADDO39]]
-    ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32)
+    ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UV36]], [[UV38]]
+    ; GFX10-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UV37]], [[UV39]], [[UADDO21]]
+    ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO20]](s32), [[UADDE44]](s32)
     ; GFX10-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
     ; GFX10-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64)
-    ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UV40]], [[UV42]]
-    ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV41]], [[UV43]], [[UADDO41]]
-    ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32)
+    ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UV40]], [[UV42]]
+    ; GFX10-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[UV41]], [[UV43]], [[UADDO23]]
+    ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO22]](s32), [[UADDE46]](s32)
     ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]]
     ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]]
     ; GFX10-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
@@ -2273,94 +2120,79 @@ body: |
     ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[FPTOUI2]], [[C5]]
     ; GFX10-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64)
     ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI3]]
-    ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV51]], [[MUL15]]
+    ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV51]], [[MUL15]]
     ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[FPTOUI2]]
-    ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[MUL16]]
+    ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[MUL16]]
     ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV50]]
-    ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]]
+    ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD10]]
     ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV50]]
-    ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[MUL18]]
-    ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1)
-    ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH12]]
-    ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1)
-    ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]]
-    ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]]
+    ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[MUL18]]
+    ; GFX10-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDO24]], [[UMULH12]], [[UADDO25]]
+    ; GFX10-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE49]]
+    ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD10]]
     ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV50]]
-    ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]]
-    ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL19]], [[UMULH13]]
-    ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1)
-    ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH14]]
-    ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1)
-    ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]]
-    ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[ADD20]]
-    ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1)
-    ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]]
-    ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]]
-    ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD22]]
-    ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO50]]
-    ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO53]]
-    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDO52]], [[C5]]
+    ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD10]]
+    ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[MUL19]], [[UMULH13]]
+    ; GFX10-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDO26]], [[UMULH14]], [[UADDO27]]
+    ; GFX10-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE53]]
+    ; GFX10-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[UADDE52]], [[UADDE50]], [[UADDO27]]
+    ; GFX10-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDE54]], [[C6]], [[UADDE57]]
+    ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD10]]
+    ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE58]]
+    ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE56]]
+    ; GFX10-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD11]], [[UADDO29]]
+    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO10]](s32), [[UADDO28]], [[C5]]
     ; GFX10-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_8]](s64)
-    ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDE12]]
-    ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV53]], [[MUL20]]
-    ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[UADDO52]]
-    ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[MUL21]]
-    ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV52]]
-    ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[UADDO52]], [[ADD25]]
-    ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[UV52]]
-    ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL22]], [[MUL23]]
-    ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1)
-    ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH16]]
-    ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1)
-    ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]]
-    ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD25]]
-    ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV52]]
-    ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[ADD25]]
-    ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL24]], [[UMULH17]]
-    ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1)
-    ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH18]]
-    ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1)
-    ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]]
-    ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[UADDO60]], [[ADD26]]
-    ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1)
-    ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]]
-    ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD25]]
-    ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD28]]
-    ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[UADDO62]]
-    ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD29]], [[UADDO65]]
+    ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDE60]]
+    ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV53]], [[MUL20]]
+    ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[UADDO28]]
+    ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[MUL21]]
+    ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[UADDE60]], [[UV52]]
+    ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[UADDO28]], [[ADD13]]
+    ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO28]], [[UV52]]
+    ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL22]], [[MUL23]]
+    ; GFX10-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH16]], [[UADDO31]]
+    ; GFX10-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE63]]
+    ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[UADDE60]], [[ADD13]]
+    ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE60]], [[UV52]]
+    ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO28]], [[ADD13]]
+    ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL24]], [[UMULH17]]
+    ; GFX10-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDO32]], [[UMULH18]], [[UADDO33]]
+    ; GFX10-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE67]]
+    ; GFX10-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDE66]], [[UADDE64]], [[UADDO33]]
+    ; GFX10-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDE68]], [[C6]], [[UADDE71]]
+    ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE60]], [[ADD13]]
+    ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE72]]
+    ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UADDE70]]
+    ; GFX10-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[UADDE60]], [[ADD14]], [[UADDO35]]
     ; GFX10-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64)
     ; GFX10-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64)
-    ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[UV57]], [[UADDO64]]
-    ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[UV56]], [[UADDE14]]
-    ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV56]], [[UADDO64]]
-    ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL25]], [[MUL26]]
-    ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1)
-    ; GFX10-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH20]]
-    ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1)
-    ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]]
-    ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UV57]], [[UADDE14]]
-    ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV57]], [[UADDO64]]
-    ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV56]], [[UADDE14]]
-    ; GFX10-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[UMULH21]]
-    ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1)
-    ; GFX10-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH22]]
-    ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1)
-    ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]]
-    ; GFX10-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[ADD30]]
-    ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1)
-    ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]]
-    ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV57]], [[UADDE14]]
-    ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD32]]
+    ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[UV57]], [[UADDO34]]
+    ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[UV56]], [[UADDE74]]
+    ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV56]], [[UADDO34]]
+    ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[MUL25]], [[MUL26]]
+    ; GFX10-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDO36]], [[UMULH20]], [[UADDO37]]
+    ; GFX10-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE77]]
+    ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UV57]], [[UADDE74]]
+    ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV57]], [[UADDO34]]
+    ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV56]], [[UADDE74]]
+    ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[UMULH21]]
+    ; GFX10-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDO38]], [[UMULH22]], [[UADDO39]]
+    ; GFX10-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE81]]
+    ; GFX10-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[UADDE80]], [[UADDE78]], [[UADDO39]]
+    ; GFX10-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[UADDE82]], [[C6]], [[UADDE85]]
+    ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV57]], [[UADDE74]]
+    ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE86]]
     ; GFX10-NEXT: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
-    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV58]](s32), [[UADDO74]], [[C5]]
+    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV58]](s32), [[UADDE84]], [[C5]]
     ; GFX10-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64)
-    ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UV58]], [[ADD33]]
-    ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[UV61]], [[MUL28]]
-    ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UV59]], [[UADDO74]]
-    ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[MUL29]]
+    ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UV58]], [[ADD15]]
+    ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV61]], [[MUL28]]
+    ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UV59]], [[UADDE84]]
+    ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL29]]
     ; GFX10-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV54]], [[UV60]]
-    ; GFX10-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV55]], [[ADD35]], [[USUBO13]]
-    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV55]], [[ADD35]]
+    ; GFX10-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV55]], [[ADD17]], [[USUBO13]]
+    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV55]], [[ADD17]]
     ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE16]](s32)
     ; GFX10-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
     ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV63]]
@@ -2445,6 +2277,7 @@ body: |
     ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C3]]
     ; GFX6-NEXT: $vgpr0 = COPY [[AND]](s32)
+    ;
     ; GFX8-LABEL: name: test_srem_s16
     ; GFX8: liveins: $vgpr0, $vgpr1
     ; GFX8-NEXT: {{  $}}
@@ -2483,6 +2316,7 @@ body: |
     ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C3]]
     ; GFX8-NEXT: $vgpr0 = COPY [[AND]](s32)
+    ;
     ; GFX9-LABEL: name: test_srem_s16
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9-NEXT: {{  $}}
@@ -2521,6 +2355,7 @@ body: |
     ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C3]]
     ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32)
+    ;
     ; GFX10-LABEL: name: test_srem_s16
     ; GFX10: liveins: $vgpr0, $vgpr1
     ; GFX10-NEXT: {{  $}}
@@ -2648,6 +2483,7 @@ body: |
     ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+    ;
     ; GFX8-LABEL: name: test_srem_v2s16
     ; GFX8: liveins: $vgpr0, $vgpr1
     ; GFX8-NEXT: {{  $}}
@@ -2722,6 +2558,7 @@ body: |
     ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
     ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+    ;
     ; GFX9-LABEL: name: test_srem_v2s16
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9-NEXT: {{  $}}
@@ -2793,6 +2630,7 @@ body: |
     ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SUB9]](s32)
     ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
     ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+    ;
     ; GFX10-LABEL: name: test_srem_v2s16
     ; GFX10: liveins: $vgpr0, $vgpr1
     ; GFX10-NEXT: {{  $}}
@@ -2912,6 +2750,7 @@ body: |
     ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
     ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
     ; GFX6-NEXT: $vgpr0 = COPY [[SUB4]](s32)
+    ;
     ; GFX8-LABEL: name: test_srem_s7
     ; GFX8: liveins: $vgpr0, $vgpr1
     ; GFX8-NEXT: {{  $}}
@@ -2948,6 +2787,7 @@ body: |
     ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
     ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
     ; GFX8-NEXT: $vgpr0 = COPY [[SUB4]](s32)
+    ;
     ; GFX9-LABEL: name: test_srem_s7
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9-NEXT: {{  $}}
@@ -2984,6 +2824,7 @@ body: |
     ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
     ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
     ; GFX9-NEXT: $vgpr0 = COPY [[SUB4]](s32)
+    ;
     ; GFX10-LABEL: name: test_srem_s7
     ; GFX10: liveins: $vgpr0, $vgpr1
     ; GFX10-NEXT: {{  $}}
@@ -3071,6 +2912,7 @@ body: |
     ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
     ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
     ; GFX6-NEXT: $vgpr0 = COPY [[SUB4]](s32)
+    ;
     ; GFX8-LABEL: name: test_srem_s17
     ; GFX8: liveins: $vgpr0, $vgpr1
     ; GFX8-NEXT: {{  $}}
@@ -3107,6 +2949,7 @@ body: |
     ; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
     ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
     ; GFX8-NEXT: $vgpr0 = COPY [[SUB4]](s32)
+    ;
     ; GFX9-LABEL: name: test_srem_s17
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9-NEXT: {{  $}}
@@ -3143,6 +2986,7 @@ body: |
     ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]]
     ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
     ; GFX9-NEXT: $vgpr0 = COPY [[SUB4]](s32)
+    ;
     ; GFX10-LABEL: name: test_srem_s17
     ; GFX10: liveins: $vgpr0, $vgpr1
     ; GFX10-NEXT: {{  $}}
@@ -3239,6 +3083,7 @@ body: |
     ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]]
     ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]]
     ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
     ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
     ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
@@ -3248,88 +3093,72 @@ body: |
     ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
     ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
     ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]]
-    ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH1]], [[UADDO5]]
+    ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]]
     ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
     ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
     ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
-    ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
-    ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]]
-    ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
-    ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
-    ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH3]], [[UADDO7]]
+    ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]]
+    ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]]
+    ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]]
     ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
-    ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
-    ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
-    ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
-    ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]]
-    ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
-    ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
-    ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]]
-    ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
-    ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]]
-    ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]]
-    ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]]
-    ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]]
-    ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
-    ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
-    ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]]
-    ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]]
-    ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]]
-    ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
-    ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]]
-    ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
-    ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]]
-    ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
-    ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]]
-    ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]]
-    ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
-    ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]]
+    ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE14]]
+    ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]]
+    ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]]
+    ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO8]]
+    ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]]
+    ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]]
+    ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO8]]
+    ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]]
+    ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[MUL6]]
+    ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]]
+    ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[MUL6]]
+    ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH6]], [[UADDO11]]
+    ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]]
+    ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]]
+    ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[MUL6]]
+    ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]]
+    ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]]
+    ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]]
+    ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]]
+    ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]]
+    ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]]
+    ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE28]]
+    ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]]
+    ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]]
     ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
     ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
-    ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]]
-    ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]]
-    ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]]
-    ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
-    ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
-    ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]]
-    ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]]
-    ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]]
-    ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
-    ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]]
-    ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
-    ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]]
-    ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
-    ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
-    ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]]
-    ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]]
+    ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO14]]
+    ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE30]]
+    ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO14]]
+    ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH10]], [[UADDO17]]
+    ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]]
+    ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE30]]
+    ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO14]]
+    ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE30]]
+    ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH12]], [[UADDO19]]
+    ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]]
+    ; GFX6-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]]
+    ; GFX6-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]]
+    ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE30]]
+    ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE42]]
     ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
-    ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]]
-    ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]]
-    ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]]
-    ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]]
-    ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
-    ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]]
+    ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDE40]]
+    ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDE40]]
+    ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD6]]
+    ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDE40]]
+    ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]]
     ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]]
-    ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]]
-    ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]]
+    ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD8]], [[USUBO3]]
+    ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD8]]
     ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
     ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
     ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]]
@@ -3363,6 +3192,7 @@ body: |
     ; GFX6-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]]
     ; GFX6-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32)
     ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64)
+    ;
     ; GFX8-LABEL: name: test_srem_s33
     ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; GFX8-NEXT: {{  $}}
@@ -3413,88 +3243,73 @@ body: |
     ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]]
     ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]]
     ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
+    ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]]
     ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]]
     ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]]
     ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]]
-    ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]]
+    ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]]
     ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]]
     ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]]
     ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]]
-    ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]]
-    ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
-    ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]]
-    ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]]
+    ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]]
+    ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]]
+    ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]]
     ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]]
-    ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
-    ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]]
+    ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]]
+    ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]]
+    ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]]
     ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64)
     ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]]
     ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64)
-    ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]]
-    ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]]
-    ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]]
-    ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]]
-    ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]]
-    ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]]
-    ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]]
-    ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
-    ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]]
-    ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
-    ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]]
-    ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]]
-    ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]]
-    ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]]
-    ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
-    ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]]
+    ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV18]]
+    ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV20]]
+    ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV18]]
+    ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]]
+    ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]]
+    ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV20]]
+    ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV18]]
+    ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV20]]
+    ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
+    ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]]
+    ; GFX8-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]]
+    ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]]
+    ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]]
+    ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV20]]
+    ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]]
+    ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]]
+    ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]]
     ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
     ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
-    ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]]
-    ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]]
-    ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]]
-    ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
-    ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]]
-    ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]]
-    ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]]
-    ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]]
-    ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
-    ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]]
-    ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
-    ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]]
-    ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
-    ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]]
-    ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]]
-    ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]]
+    ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO14]]
+    ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE30]]
+    ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO14]]
+    ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
+    ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]]
+    ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]]
+    ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE30]]
+    ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO14]]
+    ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE30]]
+    ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
+    ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]]
+    ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]]
+    ; GFX8-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]]
+    ; GFX8-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]]
+    ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE30]]
+    ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]]
     ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDE40]], [[C5]]
     ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64)
     ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD2]], [[ANYEXT2]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]]
     ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64)
     ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]]
     ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]]
@@ -3532,6 +3347,7 @@ body: |
     ; GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV35]], [[UV37]], [[USUBO9]]
     ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32)
     ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64)
+    ;
     ; GFX9-LABEL: name: test_srem_s33
     ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; GFX9-NEXT: {{  $}}
@@ -3582,88 +3398,73 @@ body: |
     ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]]
     ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]]
     ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]]
     ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV16]]
     ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]]
     ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]]
-    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]]
+    ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]]
     ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV16]]
     ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]]
     ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV16]]
-    ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]]
-    ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
-    ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD]]
-    ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]]
+    ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]]
+    ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]]
+    ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]]
     ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV16]]
-    ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
-    ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO15]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]]
+    ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]]
+    ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]]
+    ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO9]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]]
     ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64)
     ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV19]](s32)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE4]], [[ANYEXT1]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO14]], [[AMDGPU_MAD_U64_U32_8]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE16]], [[ANYEXT1]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO8]], [[AMDGPU_MAD_U64_U32_8]]
     ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64)
-    ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV18]]
-    ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[UV20]]
-    ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV18]]
-    ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]]
-    ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV20]]
-    ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV18]]
-    ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV20]]
-    ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
-    ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]]
-    ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
-    ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD4]]
-    ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]]
-    ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV20]]
-    ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]]
-    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
-    ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD7]], [[UADDO27]]
+    ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV18]]
+    ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[UV20]]
+    ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV18]]
+    ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]]
+    ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]]
+    ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV20]]
+    ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV18]]
+    ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV20]]
+    ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
+    ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]]
+    ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]]
+    ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]]
+    ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]]
+    ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV20]]
+    ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]]
+    ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]]
+    ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD1]], [[UADDO15]]
     ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
     ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
-    ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO26]]
-    ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE6]]
-    ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO26]]
-    ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
-    ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]]
-    ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE6]]
-    ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO26]]
-    ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE6]]
-    ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
-    ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]]
-    ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
-    ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD8]]
-    ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
-    ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]]
-    ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE6]]
-    ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]]
+    ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDO14]]
+    ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV24]], [[UADDE30]]
+    ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDO14]]
+    ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
+    ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]]
+    ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]]
+    ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV25]], [[UADDE30]]
+    ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDO14]]
+    ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV24]], [[UADDE30]]
+    ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
+    ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]]
+    ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]]
+    ; GFX9-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]]
+    ; GFX9-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]]
+    ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV25]], [[UADDE30]]
+    ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]]
     ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDO36]], [[C5]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[UADDE40]], [[C5]]
     ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64)
     ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV29]](s32)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD11]], [[ANYEXT2]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDO36]], [[AMDGPU_MAD_U64_U32_14]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV26]](s32), [[ADD2]], [[ANYEXT2]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV27]](s32), [[UADDE40]], [[AMDGPU_MAD_U64_U32_14]]
     ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64)
     ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV28]]
     ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV30]], [[USUBO3]]
@@ -3701,6 +3502,7 @@ body: |
     ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV35]], [[UV37]], [[USUBO9]]
     ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32)
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64)
+    ;
     ; GFX10-LABEL: name: test_srem_s33
     ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; GFX10-NEXT: {{  $}}
@@ -3751,92 +3553,77 @@ body: |
     ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[MUL]]
     ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
     ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]]
+    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV14]]
     ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
     ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV14]]
     ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]]
-    ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH]]
-    ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH]], [[UADDO5]]
+    ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]]
     ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
     ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV14]]
     ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
-    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]]
-    ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH2]]
-    ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
-    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
-    ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]]
+    ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH2]], [[UADDO7]]
+    ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]]
+    ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]]
+    ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]]
     ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
-    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]]
-    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
-    ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
-    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO14]], [[C5]]
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE14]]
+    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]]
+    ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]]
+    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO8]], [[C5]]
     ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64)
-    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
-    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL5]]
-    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
-    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]]
-    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV16]]
-    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]]
-    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[UV16]]
-    ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]]
-    ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH4]]
-    ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]]
-    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV16]]
-    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]]
-    ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]]
-    ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH6]]
-    ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
-    ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]]
-    ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
-    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]]
-    ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]]
-    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
-    ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]]
+    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]]
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL5]]
+    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]]
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]]
+    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[UV16]]
+    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]]
+    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[UV16]]
+    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]]
+    ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH4]], [[UADDO11]]
+    ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]]
+    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]]
+    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[UV16]]
+    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]]
+    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]]
+    ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH6]], [[UADDO13]]
+    ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]]
+    ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]]
+    ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]]
+    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]]
+    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE28]]
+    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]]
+    ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]]
     ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
     ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
-    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]]
-    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]]
-    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]]
-    ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]]
-    ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH8]]
-    ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]]
-    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]]
-    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]]
-    ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]]
-    ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH10]]
-    ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
-    ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]]
-    ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
-    ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
-    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]]
-    ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]]
+    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO14]]
+    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE30]]
+    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO14]]
+    ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]]
+    ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH8]], [[UADDO17]]
+    ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]]
+    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE30]]
+    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO14]]
+    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE30]]
+    ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]]
+    ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH10]], [[UADDO19]]
+    ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]]
+    ; GFX10-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]]
+    ; GFX10-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]]
+    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE30]]
+    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE42]]
     ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
-    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO36]], [[C5]]
+    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDE40]], [[C5]]
     ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
-    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]]
-    ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[MUL13]]
-    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]]
-    ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]]
+    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD6]]
+    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[MUL13]]
+    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDE40]]
+    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]]
     ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]]
-    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]]
-    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]]
+    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD8]], [[USUBO3]]
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD8]]
     ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
     ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
     ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV27]]
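The check-line churn above and below follows one pattern: where the narrowed G_UMULH expansion previously zero-extended each G_UADDO carry bit and summed the extensions (the G_ZEXT/G_ADD pairs on the removed lines), it now threads the carry through G_UADDE, so an earlier carry-out becomes the carry-in of a later add. The sketch below is only an illustration of that idea, not the legalizer's exact expansion; the file name and the helpers uadde32/umulh64_narrowed are placeholders invented here. It narrows a 64x64 unsigned multiply-high to 32-bit limbs, propagates the column carries through an add-with-carry helper, and checks the result against the compiler's 128-bit reference.

/* umulh_narrow.c: a minimal sketch (not the legalizer's exact expansion) of the
 * idea the updated check lines exercise: when a wide unsigned multiply-high is
 * narrowed to 32-bit pieces, the carries of the partial-product sums can be
 * threaded through an add-with-carry primitive (G_UADDO/G_UADDE above) instead
 * of zero-extending every carry bit and summing the extensions.
 * Build: cc -O2 umulh_narrow.c && ./a.out
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* 32-bit add with carry-in and carry-out: the C analogue of G_UADDE
 * (G_UADDO is the carry_in == 0 case). */
static uint32_t uadde32(uint32_t a, uint32_t b, uint32_t carry_in,
                        uint32_t *carry_out) {
  uint64_t wide = (uint64_t)a + b + carry_in;
  *carry_out = (uint32_t)(wide >> 32);
  return (uint32_t)wide;
}

/* High 64 bits of x * y, built from four 32x32->64 partial products whose
 * column sums are combined with chained carries rather than zext+add. */
static uint64_t umulh64_narrowed(uint64_t x, uint64_t y) {
  uint32_t x0 = (uint32_t)x, x1 = (uint32_t)(x >> 32);
  uint32_t y0 = (uint32_t)y, y1 = (uint32_t)(y >> 32);

  uint64_t p00 = (uint64_t)x0 * y0; /* bits [0, 64)   */
  uint64_t p01 = (uint64_t)x0 * y1; /* bits [32, 96)  */
  uint64_t p10 = (uint64_t)x1 * y0; /* bits [32, 96)  */
  uint64_t p11 = (uint64_t)x1 * y1; /* bits [64, 128) */

  /* Accumulate bits [32, 128) of the product in limbs r1..r3; the low limb of
   * p00 only affects bits [0, 32) and never reaches the high half. */
  uint32_t c;
  uint32_t r1 = uadde32((uint32_t)(p00 >> 32), (uint32_t)p01, 0, &c);
  uint32_t r2 = uadde32((uint32_t)(p01 >> 32), (uint32_t)p11, c, &c);
  uint32_t r3 = uadde32((uint32_t)(p11 >> 32), 0, c, &c);

  r1 = uadde32(r1, (uint32_t)p10, 0, &c);
  r2 = uadde32(r2, (uint32_t)(p10 >> 32), c, &c);
  r3 = uadde32(r3, 0, c, &c);

  (void)r1; /* r1 holds bits [32, 64) of the full product, below the high half */
  return ((uint64_t)r3 << 32) | r2;
}

int main(void) {
  const uint64_t samples[] = {0, 1, 3, 0x9e3779b97f4a7c15ULL,
                              0xffffffffffffffffULL, 0x123456789abcdef0ULL};
  for (size_t i = 0; i < sizeof(samples) / sizeof(samples[0]); ++i)
    for (size_t j = 0; j < sizeof(samples) / sizeof(samples[0]); ++j) {
      uint64_t x = samples[i], y = samples[j];
      /* Reference via the compiler's 128-bit integer extension. */
      uint64_t expect = (uint64_t)(((unsigned __int128)x * y) >> 64);
      assert(umulh64_narrowed(x, y) == expect);
    }
  puts("umulh64_narrowed matches the 128-bit reference");
  return 0;
}

The design point, as far as these tests show it, is that a chained carry maps directly onto add-with-carry instructions (for example AMDGPU's carry-in VALU adds), so each extra limb costs one add instead of a zero-extension plus a separate add of the carry totals.
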
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir
index bfba201e264b1..6d9a168aa65e1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir
@@ -381,6 +381,7 @@ body: |
     ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]]
     ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]]
     ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
     ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
     ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
@@ -390,89 +391,73 @@ body: |
     ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
     ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
     ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]]
-    ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH1]], [[UADDO1]]
+    ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]]
     ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
     ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
     ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
-    ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
-    ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]]
-    ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
-    ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH3]], [[UADDO3]]
+    ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]]
+    ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]]
+    ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]]
     ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
-    ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
-    ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
-    ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
-    ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]]
-    ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
-    ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
-    ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]]
-    ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
-    ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]]
-    ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]]
-    ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]]
-    ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]]
-    ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
-    ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]]
-    ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
-    ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]]
-    ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]]
-    ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]]
-    ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
-    ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]]
-    ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]]
-    ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
-    ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]]
-    ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]]
-    ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
-    ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]]
+    ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE10]]
+    ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]]
+    ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]]
+    ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO4]]
+    ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]]
+    ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]]
+    ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO4]]
+    ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]]
+    ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[MUL6]]
+    ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]]
+    ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[MUL6]]
+    ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH6]], [[UADDO7]]
+    ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]]
+    ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]]
+    ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[MUL6]]
+    ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]]
+    ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH8]], [[UADDO9]]
+    ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]]
+    ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]]
+    ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]]
+    ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]]
+    ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE24]]
+    ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]]
+    ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]]
     ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
     ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
-    ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]]
-    ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]]
-    ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]]
-    ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
-    ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]]
-    ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
-    ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]]
-    ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]]
-    ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]]
-    ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
-    ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]]
-    ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]]
-    ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
-    ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]]
-    ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]]
-    ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32)
+    ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO10]]
+    ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE26]]
+    ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO10]]
+    ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH10]], [[UADDO13]]
+    ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]]
+    ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE26]]
+    ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO10]]
+    ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE26]]
+    ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH12]], [[UADDO15]]
+    ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]]
+    ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]]
+    ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]]
+    ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE26]]
+    ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE38]]
+    ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD6]](s32)
     ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
-    ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]]
-    ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]]
-    ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]]
-    ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]]
-    ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
-    ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]]
+    ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDE36]]
+    ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDE36]]
+    ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD6]]
+    ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDE36]]
+    ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]]
     ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]]
-    ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]]
-    ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]]
+    ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD8]], [[USUBO3]]
+    ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD8]]
     ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
     ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]]
     ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
@@ -485,9 +470,9 @@ body: |
     ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]]
     ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
-    ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV14]]
-    ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV15]], [[UADDO35]]
-    ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32)
+    ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV14]]
+    ; GFX6-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV15]], [[UADDO17]]
+    ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32)
     ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]]
     ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
     ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]]
@@ -495,9 +480,9 @@ body: |
     ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]]
     ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
     ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
-    ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV16]]
-    ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV17]], [[UADDO37]]
-    ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32)
+    ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV16]]
+    ; GFX6-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV17]], [[UADDO19]]
+    ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32)
     ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]]
     ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
     ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]]
@@ -537,89 +522,74 @@ body: |
     ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]]
     ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]]
     ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
+    ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]]
     ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]]
     ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]]
     ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
-    ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]]
+    ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]]
     ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]]
     ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]]
     ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]]
-    ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]]
-    ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]]
-    ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]]
+    ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]]
+    ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]]
+    ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]]
     ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]]
-    ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
-    ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]]
+    ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]]
+    ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]]
+    ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]]
     ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64)
     ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]]
     ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64)
-    ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]]
-    ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]]
-    ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]]
-    ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]]
-    ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
-    ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]]
-    ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]]
-    ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]]
-    ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
-    ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
-    ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]]
-    ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]]
-    ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]]
-    ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]]
-    ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
-    ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]]
+    ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV10]]
+    ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV12]]
+    ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV10]]
+    ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]]
+    ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]]
+    ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV12]]
+    ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV10]]
+    ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV12]]
+    ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
+    ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]]
+    ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]]
+    ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]]
+    ; GFX8-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]]
+    ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV12]]
+    ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]]
+    ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]]
+    ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]]
     ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
     ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
-    ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]]
-    ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]]
-    ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]]
-    ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
-    ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]]
-    ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
-    ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]]
-    ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]]
-    ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]]
-    ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
-    ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
-    ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]]
-    ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]]
-    ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]]
-    ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]]
-    ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD11]](s32)
+    ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO10]]
+    ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE26]]
+    ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO10]]
+    ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
+    ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]]
+    ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]]
+    ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE26]]
+    ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO10]]
+    ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE26]]
+    ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
+    ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]]
+    ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]]
+    ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]]
+    ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]]
+    ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE26]]
+    ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]]
+    ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD2]](s32)
     ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C4]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDE36]], [[C4]]
     ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64)
     ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD2]], [[ANYEXT2]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]]
     ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64)
     ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]]
     ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]]
@@ -636,9 +606,9 @@ body: |
     ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]]
     ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
-    ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV26]]
-    ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV27]], [[UADDO35]]
-    ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32)
+    ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV26]]
+    ; GFX8-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV27]], [[UADDO17]]
+    ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32)
     ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]]
     ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
     ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]]
@@ -646,9 +616,9 @@ body: |
     ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]]
     ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
     ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
-    ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV28]]
-    ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV29]], [[UADDO37]]
-    ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32)
+    ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV28]]
+    ; GFX8-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV29]], [[UADDO19]]
+    ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32)
     ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]]
     ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
     ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]]
@@ -688,89 +658,74 @@ body: |
     ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]]
     ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]]
     ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]]
     ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]]
     ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]]
     ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
-    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]]
+    ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]]
     ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]]
     ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]]
     ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]]
-    ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]]
-    ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]]
-    ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]]
+    ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]]
+    ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]]
+    ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]]
     ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]]
-    ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
-    ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]]
+    ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]]
+    ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]]
+    ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]]
     ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64)
     ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]]
     ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64)
-    ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]]
-    ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]]
-    ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]]
-    ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]]
-    ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
-    ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]]
-    ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]]
-    ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]]
-    ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
-    ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
-    ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]]
-    ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]]
-    ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]]
-    ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]]
-    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
-    ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]]
+    ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV10]]
+    ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV12]]
+    ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV10]]
+    ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]]
+    ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]]
+    ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV12]]
+    ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV10]]
+    ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV12]]
+    ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
+    ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]]
+    ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]]
+    ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]]
+    ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]]
+    ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV12]]
+    ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]]
+    ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]]
+    ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]]
     ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
     ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
-    ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]]
-    ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]]
-    ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]]
-    ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
-    ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]]
-    ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
-    ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]]
-    ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]]
-    ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]]
-    ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
-    ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
-    ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]]
-    ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]]
-    ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]]
-    ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]]
-    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD11]](s32)
+    ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO10]]
+    ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE26]]
+    ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO10]]
+    ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
+    ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]]
+    ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]]
+    ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE26]]
+    ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO10]]
+    ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE26]]
+    ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
+    ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]]
+    ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]]
+    ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]]
+    ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]]
+    ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE26]]
+    ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD2]](s32)
     ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C4]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDE36]], [[C4]]
     ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64)
     ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD2]], [[ANYEXT2]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]]
     ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64)
     ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]]
     ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]]
@@ -787,9 +742,9 @@ body: |
     ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]]
     ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
-    ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV26]]
-    ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV27]], [[UADDO35]]
-    ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32)
+    ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV26]]
+    ; GFX9-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV27]], [[UADDO17]]
+    ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32)
     ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]]
     ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
     ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]]
@@ -797,9 +752,9 @@ body: |
     ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]]
     ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
     ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
-    ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV28]]
-    ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV29]], [[UADDO37]]
-    ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32)
+    ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV28]]
+    ; GFX9-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV29]], [[UADDO19]]
+    ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32)
     ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]]
     ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
     ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]]
@@ -839,93 +794,78 @@ body: |
     ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[MUL]]
     ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
     ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]]
+    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]]
     ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
     ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]]
     ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]]
-    ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
-    ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]]
+    ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]]
     ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
     ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]]
     ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
-    ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]]
-    ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]]
-    ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
-    ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]]
+    ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]]
+    ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]]
+    ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]]
+    ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]]
     ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
-    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]]
-    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
-    ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
-    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]]
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]]
+    ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]]
+    ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]]
+    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]]
     ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64)
-    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
-    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[MUL5]]
-    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
-    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]]
-    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV8]]
-    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]]
-    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV8]]
-    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]]
-    ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]]
-    ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
-    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]]
-    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV8]]
-    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]]
-    ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]]
-    ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
-    ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]]
-    ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
-    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]]
-    ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]]
-    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
-    ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]]
+    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]]
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[MUL5]]
+    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]]
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]]
+    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV8]]
+    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]]
+    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV8]]
+    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]]
+    ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]]
+    ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]]
+    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]]
+    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV8]]
+    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]]
+    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]]
+    ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]]
+    ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]]
+    ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]]
+    ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]]
+    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]]
+    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]]
+    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]]
+    ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]]
     ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
     ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
-    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]]
-    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]]
-    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]]
-    ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]]
-    ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]]
-    ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
-    ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]]
-    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]]
-    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]]
-    ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]]
-    ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
-    ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]]
-    ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
-    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]]
-    ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]]
-    ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32)
+    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO10]]
+    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE26]]
+    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO10]]
+    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]]
+    ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]]
+    ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]]
+    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE26]]
+    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO10]]
+    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE26]]
+    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]]
+    ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]]
+    ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]]
+    ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]]
+    ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]]
+    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE26]]
+    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]]
+    ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD6]](s32)
     ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
-    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV14]](s32), [[UADDO32]], [[C4]]
+    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV14]](s32), [[UADDE36]], [[C4]]
     ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
-    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]]
-    ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL13]]
-    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]]
-    ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]]
+    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD6]]
+    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL13]]
+    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDE36]]
+    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]]
     ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV16]]
-    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]]
-    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]]
+    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD8]], [[USUBO3]]
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD8]]
     ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
     ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV19]]
     ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
@@ -938,9 +878,9 @@ body: |
     ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]]
     ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
-    ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV20]]
-    ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV21]], [[UADDO35]]
-    ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32)
+    ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV20]]
+    ; GFX10-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV21]], [[UADDO17]]
+    ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32)
     ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV19]]
     ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
     ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV18]]
@@ -948,9 +888,9 @@ body: |
     ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV19]]
     ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
     ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
-    ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV22]]
-    ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV23]], [[UADDO37]]
-    ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32)
+    ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV22]]
+    ; GFX10-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV23]], [[UADDO19]]
+    ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32)
     ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]]
     ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
     ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]]
@@ -998,6 +938,7 @@ body: |
     ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]]
     ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]]
     ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
     ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
     ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
@@ -1007,89 +948,73 @@ body: |
     ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
     ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
     ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]]
-    ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH1]], [[UADDO1]]
+    ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]]
     ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
     ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
     ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
-    ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
-    ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]]
-    ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
-    ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH3]], [[UADDO3]]
+    ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]]
+    ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]]
+    ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]]
     ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
-    ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
-    ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
-    ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
-    ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]]
-    ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
-    ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
-    ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]]
-    ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
-    ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]]
-    ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]]
-    ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]]
-    ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]]
-    ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
-    ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]]
-    ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
-    ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]]
-    ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]]
-    ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]]
-    ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
-    ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]]
-    ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]]
-    ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
-    ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]]
-    ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]]
-    ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
-    ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]]
+    ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE10]]
+    ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]]
+    ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]]
+    ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO4]]
+    ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]]
+    ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]]
+    ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO4]]
+    ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]]
+    ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[MUL6]]
+    ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]]
+    ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[MUL6]]
+    ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH6]], [[UADDO7]]
+    ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]]
+    ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]]
+    ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[MUL6]]
+    ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]]
+    ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH8]], [[UADDO9]]
+    ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]]
+    ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]]
+    ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]]
+    ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]]
+    ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE24]]
+    ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]]
+    ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]]
     ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
     ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
-    ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]]
-    ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]]
-    ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]]
-    ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
-    ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]]
-    ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
-    ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]]
-    ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]]
-    ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]]
-    ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
-    ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]]
-    ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]]
-    ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
-    ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]]
-    ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]]
-    ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32)
+    ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO10]]
+    ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE26]]
+    ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO10]]
+    ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH10]], [[UADDO13]]
+    ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]]
+    ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE26]]
+    ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO10]]
+    ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE26]]
+    ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH12]], [[UADDO15]]
+    ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]]
+    ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]]
+    ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]]
+    ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE26]]
+    ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE38]]
+    ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD6]](s32)
     ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
-    ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDO32]]
-    ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]]
-    ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]]
-    ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDO32]]
-    ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
-    ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]]
+    ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDE36]]
+    ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDE36]]
+    ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD6]]
+    ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDE36]]
+    ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]]
     ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[MUL15]]
-    ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]]
-    ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]]
+    ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD8]], [[USUBO3]]
+    ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD8]]
     ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
     ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV17]]
     ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
@@ -1102,9 +1027,9 @@ body: |
     ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]]
     ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
-    ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV18]]
-    ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV19]], [[UADDO35]]
-    ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32)
+    ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV18]]
+    ; GFX6-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV19]], [[UADDO17]]
+    ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32)
     ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV17]]
     ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
     ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV16]]
@@ -1112,9 +1037,9 @@ body: |
     ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV17]]
     ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
     ; GFX6-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
-    ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV20]]
-    ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV21]], [[UADDO37]]
-    ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32)
+    ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV20]]
+    ; GFX6-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV21]], [[UADDO19]]
+    ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32)
     ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]]
     ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
     ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]]
@@ -1140,94 +1065,79 @@ body: |
     ; GFX6-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[FPTOUI2]]
     ; GFX6-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[FPTOUI3]]
     ; GFX6-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO6]], [[FPTOUI2]]
-    ; GFX6-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]]
-    ; GFX6-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]]
+    ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]]
+    ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[UMULH15]]
     ; GFX6-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]]
-    ; GFX6-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]]
+    ; GFX6-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD10]]
     ; GFX6-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]]
-    ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]]
-    ; GFX6-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1)
-    ; GFX6-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH16]]
-    ; GFX6-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1)
-    ; GFX6-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]]
-    ; GFX6-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]]
+    ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]]
+    ; GFX6-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UADDO20]], [[UMULH16]], [[UADDO21]]
+    ; GFX6-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE45]]
+    ; GFX6-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD10]]
     ; GFX6-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]]
-    ; GFX6-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]]
-    ; GFX6-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]]
-    ; GFX6-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1)
-    ; GFX6-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH18]]
-    ; GFX6-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1)
-    ; GFX6-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]]
-    ; GFX6-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[ADD20]]
-    ; GFX6-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1)
-    ; GFX6-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]]
-    ; GFX6-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]]
-    ; GFX6-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]]
-    ; GFX6-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO46]]
-    ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO49]]
-    ; GFX6-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[UADDO48]]
-    ; GFX6-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[UADDO48]]
-    ; GFX6-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[UADDE8]]
-    ; GFX6-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO6]], [[UADDO48]]
-    ; GFX6-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]]
-    ; GFX6-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]]
-    ; GFX6-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[MUL24]]
-    ; GFX6-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO48]], [[ADD25]]
-    ; GFX6-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[MUL24]]
-    ; GFX6-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]]
-    ; GFX6-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1)
-    ; GFX6-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH21]]
-    ; GFX6-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1)
-    ; GFX6-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]]
-    ; GFX6-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[ADD25]]
-    ; GFX6-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[MUL24]]
-    ; GFX6-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[ADD25]]
-    ; GFX6-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]]
-    ; GFX6-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1)
-    ; GFX6-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH23]]
-    ; GFX6-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1)
-    ; GFX6-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]]
-    ; GFX6-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[ADD26]]
-    ; GFX6-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1)
-    ; GFX6-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]]
-    ; GFX6-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[ADD25]]
-    ; GFX6-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]]
-    ; GFX6-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[UADDO58]]
-    ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[ADD29]], [[UADDO61]]
+    ; GFX6-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD10]]
+    ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]]
+    ; GFX6-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDO22]], [[UMULH18]], [[UADDO23]]
+    ; GFX6-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE49]]
+    ; GFX6-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDE48]], [[UADDE46]], [[UADDO23]]
+    ; GFX6-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[UADDE50]], [[C5]], [[UADDE53]]
+    ; GFX6-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD10]]
+    ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE54]]
+    ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE52]]
+    ; GFX6-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD11]], [[UADDO25]]
+    ; GFX6-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[UADDO24]]
+    ; GFX6-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[UADDO24]]
+    ; GFX6-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[UADDE56]]
+    ; GFX6-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO6]], [[UADDO24]]
+    ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]]
+    ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[UMULH20]]
+    ; GFX6-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE56]], [[MUL24]]
+    ; GFX6-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO24]], [[ADD13]]
+    ; GFX6-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO24]], [[MUL24]]
+    ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]]
+    ; GFX6-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDO26]], [[UMULH21]], [[UADDO27]]
+    ; GFX6-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE59]]
+    ; GFX6-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE56]], [[ADD13]]
+    ; GFX6-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE56]], [[MUL24]]
+    ; GFX6-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO24]], [[ADD13]]
+    ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]]
+    ; GFX6-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH23]], [[UADDO29]]
+    ; GFX6-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE63]]
+    ; GFX6-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDE62]], [[UADDE60]], [[UADDO29]]
+    ; GFX6-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[UADDE64]], [[C5]], [[UADDE67]]
+    ; GFX6-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE56]], [[ADD13]]
+    ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[UADDE68]]
+    ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UADDE66]]
+    ; GFX6-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDE56]], [[ADD14]], [[UADDO31]]
     ; GFX6-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
     ; GFX6-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
-    ; GFX6-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDO60]]
-    ; GFX6-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV30]], [[UADDE10]]
-    ; GFX6-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDO60]]
-    ; GFX6-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]]
-    ; GFX6-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1)
-    ; GFX6-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH25]]
-    ; GFX6-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1)
-    ; GFX6-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]]
-    ; GFX6-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDE10]]
-    ; GFX6-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDO60]]
-    ; GFX6-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDE10]]
-    ; GFX6-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]]
-    ; GFX6-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1)
-    ; GFX6-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH27]]
-    ; GFX6-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1)
-    ; GFX6-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]]
-    ; GFX6-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[UADDO68]], [[ADD30]]
-    ; GFX6-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1)
-    ; GFX6-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]]
-    ; GFX6-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDE10]]
-    ; GFX6-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]]
-    ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO70]](s32), [[ADD33]](s32)
+    ; GFX6-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDO30]]
+    ; GFX6-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV30]], [[UADDE70]]
+    ; GFX6-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDO30]]
+    ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]]
+    ; GFX6-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDO32]], [[UMULH25]], [[UADDO33]]
+    ; GFX6-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE73]]
+    ; GFX6-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDE70]]
+    ; GFX6-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDO30]]
+    ; GFX6-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDE70]]
+    ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]]
+    ; GFX6-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDO34]], [[UMULH27]], [[UADDO35]]
+    ; GFX6-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE77]]
+    ; GFX6-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDE76]], [[UADDE74]], [[UADDO35]]
+    ; GFX6-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[UADDE78]], [[C5]], [[UADDE81]]
+    ; GFX6-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDE70]]
+    ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[UADDE82]]
+    ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE80]](s32), [[ADD15]](s32)
     ; GFX6-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
-    ; GFX6-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[UADDO70]]
-    ; GFX6-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV33]], [[UADDO70]]
-    ; GFX6-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[ADD33]]
-    ; GFX6-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV32]], [[UADDO70]]
-    ; GFX6-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]]
-    ; GFX6-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]]
+    ; GFX6-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[UADDE80]]
+    ; GFX6-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV33]], [[UADDE80]]
+    ; GFX6-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[ADD15]]
+    ; GFX6-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV32]], [[UADDE80]]
+    ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]]
+    ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH29]]
     ; GFX6-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV28]], [[MUL33]]
-    ; GFX6-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV29]], [[ADD35]], [[USUBO9]]
-    ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV29]], [[ADD35]]
+    ; GFX6-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV29]], [[ADD17]], [[USUBO9]]
+    ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV29]], [[ADD17]]
     ; GFX6-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
     ; GFX6-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE10]](s32), [[UV35]]
     ; GFX6-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1)
@@ -1239,9 +1149,9 @@ body: |
     ; GFX6-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV35]], [[USUBO9]]
     ; GFX6-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[USUBE12]], [[C5]], [[USUBO11]]
     ; GFX6-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
-    ; GFX6-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UV36]]
-    ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[ADD33]], [[UV37]], [[UADDO73]]
-    ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO72]](s32), [[UADDE12]](s32)
+    ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDE80]], [[UV36]]
+    ; GFX6-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV37]], [[UADDO37]]
+    ; GFX6-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE84]](s32)
     ; GFX6-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV35]]
     ; GFX6-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1)
     ; GFX6-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV34]]
@@ -1249,9 +1159,9 @@ body: |
     ; GFX6-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV35]]
     ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]]
     ; GFX6-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
-    ; GFX6-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[UV38]]
-    ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[UV39]], [[UADDO75]]
-    ; GFX6-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO74]](s32), [[UADDE14]](s32)
+    ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV38]]
+    ; GFX6-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[UADDE84]], [[UV39]], [[UADDO39]]
+    ; GFX6-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE86]](s32)
     ; GFX6-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]]
     ; GFX6-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]]
     ; GFX6-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]]
@@ -1294,89 +1204,74 @@ body: |
     ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]]
     ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]]
     ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
+    ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV10]]
     ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV12]]
     ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV10]]
     ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
-    ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]]
+    ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]]
     ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV12]]
     ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV10]]
     ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV12]]
-    ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]]
-    ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]]
-    ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]]
+    ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]]
+    ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]]
+    ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]]
     ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV12]]
-    ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
-    ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]]
+    ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]]
+    ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]]
+    ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]]
     ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64)
     ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]]
     ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64)
-    ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV14]]
-    ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV16]]
-    ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV14]]
-    ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]]
-    ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
-    ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV16]]
-    ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV14]]
-    ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV16]]
-    ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
-    ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
-    ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]]
-    ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]]
-    ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV16]]
-    ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]]
-    ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
-    ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]]
+    ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV14]]
+    ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV16]]
+    ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV14]]
+    ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]]
+    ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]]
+    ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV16]]
+    ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV14]]
+    ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV16]]
+    ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
+    ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]]
+    ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]]
+    ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]]
+    ; GFX8-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]]
+    ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV16]]
+    ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]]
+    ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]]
+    ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]]
     ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
     ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
-    ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO22]]
-    ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE2]]
-    ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO22]]
-    ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
-    ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]]
-    ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
-    ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE2]]
-    ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO22]]
-    ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE2]]
-    ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
-    ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
-    ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]]
-    ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]]
-    ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE2]]
-    ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]]
-    ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD11]](s32)
+    ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO10]]
+    ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE26]]
+    ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO10]]
+    ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
+    ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]]
+    ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]]
+    ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE26]]
+    ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO10]]
+    ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE26]]
+    ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
+    ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]]
+    ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]]
+    ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]]
+    ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]]
+    ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE26]]
+    ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]]
+    ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD2]](s32)
     ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO32]], [[C4]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDE36]], [[C4]]
     ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64)
     ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV25]](s32)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[ADD11]], [[ANYEXT2]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV23]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[ADD2]], [[ANYEXT2]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV23]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]]
     ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64)
     ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]]
     ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[UV26]], [[USUBO3]]
@@ -1393,9 +1288,9 @@ body: |
     ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]]
     ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; GFX8-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
-    ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV30]]
-    ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV31]], [[UADDO35]]
-    ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32)
+    ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV30]]
+    ; GFX8-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV31]], [[UADDO17]]
+    ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32)
     ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV29]]
     ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
     ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV28]]
@@ -1403,9 +1298,9 @@ body: |
     ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV29]]
     ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
     ; GFX8-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
-    ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV32]]
-    ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV33]], [[UADDO37]]
-    ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32)
+    ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV32]]
+    ; GFX8-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV33]], [[UADDO19]]
+    ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32)
     ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]]
     ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
     ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]]
@@ -1436,85 +1331,70 @@ body: |
     ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV40]]
     ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV42]]
     ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV40]]
-    ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
-    ; GFX8-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1)
-    ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH12]]
-    ; GFX8-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1)
-    ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]]
+    ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX8-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UADDO20]], [[UMULH12]], [[UADDO21]]
+    ; GFX8-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE45]]
     ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV42]]
     ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV40]]
     ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV42]]
-    ; GFX8-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]]
-    ; GFX8-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1)
-    ; GFX8-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH14]]
-    ; GFX8-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1)
-    ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]]
-    ; GFX8-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[ADD12]]
-    ; GFX8-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1)
-    ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]]
+    ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]]
+    ; GFX8-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDO22]], [[UMULH14]], [[UADDO23]]
+    ; GFX8-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE49]]
+    ; GFX8-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDE48]], [[UADDE46]], [[UADDO23]]
+    ; GFX8-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[UADDE50]], [[C5]], [[UADDE53]]
     ; GFX8-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV42]]
-    ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]]
-    ; GFX8-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO46]]
-    ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO49]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDO48]], [[C4]]
+    ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE54]]
+    ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE52]]
+    ; GFX8-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD3]], [[UADDO25]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDO24]], [[C4]]
     ; GFX8-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64)
     ; GFX8-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV45]](s32)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDE8]], [[ANYEXT4]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE8]](s32), [[UADDO48]], [[AMDGPU_MAD_U64_U32_26]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDE56]], [[ANYEXT4]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE8]](s32), [[UADDO24]], [[AMDGPU_MAD_U64_U32_26]]
     ; GFX8-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64)
-    ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[UV44]]
-    ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO48]], [[UV46]]
-    ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[UV44]]
-    ; GFX8-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
-    ; GFX8-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1)
-    ; GFX8-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH16]]
-    ; GFX8-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1)
-    ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]]
-    ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[UV46]]
-    ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[UV44]]
-    ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[UV46]]
-    ; GFX8-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]]
-    ; GFX8-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1)
-    ; GFX8-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH18]]
-    ; GFX8-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1)
-    ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]]
-    ; GFX8-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[ADD16]]
-    ; GFX8-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1)
-    ; GFX8-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]]
-    ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[UV46]]
-    ; GFX8-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]]
-    ; GFX8-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[UADDO58]]
-    ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[ADD19]], [[UADDO61]]
+    ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE56]], [[UV44]]
+    ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO24]], [[UV46]]
+    ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO24]], [[UV44]]
+    ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX8-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDO26]], [[UMULH16]], [[UADDO27]]
+    ; GFX8-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE59]]
+    ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE56]], [[UV46]]
+    ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE56]], [[UV44]]
+    ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO24]], [[UV46]]
+    ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]]
+    ; GFX8-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH18]], [[UADDO29]]
+    ; GFX8-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE63]]
+    ; GFX8-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDE62]], [[UADDE60]], [[UADDO29]]
+    ; GFX8-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[UADDE64]], [[C5]], [[UADDE67]]
+    ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE56]], [[UV46]]
+    ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE68]]
+    ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UADDE66]]
+    ; GFX8-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDE56]], [[ADD4]], [[UADDO31]]
     ; GFX8-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
     ; GFX8-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
-    ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO60]]
-    ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE10]]
-    ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO60]]
-    ; GFX8-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]]
-    ; GFX8-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1)
-    ; GFX8-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH20]]
-    ; GFX8-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1)
-    ; GFX8-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]]
-    ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE10]]
-    ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO60]]
-    ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE10]]
-    ; GFX8-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]]
-    ; GFX8-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1)
-    ; GFX8-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH22]]
-    ; GFX8-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1)
-    ; GFX8-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]]
-    ; GFX8-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[UADDO68]], [[ADD20]]
-    ; GFX8-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1)
-    ; GFX8-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]]
-    ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE10]]
-    ; GFX8-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]]
-    ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO70]](s32), [[ADD23]](s32)
+    ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO30]]
+    ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE70]]
+    ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO30]]
+    ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]]
+    ; GFX8-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDO32]], [[UMULH20]], [[UADDO33]]
+    ; GFX8-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE73]]
+    ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE70]]
+    ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO30]]
+    ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE70]]
+    ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]]
+    ; GFX8-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDO34]], [[UMULH22]], [[UADDO35]]
+    ; GFX8-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE77]]
+    ; GFX8-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDE76]], [[UADDE74]], [[UADDO35]]
+    ; GFX8-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[UADDE78]], [[C5]], [[UADDE81]]
+    ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE70]]
+    ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE82]]
+    ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE80]](s32), [[ADD5]](s32)
     ; GFX8-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV52]](s32), [[UADDO70]], [[C4]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV52]](s32), [[UADDE80]], [[C4]]
     ; GFX8-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64)
     ; GFX8-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV55]](s32)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV52]](s32), [[ADD23]], [[ANYEXT5]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV53]](s32), [[UADDO70]], [[AMDGPU_MAD_U64_U32_32]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV52]](s32), [[ADD5]], [[ANYEXT5]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV53]](s32), [[UADDE80]], [[AMDGPU_MAD_U64_U32_32]]
     ; GFX8-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64)
     ; GFX8-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV48]], [[UV54]]
     ; GFX8-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV49]], [[UV56]], [[USUBO9]]
@@ -1530,9 +1410,9 @@ body: |
     ; GFX8-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV59]], [[USUBO9]]
     ; GFX8-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[USUBE12]], [[C5]], [[USUBO11]]
     ; GFX8-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
-    ; GFX8-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UV60]]
-    ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[ADD23]], [[UV61]], [[UADDO73]]
-    ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO72]](s32), [[UADDE12]](s32)
+    ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDE80]], [[UV60]]
+    ; GFX8-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[ADD5]], [[UV61]], [[UADDO37]]
+    ; GFX8-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE84]](s32)
     ; GFX8-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV59]]
     ; GFX8-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1)
     ; GFX8-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV58]]
@@ -1540,9 +1420,9 @@ body: |
     ; GFX8-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV59]]
     ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]]
     ; GFX8-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
-    ; GFX8-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[UV62]]
-    ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[UV63]], [[UADDO75]]
-    ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO74]](s32), [[UADDE14]](s32)
+    ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV62]]
+    ; GFX8-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[UADDE84]], [[UV63]], [[UADDO39]]
+    ; GFX8-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE86]](s32)
     ; GFX8-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]]
     ; GFX8-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]]
     ; GFX8-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]]
@@ -1585,89 +1465,74 @@ body: |
     ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]]
     ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]]
     ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV10]]
     ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV12]]
     ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV10]]
     ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
-    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]]
+    ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]]
     ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV12]]
     ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV10]]
     ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV12]]
-    ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]]
-    ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]]
-    ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]]
+    ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]]
+    ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]]
+    ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]]
     ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV12]]
-    ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
-    ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]]
+    ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]]
+    ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]]
+    ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]]
     ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64)
     ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]]
     ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64)
-    ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV14]]
-    ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV16]]
-    ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV14]]
-    ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]]
-    ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
-    ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV16]]
-    ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV14]]
-    ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV16]]
-    ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
-    ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
-    ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]]
-    ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]]
-    ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV16]]
-    ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]]
-    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
-    ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]]
+    ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV14]]
+    ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV16]]
+    ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV14]]
+    ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]]
+    ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]]
+    ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV16]]
+    ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV14]]
+    ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV16]]
+    ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
+    ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]]
+    ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]]
+    ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]]
+    ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]]
+    ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV16]]
+    ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]]
+    ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]]
+    ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]]
     ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
     ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
-    ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO22]]
-    ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE2]]
-    ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO22]]
-    ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
-    ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]]
-    ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
-    ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE2]]
-    ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO22]]
-    ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE2]]
-    ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
-    ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
-    ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]]
-    ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]]
-    ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE2]]
-    ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]]
-    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD11]](s32)
+    ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO10]]
+    ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE26]]
+    ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO10]]
+    ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
+    ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]]
+    ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]]
+    ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE26]]
+    ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO10]]
+    ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE26]]
+    ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
+    ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]]
+    ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]]
+    ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]]
+    ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]]
+    ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE26]]
+    ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD2]](s32)
     ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO32]], [[C4]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDE36]], [[C4]]
     ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64)
     ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV25]](s32)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[ADD11]], [[ANYEXT2]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV23]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[ADD2]], [[ANYEXT2]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV23]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]]
     ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64)
     ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]]
     ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[UV26]], [[USUBO3]]
@@ -1684,9 +1549,9 @@ body: |
     ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]]
     ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
-    ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV30]]
-    ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV31]], [[UADDO35]]
-    ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32)
+    ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV30]]
+    ; GFX9-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV31]], [[UADDO17]]
+    ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32)
     ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV29]]
     ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
     ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV28]]
@@ -1694,9 +1559,9 @@ body: |
     ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV29]]
     ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
     ; GFX9-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
-    ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV32]]
-    ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV33]], [[UADDO37]]
-    ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32)
+    ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV32]]
+    ; GFX9-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV33]], [[UADDO19]]
+    ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32)
     ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]]
     ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
     ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]]
@@ -1727,85 +1592,70 @@ body: |
     ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV40]]
     ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV42]]
     ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV40]]
-    ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
-    ; GFX9-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1)
-    ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH12]]
-    ; GFX9-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1)
-    ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]]
+    ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX9-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UADDO20]], [[UMULH12]], [[UADDO21]]
+    ; GFX9-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE45]]
     ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV42]]
     ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV40]]
     ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV42]]
-    ; GFX9-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]]
-    ; GFX9-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1)
-    ; GFX9-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH14]]
-    ; GFX9-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1)
-    ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]]
-    ; GFX9-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[ADD12]]
-    ; GFX9-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1)
-    ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]]
+    ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]]
+    ; GFX9-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDO22]], [[UMULH14]], [[UADDO23]]
+    ; GFX9-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE49]]
+    ; GFX9-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDE48]], [[UADDE46]], [[UADDO23]]
+    ; GFX9-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[UADDE50]], [[C5]], [[UADDE53]]
     ; GFX9-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV42]]
-    ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]]
-    ; GFX9-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO46]]
-    ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO49]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDO48]], [[C4]]
+    ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE54]]
+    ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE52]]
+    ; GFX9-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD3]], [[UADDO25]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDO24]], [[C4]]
     ; GFX9-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64)
     ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV45]](s32)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDE8]], [[ANYEXT4]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE8]](s32), [[UADDO48]], [[AMDGPU_MAD_U64_U32_26]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDE56]], [[ANYEXT4]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE8]](s32), [[UADDO24]], [[AMDGPU_MAD_U64_U32_26]]
     ; GFX9-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64)
-    ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[UV44]]
-    ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO48]], [[UV46]]
-    ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[UV44]]
-    ; GFX9-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
-    ; GFX9-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1)
-    ; GFX9-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH16]]
-    ; GFX9-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1)
-    ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]]
-    ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[UV46]]
-    ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[UV44]]
-    ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[UV46]]
-    ; GFX9-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]]
-    ; GFX9-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1)
-    ; GFX9-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH18]]
-    ; GFX9-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1)
-    ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]]
-    ; GFX9-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[ADD16]]
-    ; GFX9-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1)
-    ; GFX9-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]]
-    ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[UV46]]
-    ; GFX9-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]]
-    ; GFX9-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[UADDO58]]
-    ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[ADD19]], [[UADDO61]]
+    ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE56]], [[UV44]]
+    ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO24]], [[UV46]]
+    ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO24]], [[UV44]]
+    ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX9-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDO26]], [[UMULH16]], [[UADDO27]]
+    ; GFX9-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE59]]
+    ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE56]], [[UV46]]
+    ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE56]], [[UV44]]
+    ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO24]], [[UV46]]
+    ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]]
+    ; GFX9-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH18]], [[UADDO29]]
+    ; GFX9-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE63]]
+    ; GFX9-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDE62]], [[UADDE60]], [[UADDO29]]
+    ; GFX9-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[UADDE64]], [[C5]], [[UADDE67]]
+    ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE56]], [[UV46]]
+    ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE68]]
+    ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UADDE66]]
+    ; GFX9-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDE56]], [[ADD4]], [[UADDO31]]
     ; GFX9-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
     ; GFX9-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
-    ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO60]]
-    ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE10]]
-    ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO60]]
-    ; GFX9-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]]
-    ; GFX9-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1)
-    ; GFX9-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH20]]
-    ; GFX9-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1)
-    ; GFX9-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]]
-    ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE10]]
-    ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO60]]
-    ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE10]]
-    ; GFX9-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]]
-    ; GFX9-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1)
-    ; GFX9-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH22]]
-    ; GFX9-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1)
-    ; GFX9-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]]
-    ; GFX9-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[UADDO68]], [[ADD20]]
-    ; GFX9-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1)
-    ; GFX9-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]]
-    ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE10]]
-    ; GFX9-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]]
-    ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO70]](s32), [[ADD23]](s32)
+    ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO30]]
+    ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE70]]
+    ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO30]]
+    ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]]
+    ; GFX9-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDO32]], [[UMULH20]], [[UADDO33]]
+    ; GFX9-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE73]]
+    ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE70]]
+    ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO30]]
+    ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE70]]
+    ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]]
+    ; GFX9-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDO34]], [[UMULH22]], [[UADDO35]]
+    ; GFX9-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE77]]
+    ; GFX9-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDE76]], [[UADDE74]], [[UADDO35]]
+    ; GFX9-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[UADDE78]], [[C5]], [[UADDE81]]
+    ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE70]]
+    ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE82]]
+    ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE80]](s32), [[ADD5]](s32)
     ; GFX9-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV52]](s32), [[UADDO70]], [[C4]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV52]](s32), [[UADDE80]], [[C4]]
     ; GFX9-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64)
     ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV55]](s32)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV52]](s32), [[ADD23]], [[ANYEXT5]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV53]](s32), [[UADDO70]], [[AMDGPU_MAD_U64_U32_32]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV52]](s32), [[ADD5]], [[ANYEXT5]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV53]](s32), [[UADDE80]], [[AMDGPU_MAD_U64_U32_32]]
     ; GFX9-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64)
     ; GFX9-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV48]], [[UV54]]
     ; GFX9-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV49]], [[UV56]], [[USUBO9]]
@@ -1821,9 +1671,9 @@ body: |
     ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV59]], [[USUBO9]]
     ; GFX9-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[USUBE12]], [[C5]], [[USUBO11]]
     ; GFX9-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
-    ; GFX9-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UV60]]
-    ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[ADD23]], [[UV61]], [[UADDO73]]
-    ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO72]](s32), [[UADDE12]](s32)
+    ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDE80]], [[UV60]]
+    ; GFX9-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[ADD5]], [[UV61]], [[UADDO37]]
+    ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE84]](s32)
     ; GFX9-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV59]]
     ; GFX9-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1)
     ; GFX9-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV58]]
@@ -1831,9 +1681,9 @@ body: |
     ; GFX9-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV59]]
     ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]]
     ; GFX9-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
-    ; GFX9-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[UV62]]
-    ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[UV63]], [[UADDO75]]
-    ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO74]](s32), [[UADDE14]](s32)
+    ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV62]]
+    ; GFX9-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[UADDE84]], [[UV63]], [[UADDO39]]
+    ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE86]](s32)
     ; GFX9-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]]
     ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]]
     ; GFX9-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]]
@@ -1876,93 +1726,78 @@ body: |
     ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[MUL]]
     ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
     ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]]
+    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV10]]
     ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
     ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV10]]
     ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]]
-    ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
-    ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]]
+    ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]]
     ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
     ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV10]]
     ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
-    ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]]
-    ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]]
-    ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
-    ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]]
+    ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]]
+    ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]]
+    ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]]
+    ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]]
     ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
-    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]]
-    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
-    ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
-    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]]
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]]
+    ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]]
+    ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]]
+    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]]
     ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64)
-    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
-    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[MUL5]]
-    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
-    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]]
-    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]]
-    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]]
-    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]]
-    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]]
-    ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]]
-    ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
-    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]]
-    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]]
-    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]]
-    ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]]
-    ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
-    ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]]
-    ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
-    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]]
-    ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]]
-    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
-    ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]]
+    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]]
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[MUL5]]
+    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]]
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]]
+    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV12]]
+    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]]
+    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV12]]
+    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]]
+    ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]]
+    ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]]
+    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]]
+    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV12]]
+    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]]
+    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]]
+    ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]]
+    ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]]
+    ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]]
+    ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]]
+    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]]
+    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]]
+    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]]
+    ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]]
     ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
     ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
-    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]]
-    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]]
-    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]]
-    ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]]
-    ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]]
-    ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
-    ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]]
-    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]]
-    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]]
-    ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]]
-    ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
-    ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]]
-    ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
-    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]]
-    ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]]
-    ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32)
+    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO10]]
+    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE26]]
+    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO10]]
+    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]]
+    ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]]
+    ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]]
+    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE26]]
+    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO10]]
+    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE26]]
+    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]]
+    ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]]
+    ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]]
+    ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]]
+    ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]]
+    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE26]]
+    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]]
+    ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD6]](s32)
     ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
-    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C4]]
+    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDE36]], [[C4]]
     ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
-    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]]
-    ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[MUL13]]
-    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO32]]
-    ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]]
+    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD6]]
+    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[MUL13]]
+    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDE36]]
+    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]]
     ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]]
-    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]]
-    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]]
+    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD8]], [[USUBO3]]
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD8]]
     ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
     ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV23]]
     ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
@@ -1975,9 +1810,9 @@ body: |
     ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]]
     ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
-    ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV24]]
-    ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV25]], [[UADDO35]]
-    ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32)
+    ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV24]]
+    ; GFX10-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV25]], [[UADDO17]]
+    ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32)
     ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV23]]
     ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
     ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV22]]
@@ -1985,9 +1820,9 @@ body: |
     ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV23]]
     ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
     ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
-    ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV26]]
-    ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV27]], [[UADDO37]]
-    ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32)
+    ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV26]]
+    ; GFX10-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV27]], [[UADDO19]]
+    ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32)
     ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]]
     ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
     ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]]
@@ -2012,95 +1847,80 @@ body: |
     ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[FPTOUI2]], [[C4]]
     ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64)
     ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[FPTOUI3]]
-    ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV35]], [[MUL15]]
+    ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV35]], [[MUL15]]
     ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[FPTOUI2]]
-    ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[MUL16]]
+    ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[MUL16]]
     ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV34]]
-    ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]]
+    ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD10]]
     ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV34]]
-    ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[MUL18]]
-    ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1)
-    ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH12]]
-    ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1)
-    ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]]
-    ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]]
+    ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[MUL18]]
+    ; GFX10-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UADDO20]], [[UMULH12]], [[UADDO21]]
+    ; GFX10-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE45]]
+    ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD10]]
     ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV34]]
-    ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]]
-    ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL19]], [[UMULH13]]
-    ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1)
-    ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH14]]
-    ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1)
-    ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]]
-    ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[ADD20]]
-    ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1)
-    ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]]
-    ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]]
-    ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD22]]
-    ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO46]]
-    ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO49]]
-    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDO48]], [[C4]]
+    ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD10]]
+    ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[MUL19]], [[UMULH13]]
+    ; GFX10-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDO22]], [[UMULH14]], [[UADDO23]]
+    ; GFX10-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE49]]
+    ; GFX10-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[UADDE48]], [[UADDE46]], [[UADDO23]]
+    ; GFX10-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[UADDE50]], [[C5]], [[UADDE53]]
+    ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD10]]
+    ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE54]]
+    ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE52]]
+    ; GFX10-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD11]], [[UADDO25]]
+    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO6]](s32), [[UADDO24]], [[C4]]
     ; GFX10-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_8]](s64)
-    ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[UADDE8]]
-    ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV37]], [[MUL20]]
-    ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[UADDO48]]
-    ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[MUL21]]
-    ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[UV36]]
-    ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[UADDO48]], [[ADD25]]
-    ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[UV36]]
-    ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL22]], [[MUL23]]
-    ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1)
-    ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH16]]
-    ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1)
-    ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]]
-    ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[ADD25]]
-    ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[UV36]]
-    ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[ADD25]]
-    ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL24]], [[UMULH17]]
-    ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1)
-    ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH18]]
-    ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1)
-    ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]]
-    ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[ADD26]]
-    ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1)
-    ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]]
-    ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[ADD25]]
-    ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD28]]
-    ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[UADDO58]]
-    ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[ADD29]], [[UADDO61]]
+    ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[UADDE56]]
+    ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV37]], [[MUL20]]
+    ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[UADDO24]]
+    ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[MUL21]]
+    ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[UADDE56]], [[UV36]]
+    ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[UADDO24]], [[ADD13]]
+    ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO24]], [[UV36]]
+    ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[MUL22]], [[MUL23]]
+    ; GFX10-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDO26]], [[UMULH16]], [[UADDO27]]
+    ; GFX10-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE59]]
+    ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[UADDE56]], [[ADD13]]
+    ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE56]], [[UV36]]
+    ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO24]], [[ADD13]]
+    ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL24]], [[UMULH17]]
+    ; GFX10-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH18]], [[UADDO29]]
+    ; GFX10-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE63]]
+    ; GFX10-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDE62]], [[UADDE60]], [[UADDO29]]
+    ; GFX10-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[UADDE64]], [[C5]], [[UADDE67]]
+    ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE56]], [[ADD13]]
+    ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE68]]
+    ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UADDE66]]
+    ; GFX10-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[UADDE56]], [[ADD14]], [[UADDO31]]
     ; GFX10-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
     ; GFX10-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
-    ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[UV41]], [[UADDO60]]
-    ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[UV40]], [[UADDE10]]
-    ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV40]], [[UADDO60]]
-    ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL25]], [[MUL26]]
-    ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1)
-    ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH20]]
-    ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1)
-    ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]]
-    ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UV41]], [[UADDE10]]
-    ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV41]], [[UADDO60]]
-    ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV40]], [[UADDE10]]
-    ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[UMULH21]]
-    ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1)
-    ; GFX10-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH22]]
-    ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1)
-    ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]]
-    ; GFX10-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[UADDO68]], [[ADD30]]
-    ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1)
-    ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]]
-    ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV41]], [[UADDE10]]
-    ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD32]]
-    ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO70]](s32), [[ADD33]](s32)
+    ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[UV41]], [[UADDO30]]
+    ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[UV40]], [[UADDE70]]
+    ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV40]], [[UADDO30]]
+    ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL25]], [[MUL26]]
+    ; GFX10-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDO32]], [[UMULH20]], [[UADDO33]]
+    ; GFX10-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE73]]
+    ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UV41]], [[UADDE70]]
+    ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV41]], [[UADDO30]]
+    ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV40]], [[UADDE70]]
+    ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[UMULH21]]
+    ; GFX10-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDO34]], [[UMULH22]], [[UADDO35]]
+    ; GFX10-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE77]]
+    ; GFX10-NEXT: [[UADDE80:%[0-9]+]]:_(s32), [[UADDE81:%[0-9]+]]:_(s1) = G_UADDE [[UADDE76]], [[UADDE74]], [[UADDO35]]
+    ; GFX10-NEXT: [[UADDE82:%[0-9]+]]:_(s32), [[UADDE83:%[0-9]+]]:_(s1) = G_UADDE [[UADDE78]], [[C5]], [[UADDE81]]
+    ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV41]], [[UADDE70]]
+    ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE82]]
+    ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE80]](s32), [[ADD15]](s32)
     ; GFX10-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
-    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV42]](s32), [[UADDO70]], [[C4]]
+    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV42]](s32), [[UADDE80]], [[C4]]
     ; GFX10-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64)
-    ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UV42]], [[ADD33]]
-    ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[UV45]], [[MUL28]]
-    ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UV43]], [[UADDO70]]
-    ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[MUL29]]
+    ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UV42]], [[ADD15]]
+    ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV45]], [[MUL28]]
+    ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UV43]], [[UADDE80]]
+    ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL29]]
     ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV38]], [[UV44]]
-    ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV39]], [[ADD35]], [[USUBO9]]
-    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV39]], [[ADD35]]
+    ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV39]], [[ADD17]], [[USUBO9]]
+    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV39]], [[ADD17]]
     ; GFX10-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
     ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE10]](s32), [[UV47]]
     ; GFX10-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1)
@@ -2112,9 +1932,9 @@ body: |
     ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV47]], [[USUBO9]]
     ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[USUBE12]], [[C5]], [[USUBO11]]
     ; GFX10-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
-    ; GFX10-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UV48]]
-    ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[ADD33]], [[UV49]], [[UADDO73]]
-    ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO72]](s32), [[UADDE12]](s32)
+    ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDE80]], [[UV48]]
+    ; GFX10-NEXT: [[UADDE84:%[0-9]+]]:_(s32), [[UADDE85:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV49]], [[UADDO37]]
+    ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE84]](s32)
     ; GFX10-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV47]]
     ; GFX10-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1)
     ; GFX10-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV46]]
@@ -2122,9 +1942,9 @@ body: |
     ; GFX10-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV47]]
     ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]]
     ; GFX10-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
-    ; GFX10-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[UV50]]
-    ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[UV51]], [[UADDO75]]
-    ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO74]](s32), [[UADDE14]](s32)
+    ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV50]]
+    ; GFX10-NEXT: [[UADDE86:%[0-9]+]]:_(s32), [[UADDE87:%[0-9]+]]:_(s1) = G_UADDE [[UADDE84]], [[UV51]], [[UADDO39]]
+    ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE86]](s32)
     ; GFX10-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]]
     ; GFX10-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]]
     ; GFX10-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]]
@@ -2855,6 +2675,7 @@ body: |
     ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]]
     ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]]
     ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
     ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
     ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
@@ -2864,89 +2685,73 @@ body: |
     ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
     ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
     ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]]
-    ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH1]], [[UADDO1]]
+    ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE1]]
     ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
     ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
     ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
-    ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
-    ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]]
-    ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
-    ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH3]], [[UADDO3]]
+    ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]]
+    ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]]
+    ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDE9]]
     ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
-    ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
-    ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
-    ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
-    ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]]
-    ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
-    ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
-    ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]]
-    ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
-    ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]]
-    ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]]
-    ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]]
-    ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]]
-    ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
-    ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]]
-    ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
-    ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]]
-    ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]]
-    ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]]
-    ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
-    ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]]
-    ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]]
-    ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
-    ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]]
-    ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]]
-    ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
-    ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]]
+    ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE10]]
+    ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]]
+    ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]]
+    ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO4]]
+    ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]]
+    ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]]
+    ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO4]]
+    ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]]
+    ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[MUL6]]
+    ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]]
+    ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[MUL6]]
+    ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH6]], [[UADDO7]]
+    ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE15]]
+    ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]]
+    ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[MUL6]]
+    ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]]
+    ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH8]], [[UADDO9]]
+    ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]]
+    ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]]
+    ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C6]], [[UADDE23]]
+    ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]]
+    ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE24]]
+    ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]]
+    ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]]
     ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
     ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
-    ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]]
-    ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]]
-    ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]]
-    ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
-    ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]]
-    ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
-    ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]]
-    ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]]
-    ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]]
-    ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
-    ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]]
-    ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]]
-    ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
-    ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]]
-    ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]]
-    ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32)
+    ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO10]]
+    ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE26]]
+    ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO10]]
+    ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH10]], [[UADDO13]]
+    ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE29]]
+    ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE26]]
+    ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO10]]
+    ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE26]]
+    ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH12]], [[UADDO15]]
+    ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]]
+    ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]]
+    ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C6]], [[UADDE37]]
+    ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE26]]
+    ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE38]]
+    ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD6]](s32)
     ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
-    ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]]
-    ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]]
-    ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]]
-    ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]]
-    ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
-    ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]]
+    ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDE36]]
+    ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDE36]]
+    ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD6]]
+    ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDE36]]
+    ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]]
     ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]]
-    ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]]
-    ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]]
+    ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD8]], [[USUBO3]]
+    ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD8]]
     ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
     ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]]
     ; GFX6-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
@@ -2959,9 +2764,9 @@ body: |
     ; GFX6-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
     ; GFX6-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV14]]
-    ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV15]], [[UADDO35]]
-    ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32)
+    ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV14]]
+    ; GFX6-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV15]], [[UADDO17]]
+    ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32)
     ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]]
     ; GFX6-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
     ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]]
@@ -2969,9 +2774,9 @@ body: |
     ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]]
     ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
     ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV16]]
-    ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV17]], [[UADDO37]]
-    ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32)
+    ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV16]]
+    ; GFX6-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV17]], [[UADDO19]]
+    ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32)
     ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
     ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
     ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
@@ -3014,89 +2819,74 @@ body: |
     ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]]
     ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]]
     ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
+    ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]]
     ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]]
     ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]]
     ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
-    ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]]
+    ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE1]]
     ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]]
     ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]]
     ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]]
-    ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]]
-    ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]]
-    ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]]
+    ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]]
+    ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]]
+    ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDE9]]
     ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]]
-    ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
-    ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C5]]
+    ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]]
+    ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]]
+    ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C5]]
     ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64)
     ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]]
     ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64)
-    ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]]
-    ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]]
-    ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]]
-    ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]]
-    ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
-    ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]]
-    ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]]
-    ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]]
-    ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
-    ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
-    ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]]
-    ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]]
-    ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]]
-    ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]]
-    ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
-    ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]]
+    ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV10]]
+    ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV12]]
+    ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV10]]
+    ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]]
+    ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE15]]
+    ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV12]]
+    ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV10]]
+    ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV12]]
+    ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
+    ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]]
+    ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]]
+    ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]]
+    ; GFX8-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C6]], [[UADDE23]]
+    ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV12]]
+    ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]]
+    ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]]
+    ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]]
     ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
     ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
-    ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]]
-    ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]]
-    ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]]
-    ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
-    ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]]
-    ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
-    ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]]
-    ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]]
-    ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]]
-    ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
-    ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
-    ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]]
-    ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]]
-    ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]]
-    ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]]
-    ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD11]](s32)
+    ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO10]]
+    ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE26]]
+    ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO10]]
+    ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
+    ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]]
+    ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE29]]
+    ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE26]]
+    ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO10]]
+    ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE26]]
+    ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
+    ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]]
+    ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]]
+    ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]]
+    ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C6]], [[UADDE37]]
+    ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE26]]
+    ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]]
+    ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD2]](s32)
     ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C5]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDE36]], [[C5]]
     ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64)
     ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD2]], [[ANYEXT2]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]]
     ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64)
     ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]]
     ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]]
@@ -3113,9 +2903,9 @@ body: |
     ; GFX8-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
     ; GFX8-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV26]]
-    ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV27]], [[UADDO35]]
-    ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32)
+    ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV26]]
+    ; GFX8-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV27]], [[UADDO17]]
+    ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32)
     ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]]
     ; GFX8-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
     ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]]
@@ -3123,9 +2913,9 @@ body: |
     ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]]
     ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
     ; GFX8-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV28]]
-    ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV29]], [[UADDO37]]
-    ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32)
+    ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV28]]
+    ; GFX8-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV29]], [[UADDO19]]
+    ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32)
     ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
     ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
     ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
@@ -3168,89 +2958,74 @@ body: |
     ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]]
     ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]]
     ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]]
     ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]]
     ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]]
     ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
-    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]]
+    ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE1]]
     ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]]
     ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]]
     ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]]
-    ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]]
-    ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]]
-    ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]]
+    ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]]
+    ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]]
+    ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDE9]]
     ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]]
-    ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
-    ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C5]]
+    ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]]
+    ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]]
+    ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C5]]
     ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64)
     ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]]
     ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64)
-    ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]]
-    ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]]
-    ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]]
-    ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]]
-    ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
-    ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]]
-    ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]]
-    ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]]
-    ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
-    ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
-    ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]]
-    ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]]
-    ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]]
-    ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]]
-    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
-    ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]]
+    ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV10]]
+    ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV12]]
+    ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV10]]
+    ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]]
+    ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE15]]
+    ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV12]]
+    ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV10]]
+    ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV12]]
+    ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
+    ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]]
+    ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]]
+    ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]]
+    ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C6]], [[UADDE23]]
+    ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV12]]
+    ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]]
+    ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]]
+    ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]]
     ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
     ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
-    ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]]
-    ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]]
-    ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]]
-    ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
-    ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]]
-    ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
-    ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]]
-    ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]]
-    ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]]
-    ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
-    ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
-    ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]]
-    ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]]
-    ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]]
-    ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]]
-    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD11]](s32)
+    ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO10]]
+    ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE26]]
+    ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO10]]
+    ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
+    ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]]
+    ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE29]]
+    ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE26]]
+    ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO10]]
+    ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE26]]
+    ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
+    ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]]
+    ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]]
+    ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]]
+    ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C6]], [[UADDE37]]
+    ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE26]]
+    ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD2]](s32)
     ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C5]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDE36]], [[C5]]
     ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64)
     ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD2]], [[ANYEXT2]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]]
     ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64)
     ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]]
     ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]]
@@ -3267,9 +3042,9 @@ body: |
     ; GFX9-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
     ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV26]]
-    ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD11]], [[UV27]], [[UADDO35]]
-    ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32)
+    ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV26]]
+    ; GFX9-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD2]], [[UV27]], [[UADDO17]]
+    ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32)
     ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]]
     ; GFX9-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
     ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]]
@@ -3277,9 +3052,9 @@ body: |
     ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]]
     ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
     ; GFX9-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV28]]
-    ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV29]], [[UADDO37]]
-    ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32)
+    ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV28]]
+    ; GFX9-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV29]], [[UADDO19]]
+    ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32)
     ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
     ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
     ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
@@ -3322,93 +3097,78 @@ body: |
     ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[MUL]]
     ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
     ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]]
+    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]]
     ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
     ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]]
     ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]]
-    ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
-    ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]]
+    ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE1]]
     ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
     ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]]
     ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
-    ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]]
-    ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]]
-    ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
-    ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]]
+    ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]]
+    ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]]
+    ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]]
+    ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDE9]]
     ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
-    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]]
-    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
-    ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
-    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C5]]
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]]
+    ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]]
+    ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]]
+    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C5]]
     ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64)
-    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
-    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[MUL5]]
-    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
-    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]]
-    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV8]]
-    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]]
-    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV8]]
-    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]]
-    ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]]
-    ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
-    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]]
-    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV8]]
-    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]]
-    ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]]
-    ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
-    ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]]
-    ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
-    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]]
-    ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]]
-    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
-    ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]]
+    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]]
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[MUL5]]
+    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]]
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]]
+    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV8]]
+    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]]
+    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV8]]
+    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]]
+    ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]]
+    ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE15]]
+    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]]
+    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV8]]
+    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]]
+    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]]
+    ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]]
+    ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]]
+    ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]]
+    ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C6]], [[UADDE23]]
+    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]]
+    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]]
+    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]]
+    ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]]
     ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
     ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
-    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]]
-    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]]
-    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]]
-    ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]]
-    ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]]
-    ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
-    ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]]
-    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]]
-    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]]
-    ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]]
-    ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
-    ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]]
-    ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
-    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]]
-    ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]]
-    ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32)
+    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO10]]
+    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE26]]
+    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO10]]
+    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]]
+    ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]]
+    ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE29]]
+    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE26]]
+    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO10]]
+    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE26]]
+    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]]
+    ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]]
+    ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]]
+    ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]]
+    ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C6]], [[UADDE37]]
+    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE26]]
+    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]]
+    ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE36]](s32), [[ADD6]](s32)
     ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
-    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV14]](s32), [[UADDO32]], [[C5]]
+    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV14]](s32), [[UADDE36]], [[C5]]
     ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
-    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]]
-    ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL13]]
-    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]]
-    ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]]
+    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD6]]
+    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL13]]
+    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDE36]]
+    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]]
     ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV16]]
-    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]]
-    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]]
+    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD8]], [[USUBO3]]
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD8]]
     ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
     ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV19]]
     ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
@@ -3421,9 +3181,9 @@ body: |
     ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
     ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV20]]
-    ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV21]], [[UADDO35]]
-    ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32)
+    ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDE36]], [[UV20]]
+    ; GFX10-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[UV21]], [[UADDO17]]
+    ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO16]](s32), [[UADDE40]](s32)
     ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV19]]
     ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
     ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV18]]
@@ -3431,9 +3191,9 @@ body: |
     ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV19]]
     ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
     ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
-    ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV22]]
-    ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV23]], [[UADDO37]]
-    ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32)
+    ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UV22]]
+    ; GFX10-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE40]], [[UV23]], [[UADDO19]]
+    ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[UADDE42]](s32)
     ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
     ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
     ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir
index 3a919f004964b..54a2327b5e20f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir
@@ -77,28 +77,24 @@ body: |
     ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
     ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
     ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]]
     ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]]
     ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]]
     ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
-    ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]]
+    ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE1]]
     ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]]
     ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]]
     ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]]
-    ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]]
-    ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]]
-    ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]]
+    ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE5]]
+    ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]]
+    ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C]], [[UADDE9]]
     ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]]
-    ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32)
+    ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]]
+    ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE8]](s32), [[ADD]](s32)
     ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     ;
     ; GFX9-LABEL: name: test_umulh_s64
@@ -108,28 +104,24 @@ body: |
     ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
     ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
     ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]]
     ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]]
     ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]]
     ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
-    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]]
+    ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE1]]
     ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]]
     ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]]
     ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]]
-    ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]]
-    ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]]
-    ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]]
+    ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE5]]
+    ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]]
+    ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C]], [[UADDE9]]
     ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]]
-    ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32)
+    ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE8]](s32), [[ADD]](s32)
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
@@ -152,52 +144,43 @@ body: |
     ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
     ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
     ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]]
     ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]]
     ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]]
     ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
-    ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]]
+    ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE1]]
     ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]]
     ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]]
     ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]]
-    ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]]
-    ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]]
-    ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]]
+    ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE5]]
+    ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]]
+    ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C]], [[UADDE9]]
     ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]]
-    ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32)
+    ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]]
+    ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE8]](s32), [[ADD]](s32)
     ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
     ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
     ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]]
     ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]]
     ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]]
-    ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
-    ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH4]]
-    ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH4]], [[UADDO5]]
+    ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE13]]
     ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV11]]
     ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UV10]]
     ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV11]]
-    ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
-    ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
-    ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH6]]
-    ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD4]]
-    ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]]
+    ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
+    ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH6]], [[UADDO7]]
+    ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE17]]
+    ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[UADDE14]], [[UADDO7]]
+    ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[C]], [[UADDE21]]
     ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UV11]]
-    ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]]
-    ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD7]](s32)
+    ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE22]]
+    ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE20]](s32), [[ADD1]](s32)
     ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
     ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ;
@@ -210,52 +193,43 @@ body: |
     ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
     ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
     ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]]
     ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]]
     ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]]
     ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
-    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]]
+    ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE1]]
     ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]]
     ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]]
     ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]]
-    ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]]
-    ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]]
-    ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]]
+    ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE5]]
+    ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]]
+    ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C]], [[UADDE9]]
     ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]]
-    ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32)
+    ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE8]](s32), [[ADD]](s32)
     ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
     ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
     ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]]
     ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]]
     ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]]
-    ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
-    ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH4]]
-    ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH4]], [[UADDO5]]
+    ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE13]]
     ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV11]]
     ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UV10]]
     ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV11]]
-    ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
-    ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
-    ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH6]]
-    ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD4]]
-    ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]]
+    ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
+    ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH6]], [[UADDO7]]
+    ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE17]]
+    ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[UADDE14]], [[UADDO7]]
+    ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[C]], [[UADDE21]]
     ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UV11]]
-    ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]]
-    ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD7]](s32)
+    ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE22]]
+    ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE20]](s32), [[ADD1]](s32)
     ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
     ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
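
For readers skimming the regenerated checks above: the updated lines replace the old carry accounting (G_UADDO, then G_ZEXT of each carry bit, then G_ADD of the zero-extended carries) with G_UADDE chains that thread the carry bit directly, using the new zero constant [[C]] to materialize the accumulated carry. The following is only a rough C sketch of the same idea, not the exact sequence the legalizer emits; the helper names (mullo32, mulhi32, addc32, umulh64_sketch) are made up for illustration, and the reference check relies on the GCC/Clang __uint128_t extension.

#include <stdint.h>
#include <stdio.h>

/* 32-bit primitives standing in for G_MUL, G_UMULH and G_UADDO/G_UADDE. */
static uint32_t mullo32(uint32_t a, uint32_t b) { return a * b; }
static uint32_t mulhi32(uint32_t a, uint32_t b) {
  return (uint32_t)(((uint64_t)a * b) >> 32);
}
/* add-with-carry: returns a + b + cin (mod 2^32), writes the carry-out. */
static uint32_t addc32(uint32_t a, uint32_t b, uint32_t cin, uint32_t *cout) {
  uint64_t s = (uint64_t)a + b + cin;
  *cout = (uint32_t)(s >> 32);
  return (uint32_t)s;
}

/* High 64 bits of a 64x64 unsigned multiply built from 32-bit limbs, with
   the carries of the partial-product sums kept as carry bits instead of
   being zero-extended and re-added. */
static uint64_t umulh64_sketch(uint64_t a, uint64_t b) {
  uint32_t a0 = (uint32_t)a, a1 = (uint32_t)(a >> 32);
  uint32_t b0 = (uint32_t)b, b1 = (uint32_t)(b >> 32);
  uint32_t c0, c1, d0, d1, d2;

  /* Column at bit 32: lo(a1*b0) + lo(a0*b1) + hi(a0*b0).
     Only its carries feed the high half. */
  uint32_t mid = addc32(mullo32(a1, b0), mullo32(a0, b1), 0, &c0);
  mid = addc32(mid, mulhi32(a0, b0), 0, &c1);
  (void)mid;

  /* Column at bit 64: a1*b1 + hi(a1*b0) + hi(a0*b1) + carries from above. */
  uint32_t h_lo = addc32(mullo32(a1, b1), mulhi32(a1, b0), 0, &d0);
  h_lo = addc32(h_lo, mulhi32(a0, b1), 0, &d1);
  h_lo = addc32(h_lo, c0 + c1, 0, &d2);
  uint32_t h_hi = mulhi32(a1, b1) + d0 + d1 + d2;

  return ((uint64_t)h_hi << 32) | h_lo;
}

int main(void) {
  uint64_t a = 0xDEADBEEFCAFEBABEull, b = 0x123456789ABCDEF1ull;
  /* Reference value via the compiler's 128-bit multiply. */
  uint64_t ref = (uint64_t)(((__uint128_t)a * b) >> 64);
  printf("sketch=%llx ref=%llx\n",
         (unsigned long long)umulh64_sketch(a, b), (unsigned long long)ref);
  return 0;
}
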
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir
index 13c52d08b4941..2917b06936944 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir
@@ -115,42 +115,38 @@ body: |
     ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
     ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
     ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]]
     ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]]
     ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]]
     ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
-    ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]]
+    ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE1]]
     ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]]
     ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]]
     ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]]
-    ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]]
-    ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]]
-    ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]]
+    ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE5]]
+    ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]]
+    ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C]], [[UADDE9]]
     ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]]
-    ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32)
-    ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]]
+    ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE8]](s32), [[ADD]](s32)
+    ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
     ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
     ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV4]](s32), [[UV6]], [[C]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV4]](s32), [[UV6]], [[C1]]
     ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64)
     ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV9]](s32)
     ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV4]](s32), [[UV7]], [[ANYEXT]]
     ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV5]](s32), [[UV6]], [[AMDGPU_MAD_U64_U32_2]]
     ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
     ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV8]](s32), [[UV10]](s32)
-    ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]]
-    ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1)
+    ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C1]]
+    ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1)
     ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV1]](s64)
-    ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[ZEXT5]](s64)
+    ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[ZEXT]](s64)
     ;
     ; GFX9-LABEL: name: test_umulo_s64
     ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
@@ -159,42 +155,38 @@ body: |
     ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
     ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
     ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]]
     ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]]
     ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]]
     ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
-    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]]
+    ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE1]]
     ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]]
     ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]]
     ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]]
-    ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]]
-    ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]]
-    ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]]
+    ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE5]]
+    ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]]
+    ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C]], [[UADDE9]]
     ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]]
-    ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32)
-    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE8]](s32), [[ADD]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
     ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
     ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV4]](s32), [[UV6]], [[C]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV4]](s32), [[UV6]], [[C1]]
     ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64)
     ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV9]](s32)
     ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV4]](s32), [[UV7]], [[ANYEXT]]
     ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV5]](s32), [[UV6]], [[AMDGPU_MAD_U64_U32_2]]
     ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
     ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV8]](s32), [[UV10]](s32)
-    ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]]
-    ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1)
+    ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C1]]
+    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1)
     ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV1]](s64)
-    ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[ZEXT5]](s64)
+    ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[ZEXT]](s64)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s64), %3:_(s1) = G_UMULO %0, %1
@@ -218,79 +210,70 @@ body: |
     ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
     ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
     ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]]
     ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]]
     ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]]
     ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
-    ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]]
+    ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE1]]
     ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]]
     ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]]
     ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]]
-    ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]]
-    ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]]
-    ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]]
+    ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE5]]
+    ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]]
+    ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C]], [[UADDE9]]
     ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]]
-    ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32)
-    ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]]
+    ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE8]](s32), [[ADD]](s32)
+    ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
     ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
     ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV8]](s32), [[UV10]], [[C]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV8]](s32), [[UV10]], [[C1]]
     ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64)
     ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV13]](s32)
     ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV8]](s32), [[UV11]], [[ANYEXT]]
     ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV9]](s32), [[UV10]], [[AMDGPU_MAD_U64_U32_2]]
     ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
     ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV12]](s32), [[UV14]](s32)
-    ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]]
+    ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C1]]
     ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
     ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
     ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV18]]
     ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV19]]
     ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV18]]
-    ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
-    ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH4]]
-    ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH4]], [[UADDO5]]
+    ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE13]]
     ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV19]]
     ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UV18]]
     ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV19]]
-    ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
-    ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
-    ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH6]]
-    ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD4]]
-    ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]]
+    ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
+    ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH6]], [[UADDO7]]
+    ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE17]]
+    ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[UADDE14]], [[UADDO7]]
+    ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[C]], [[UADDE21]]
     ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UV19]]
-    ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]]
-    ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD7]](s32)
+    ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE22]]
+    ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE20]](s32), [[ADD1]](s32)
     ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
     ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV20]](s32), [[UV22]], [[C]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV20]](s32), [[UV22]], [[C1]]
     ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64)
     ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV25]](s32)
     ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV20]](s32), [[UV23]], [[ANYEXT1]]
     ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV21]](s32), [[UV22]], [[AMDGPU_MAD_U64_U32_8]]
     ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64)
     ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV24]](s32), [[UV26]](s32)
-    ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV2]](s64), [[C]]
+    ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV2]](s64), [[C1]]
     ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV1]](s64), [[MV3]](s64)
     ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1)
-    ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT2]], [[C1]]
+    ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT2]], [[C2]]
     ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1)
-    ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT3]], [[C1]]
+    ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT3]], [[C2]]
     ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64)
     ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; GFX8-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<2 x s64>)
@@ -304,79 +287,70 @@ body: |
     ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
     ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
     ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]]
     ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]]
     ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]]
     ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
-    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]]
+    ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE1]]
     ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]]
     ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]]
     ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]]
-    ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]]
-    ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]]
-    ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]]
+    ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE5]]
+    ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]]
+    ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C]], [[UADDE9]]
     ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]]
-    ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32)
-    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]]
+    ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE8]](s32), [[ADD]](s32)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
     ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
     ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV8]](s32), [[UV10]], [[C]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV8]](s32), [[UV10]], [[C1]]
     ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_]](s64)
     ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV13]](s32)
     ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV8]](s32), [[UV11]], [[ANYEXT]]
     ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV9]](s32), [[UV10]], [[AMDGPU_MAD_U64_U32_2]]
     ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
     ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV12]](s32), [[UV14]](s32)
-    ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]]
+    ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C1]]
     ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
     ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
     ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV18]]
     ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV19]]
     ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV18]]
-    ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
-    ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH4]]
-    ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH4]], [[UADDO5]]
+    ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE13]]
     ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV19]]
     ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UV18]]
     ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV19]]
-    ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
-    ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
-    ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH6]]
-    ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD4]]
-    ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]]
+    ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
+    ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH6]], [[UADDO7]]
+    ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE17]]
+    ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[UADDE14]], [[UADDO7]]
+    ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[C]], [[UADDE21]]
     ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UV19]]
-    ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]]
-    ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD7]](s32)
+    ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE22]]
+    ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE20]](s32), [[ADD1]](s32)
     ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
     ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV20]](s32), [[UV22]], [[C]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV20]](s32), [[UV22]], [[C1]]
     ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64)
     ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV25]](s32)
     ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV20]](s32), [[UV23]], [[ANYEXT1]]
     ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV21]](s32), [[UV22]], [[AMDGPU_MAD_U64_U32_8]]
     ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64)
     ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV24]](s32), [[UV26]](s32)
-    ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV2]](s64), [[C]]
+    ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV2]](s64), [[C1]]
     ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV1]](s64), [[MV3]](s64)
     ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1)
-    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT2]], [[C1]]
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT2]], [[C2]]
     ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1)
-    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT3]], [[C1]]
+    ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT3]], [[C2]]
     ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64)
     ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; GFX9-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<2 x s64>)
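
The legalize-umulo checks above reduce the s64 overflow test to comparing the narrowed high half against zero (the G_ICMP intpred(ne) against the i64 0 constant). A minimal C sketch of that check, assuming the compiler provides the __uint128_t extension; the function name umulo64_sketch is made up for illustration.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Unsigned 64x64 multiply with overflow flag: overflow iff the high half of
   the 128-bit product is nonzero, the same condition the legalized G_UMULO
   tests with an icmp ne against zero. */
static bool umulo64_sketch(uint64_t a, uint64_t b, uint64_t *lo) {
  __uint128_t p = (__uint128_t)a * b;
  *lo = (uint64_t)p;
  return (uint64_t)(p >> 64) != 0;
}

int main(void) {
  uint64_t lo;
  bool ovf = umulo64_sketch(0xFFFFFFFFFFFFFFFFull, 2, &lo);
  printf("lo=%llx overflow=%d\n", (unsigned long long)lo, (int)ovf);
  return 0;
}
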
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir
index 44f44123bb736..b01e0eda6768e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir
@@ -349,6 +349,7 @@ body: |
     ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]]
     ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]]
     ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
     ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
     ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
@@ -358,88 +359,72 @@ body: |
     ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
     ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
     ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]]
-    ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH1]], [[UADDO1]]
+    ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]]
     ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
     ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
     ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
-    ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
-    ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]]
-    ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
-    ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH3]], [[UADDO3]]
+    ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]]
+    ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]]
+    ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]]
     ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
-    ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
-    ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
-    ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
-    ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]]
-    ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
-    ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
-    ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]]
-    ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
-    ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]]
-    ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]]
-    ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]]
-    ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]]
-    ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
-    ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]]
-    ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
-    ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]]
-    ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]]
-    ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]]
-    ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
-    ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]]
-    ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]]
-    ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
-    ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]]
-    ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]]
-    ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
-    ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]]
+    ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE10]]
+    ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]]
+    ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]]
+    ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO4]]
+    ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]]
+    ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]]
+    ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO4]]
+    ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]]
+    ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[MUL6]]
+    ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]]
+    ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[MUL6]]
+    ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH6]], [[UADDO7]]
+    ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]]
+    ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]]
+    ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[MUL6]]
+    ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]]
+    ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH8]], [[UADDO9]]
+    ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]]
+    ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]]
+    ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]]
+    ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]]
+    ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE24]]
+    ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]]
+    ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]]
     ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
     ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
-    ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]]
-    ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]]
-    ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]]
-    ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
-    ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]]
-    ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
-    ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]]
-    ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]]
-    ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]]
-    ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
-    ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]]
-    ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]]
-    ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
-    ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]]
-    ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]]
+    ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO10]]
+    ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE26]]
+    ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO10]]
+    ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH10]], [[UADDO13]]
+    ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]]
+    ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE26]]
+    ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO10]]
+    ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE26]]
+    ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH12]], [[UADDO15]]
+    ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]]
+    ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]]
+    ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]]
+    ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE26]]
+    ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE38]]
     ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
-    ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]]
-    ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]]
-    ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]]
-    ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]]
-    ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
-    ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]]
+    ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDE36]]
+    ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDE36]]
+    ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD6]]
+    ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDE36]]
+    ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]]
     ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]]
-    ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]]
-    ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]]
+    ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD8]], [[USUBO3]]
+    ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD8]]
     ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
     ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
     ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]]
@@ -501,88 +486,73 @@ body: |
     ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]]
     ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]]
     ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
+    ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]]
     ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]]
     ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]]
     ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
-    ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]]
+    ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]]
     ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]]
     ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]]
     ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]]
-    ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]]
-    ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]]
-    ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]]
+    ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]]
+    ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]]
+    ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]]
     ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]]
-    ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
-    ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]]
+    ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]]
+    ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]]
+    ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]]
     ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64)
     ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]]
     ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64)
-    ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]]
-    ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]]
-    ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]]
-    ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]]
-    ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
-    ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]]
-    ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]]
-    ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]]
-    ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
-    ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
-    ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]]
-    ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]]
-    ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]]
-    ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]]
-    ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
-    ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]]
+    ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV10]]
+    ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV12]]
+    ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV10]]
+    ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]]
+    ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]]
+    ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV12]]
+    ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV10]]
+    ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV12]]
+    ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
+    ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]]
+    ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]]
+    ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]]
+    ; GFX8-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]]
+    ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV12]]
+    ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]]
+    ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]]
+    ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]]
     ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
     ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
-    ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]]
-    ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]]
-    ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]]
-    ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
-    ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]]
-    ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
-    ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]]
-    ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]]
-    ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]]
-    ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
-    ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
-    ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]]
-    ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]]
-    ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]]
-    ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]]
+    ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO10]]
+    ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE26]]
+    ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO10]]
+    ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
+    ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]]
+    ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]]
+    ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE26]]
+    ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO10]]
+    ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE26]]
+    ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
+    ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]]
+    ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]]
+    ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]]
+    ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]]
+    ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE26]]
+    ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]]
     ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C4]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDE36]], [[C4]]
     ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64)
     ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD2]], [[ANYEXT2]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]]
     ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64)
     ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]]
     ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]]
@@ -648,88 +618,73 @@ body: |
     ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]]
     ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]]
     ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]]
     ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]]
     ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]]
     ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
-    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]]
+    ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]]
     ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]]
     ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]]
     ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]]
-    ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]]
-    ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]]
-    ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]]
+    ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]]
+    ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]]
+    ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]]
     ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]]
-    ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
-    ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]]
+    ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]]
+    ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]]
+    ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]]
     ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64)
     ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]]
     ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64)
-    ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]]
-    ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]]
-    ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]]
-    ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]]
-    ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
-    ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]]
-    ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]]
-    ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]]
-    ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
-    ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
-    ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]]
-    ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]]
-    ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]]
-    ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]]
-    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
-    ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]]
+    ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV10]]
+    ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV12]]
+    ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV10]]
+    ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]]
+    ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]]
+    ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV12]]
+    ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV10]]
+    ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV12]]
+    ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
+    ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]]
+    ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]]
+    ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]]
+    ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]]
+    ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV12]]
+    ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]]
+    ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]]
+    ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]]
     ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
     ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
-    ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]]
-    ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]]
-    ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]]
-    ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
-    ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]]
-    ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
-    ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]]
-    ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]]
-    ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]]
-    ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
-    ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
-    ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]]
-    ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]]
-    ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]]
-    ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]]
+    ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO10]]
+    ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE26]]
+    ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO10]]
+    ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
+    ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]]
+    ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]]
+    ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE26]]
+    ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO10]]
+    ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE26]]
+    ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
+    ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]]
+    ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]]
+    ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]]
+    ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]]
+    ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE26]]
+    ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]]
     ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C4]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDE36]], [[C4]]
     ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64)
     ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD2]], [[ANYEXT2]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]]
     ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64)
     ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]]
     ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]]
@@ -795,92 +750,77 @@ body: |
     ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[MUL]]
     ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
     ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]]
+    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]]
     ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
     ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]]
     ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]]
-    ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
-    ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]]
+    ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]]
     ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
     ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]]
     ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
-    ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]]
-    ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]]
-    ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
-    ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]]
+    ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]]
+    ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]]
+    ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]]
+    ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]]
     ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
-    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]]
-    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
-    ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
-    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]]
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]]
+    ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]]
+    ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]]
+    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]]
     ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64)
-    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
-    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[MUL5]]
-    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
-    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]]
-    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV8]]
-    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]]
-    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV8]]
-    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]]
-    ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]]
-    ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
-    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]]
-    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV8]]
-    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]]
-    ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]]
-    ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
-    ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]]
-    ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
-    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]]
-    ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]]
-    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
-    ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]]
+    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]]
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[MUL5]]
+    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]]
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]]
+    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV8]]
+    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]]
+    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV8]]
+    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]]
+    ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]]
+    ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]]
+    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]]
+    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV8]]
+    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]]
+    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]]
+    ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]]
+    ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]]
+    ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]]
+    ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]]
+    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]]
+    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]]
+    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]]
+    ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]]
     ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
     ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
-    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]]
-    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]]
-    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]]
-    ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]]
-    ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]]
-    ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
-    ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]]
-    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]]
-    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]]
-    ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]]
-    ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
-    ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]]
-    ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
-    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]]
-    ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]]
+    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO10]]
+    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE26]]
+    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO10]]
+    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]]
+    ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]]
+    ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]]
+    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE26]]
+    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO10]]
+    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE26]]
+    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]]
+    ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]]
+    ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]]
+    ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]]
+    ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]]
+    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE26]]
+    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]]
     ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
-    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV14]](s32), [[UADDO32]], [[C4]]
+    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV14]](s32), [[UADDE36]], [[C4]]
     ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
-    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]]
-    ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL13]]
-    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]]
-    ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]]
+    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD6]]
+    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL13]]
+    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDE36]]
+    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]]
     ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV16]]
-    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]]
-    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]]
+    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD8]], [[USUBO3]]
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD8]]
     ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
     ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
     ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV19]]
@@ -950,6 +890,7 @@ body: |
     ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]]
     ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]]
     ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
     ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
     ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
@@ -959,88 +900,72 @@ body: |
     ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
     ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
     ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]]
-    ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH1]], [[UADDO1]]
+    ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]]
     ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
     ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
     ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
-    ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
-    ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]]
-    ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
-    ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH3]], [[UADDO3]]
+    ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]]
+    ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]]
+    ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]]
     ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
-    ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
-    ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
-    ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
-    ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]]
-    ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
-    ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
-    ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]]
-    ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
-    ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]]
-    ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]]
-    ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]]
-    ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]]
-    ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
-    ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]]
-    ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
-    ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]]
-    ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]]
-    ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]]
-    ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
-    ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]]
-    ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]]
-    ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
-    ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]]
-    ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]]
-    ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
-    ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]]
+    ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE10]]
+    ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]]
+    ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]]
+    ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO4]]
+    ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]]
+    ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]]
+    ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO4]]
+    ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]]
+    ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[MUL6]]
+    ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]]
+    ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[MUL6]]
+    ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH6]], [[UADDO7]]
+    ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]]
+    ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]]
+    ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[MUL6]]
+    ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]]
+    ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH8]], [[UADDO9]]
+    ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]]
+    ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]]
+    ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]]
+    ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]]
+    ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE24]]
+    ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]]
+    ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]]
     ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
     ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
-    ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]]
-    ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]]
-    ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]]
-    ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
-    ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]]
-    ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
-    ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]]
-    ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]]
-    ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]]
-    ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
-    ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]]
-    ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]]
-    ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
-    ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]]
-    ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]]
+    ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO10]]
+    ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE26]]
+    ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO10]]
+    ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH10]], [[UADDO13]]
+    ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]]
+    ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE26]]
+    ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO10]]
+    ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE26]]
+    ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH12]], [[UADDO15]]
+    ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]]
+    ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]]
+    ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]]
+    ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE26]]
+    ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE38]]
     ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
-    ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDO32]]
-    ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]]
-    ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]]
-    ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDO32]]
-    ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
-    ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]]
+    ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDE36]]
+    ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDE36]]
+    ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD6]]
+    ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDE36]]
+    ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]]
     ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[MUL15]]
-    ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]]
-    ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]]
+    ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD8]], [[USUBO3]]
+    ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD8]]
     ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
     ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
     ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV17]]
@@ -1088,93 +1013,78 @@ body: |
     ; GFX6-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[FPTOUI2]]
     ; GFX6-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]]
     ; GFX6-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[FPTOUI2]]
-    ; GFX6-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]]
-    ; GFX6-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]]
+    ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]]
+    ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[UMULH15]]
     ; GFX6-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]]
-    ; GFX6-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]]
+    ; GFX6-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD10]]
     ; GFX6-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]]
-    ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]]
-    ; GFX6-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
-    ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UMULH16]]
-    ; GFX6-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
-    ; GFX6-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]]
-    ; GFX6-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]]
+    ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]]
+    ; GFX6-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH16]], [[UADDO17]]
+    ; GFX6-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE41]]
+    ; GFX6-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD10]]
     ; GFX6-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]]
-    ; GFX6-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]]
-    ; GFX6-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]]
-    ; GFX6-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1)
-    ; GFX6-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH18]]
-    ; GFX6-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1)
-    ; GFX6-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]]
-    ; GFX6-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UADDO40]], [[ADD20]]
-    ; GFX6-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1)
-    ; GFX6-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]]
-    ; GFX6-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]]
-    ; GFX6-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]]
-    ; GFX6-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO42]]
-    ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO45]]
-    ; GFX6-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO44]]
-    ; GFX6-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[UADDO44]]
-    ; GFX6-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE4]]
-    ; GFX6-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO44]]
-    ; GFX6-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]]
-    ; GFX6-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]]
-    ; GFX6-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL24]]
-    ; GFX6-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO44]], [[ADD25]]
-    ; GFX6-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[MUL24]]
-    ; GFX6-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]]
-    ; GFX6-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1)
-    ; GFX6-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH21]]
-    ; GFX6-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1)
-    ; GFX6-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]]
-    ; GFX6-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD25]]
-    ; GFX6-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL24]]
-    ; GFX6-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[ADD25]]
-    ; GFX6-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]]
-    ; GFX6-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1)
-    ; GFX6-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH23]]
-    ; GFX6-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1)
-    ; GFX6-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]]
-    ; GFX6-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD26]]
-    ; GFX6-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1)
-    ; GFX6-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]]
-    ; GFX6-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD25]]
-    ; GFX6-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]]
-    ; GFX6-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UADDO54]]
-    ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD29]], [[UADDO57]]
+    ; GFX6-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD10]]
+    ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]]
+    ; GFX6-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH18]], [[UADDO19]]
+    ; GFX6-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE45]]
+    ; GFX6-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UADDE42]], [[UADDO19]]
+    ; GFX6-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[UADDE46]], [[C5]], [[UADDE49]]
+    ; GFX6-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD10]]
+    ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE50]]
+    ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE48]]
+    ; GFX6-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD11]], [[UADDO21]]
+    ; GFX6-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO20]]
+    ; GFX6-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[UADDO20]]
+    ; GFX6-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE52]]
+    ; GFX6-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO20]]
+    ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]]
+    ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[UMULH20]]
+    ; GFX6-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE52]], [[MUL24]]
+    ; GFX6-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO20]], [[ADD13]]
+    ; GFX6-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO20]], [[MUL24]]
+    ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]]
+    ; GFX6-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[UADDO22]], [[UMULH21]], [[UADDO23]]
+    ; GFX6-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE55]]
+    ; GFX6-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE52]], [[ADD13]]
+    ; GFX6-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE52]], [[MUL24]]
+    ; GFX6-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO20]], [[ADD13]]
+    ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]]
+    ; GFX6-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDO24]], [[UMULH23]], [[UADDO25]]
+    ; GFX6-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE59]]
+    ; GFX6-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDE58]], [[UADDE56]], [[UADDO25]]
+    ; GFX6-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[UADDE60]], [[C5]], [[UADDE63]]
+    ; GFX6-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE52]], [[ADD13]]
+    ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[UADDE64]]
+    ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UADDE62]]
+    ; GFX6-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDE52]], [[ADD14]], [[UADDO27]]
     ; GFX6-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
     ; GFX6-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
-    ; GFX6-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDO56]]
-    ; GFX6-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[UADDE6]]
-    ; GFX6-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDO56]]
-    ; GFX6-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]]
-    ; GFX6-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1)
-    ; GFX6-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH25]]
-    ; GFX6-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1)
-    ; GFX6-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]]
-    ; GFX6-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDE6]]
-    ; GFX6-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDO56]]
-    ; GFX6-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDE6]]
-    ; GFX6-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]]
-    ; GFX6-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1)
-    ; GFX6-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH27]]
-    ; GFX6-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1)
-    ; GFX6-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]]
-    ; GFX6-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD30]]
-    ; GFX6-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1)
-    ; GFX6-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]]
-    ; GFX6-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDE6]]
-    ; GFX6-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]]
+    ; GFX6-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDO26]]
+    ; GFX6-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[UADDE66]]
+    ; GFX6-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDO26]]
+    ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]]
+    ; GFX6-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH25]], [[UADDO29]]
+    ; GFX6-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE69]]
+    ; GFX6-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDE66]]
+    ; GFX6-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDO26]]
+    ; GFX6-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDE66]]
+    ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]]
+    ; GFX6-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH27]], [[UADDO31]]
+    ; GFX6-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE73]]
+    ; GFX6-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDE72]], [[UADDE70]], [[UADDO31]]
+    ; GFX6-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[UADDE74]], [[C5]], [[UADDE77]]
+    ; GFX6-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDE66]]
+    ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[UADDE78]]
     ; GFX6-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
-    ; GFX6-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDO66]]
-    ; GFX6-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO66]]
-    ; GFX6-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[ADD33]]
-    ; GFX6-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO66]]
-    ; GFX6-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]]
-    ; GFX6-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]]
+    ; GFX6-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDE76]]
+    ; GFX6-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDE76]]
+    ; GFX6-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[ADD15]]
+    ; GFX6-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDE76]]
+    ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]]
+    ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH29]]
     ; GFX6-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV24]], [[MUL33]]
-    ; GFX6-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV25]], [[ADD35]], [[USUBO11]]
-    ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV25]], [[ADD35]]
+    ; GFX6-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV25]], [[ADD17]], [[USUBO11]]
+    ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV25]], [[ADD17]]
     ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO10]](s32), [[USUBE14]](s32)
     ; GFX6-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
     ; GFX6-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV31]]
@@ -1239,88 +1149,73 @@ body: |
     ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]]
     ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]]
     ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
+    ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV10]]
     ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV12]]
     ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV10]]
     ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
-    ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]]
+    ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]]
     ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV12]]
     ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV10]]
     ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV12]]
-    ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]]
-    ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]]
-    ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]]
+    ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]]
+    ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]]
+    ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]]
     ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV12]]
-    ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
-    ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]]
+    ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]]
+    ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]]
+    ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]]
     ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64)
     ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]]
     ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64)
-    ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV14]]
-    ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV16]]
-    ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV14]]
-    ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]]
-    ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
-    ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV16]]
-    ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV14]]
-    ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV16]]
-    ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
-    ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
-    ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]]
-    ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]]
-    ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV16]]
-    ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]]
-    ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
-    ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]]
+    ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV14]]
+    ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV16]]
+    ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV14]]
+    ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]]
+    ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]]
+    ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV16]]
+    ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV14]]
+    ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV16]]
+    ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
+    ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]]
+    ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]]
+    ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]]
+    ; GFX8-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]]
+    ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV16]]
+    ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]]
+    ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]]
+    ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]]
     ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
     ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
-    ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO22]]
-    ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE2]]
-    ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO22]]
-    ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
-    ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]]
-    ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
-    ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE2]]
-    ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO22]]
-    ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE2]]
-    ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
-    ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
-    ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]]
-    ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]]
-    ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE2]]
-    ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]]
+    ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO10]]
+    ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE26]]
+    ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO10]]
+    ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
+    ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]]
+    ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]]
+    ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE26]]
+    ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO10]]
+    ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE26]]
+    ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
+    ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]]
+    ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]]
+    ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]]
+    ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]]
+    ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE26]]
+    ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]]
     ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO32]], [[C4]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDE36]], [[C4]]
     ; GFX8-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64)
     ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV25]](s32)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[ADD11]], [[ANYEXT2]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV23]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[ADD2]], [[ANYEXT2]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV23]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]]
     ; GFX8-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64)
     ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]]
     ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[UV26]], [[USUBO3]]
@@ -1377,84 +1272,69 @@ body: |
     ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV36]]
     ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV38]]
     ; GFX8-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV36]]
-    ; GFX8-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
-    ; GFX8-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
-    ; GFX8-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UMULH12]]
-    ; GFX8-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
-    ; GFX8-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]]
+    ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX8-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH12]], [[UADDO17]]
+    ; GFX8-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE41]]
     ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV38]]
     ; GFX8-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV36]]
     ; GFX8-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV38]]
-    ; GFX8-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]]
-    ; GFX8-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1)
-    ; GFX8-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH14]]
-    ; GFX8-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1)
-    ; GFX8-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]]
-    ; GFX8-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UADDO40]], [[ADD12]]
-    ; GFX8-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1)
-    ; GFX8-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]]
+    ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]]
+    ; GFX8-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH14]], [[UADDO19]]
+    ; GFX8-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE45]]
+    ; GFX8-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UADDE42]], [[UADDO19]]
+    ; GFX8-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[UADDE46]], [[C5]], [[UADDE49]]
     ; GFX8-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV38]]
-    ; GFX8-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]]
-    ; GFX8-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO42]]
-    ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO45]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO44]], [[C4]]
+    ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE50]]
+    ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE48]]
+    ; GFX8-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD3]], [[UADDO21]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO20]], [[C4]]
     ; GFX8-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64)
     ; GFX8-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV41]](s32)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDE4]], [[ANYEXT4]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE12]](s32), [[UADDO44]], [[AMDGPU_MAD_U64_U32_26]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDE52]], [[ANYEXT4]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE12]](s32), [[UADDO20]], [[AMDGPU_MAD_U64_U32_26]]
     ; GFX8-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64)
-    ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV40]]
-    ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO44]], [[UV42]]
-    ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[UV40]]
-    ; GFX8-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
-    ; GFX8-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1)
-    ; GFX8-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH16]]
-    ; GFX8-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1)
-    ; GFX8-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]]
-    ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV42]]
-    ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV40]]
-    ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[UV42]]
-    ; GFX8-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]]
-    ; GFX8-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1)
-    ; GFX8-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH18]]
-    ; GFX8-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1)
-    ; GFX8-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]]
-    ; GFX8-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD16]]
-    ; GFX8-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1)
-    ; GFX8-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]]
-    ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV42]]
-    ; GFX8-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]]
-    ; GFX8-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UADDO54]]
-    ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD19]], [[UADDO57]]
+    ; GFX8-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE52]], [[UV40]]
+    ; GFX8-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO20]], [[UV42]]
+    ; GFX8-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO20]], [[UV40]]
+    ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX8-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[UADDO22]], [[UMULH16]], [[UADDO23]]
+    ; GFX8-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE55]]
+    ; GFX8-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE52]], [[UV42]]
+    ; GFX8-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE52]], [[UV40]]
+    ; GFX8-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO20]], [[UV42]]
+    ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]]
+    ; GFX8-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDO24]], [[UMULH18]], [[UADDO25]]
+    ; GFX8-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE59]]
+    ; GFX8-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDE58]], [[UADDE56]], [[UADDO25]]
+    ; GFX8-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[UADDE60]], [[C5]], [[UADDE63]]
+    ; GFX8-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE52]], [[UV42]]
+    ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE64]]
+    ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UADDE62]]
+    ; GFX8-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDE52]], [[ADD4]], [[UADDO27]]
     ; GFX8-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
     ; GFX8-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
-    ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO56]]
-    ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE6]]
-    ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO56]]
-    ; GFX8-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]]
-    ; GFX8-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1)
-    ; GFX8-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH20]]
-    ; GFX8-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1)
-    ; GFX8-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]]
-    ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE6]]
-    ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO56]]
-    ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE6]]
-    ; GFX8-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]]
-    ; GFX8-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1)
-    ; GFX8-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH22]]
-    ; GFX8-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1)
-    ; GFX8-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]]
-    ; GFX8-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD20]]
-    ; GFX8-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1)
-    ; GFX8-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]]
-    ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDE6]]
-    ; GFX8-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]]
+    ; GFX8-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO26]]
+    ; GFX8-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE66]]
+    ; GFX8-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO26]]
+    ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]]
+    ; GFX8-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH20]], [[UADDO29]]
+    ; GFX8-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE69]]
+    ; GFX8-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE66]]
+    ; GFX8-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO26]]
+    ; GFX8-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE66]]
+    ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]]
+    ; GFX8-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH22]], [[UADDO31]]
+    ; GFX8-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE73]]
+    ; GFX8-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDE72]], [[UADDE70]], [[UADDO31]]
+    ; GFX8-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[UADDE74]], [[C5]], [[UADDE77]]
+    ; GFX8-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDE66]]
+    ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE78]]
     ; GFX8-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV48]](s32), [[UADDO66]], [[C4]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV48]](s32), [[UADDE76]], [[C4]]
     ; GFX8-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64)
     ; GFX8-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV51]](s32)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV48]](s32), [[ADD23]], [[ANYEXT5]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV49]](s32), [[UADDO66]], [[AMDGPU_MAD_U64_U32_32]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV48]](s32), [[ADD5]], [[ANYEXT5]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV49]](s32), [[UADDE76]], [[AMDGPU_MAD_U64_U32_32]]
     ; GFX8-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64)
     ; GFX8-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[UV50]]
     ; GFX8-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[UV52]], [[USUBO11]]
@@ -1523,88 +1403,73 @@ body: |
     ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]]
     ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]]
     ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
+    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV10]]
     ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV12]]
     ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV10]]
     ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
-    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]]
+    ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]]
     ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV12]]
     ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV10]]
     ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV12]]
-    ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]]
-    ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]]
-    ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]]
+    ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]]
+    ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]]
+    ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]]
     ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV12]]
-    ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
-    ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]]
+    ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]]
+    ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]]
+    ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]]
     ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64)
     ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV15]](s32)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]]
     ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64)
-    ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV14]]
-    ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV16]]
-    ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV14]]
-    ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]]
-    ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
-    ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV16]]
-    ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV14]]
-    ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV16]]
-    ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
-    ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
-    ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]]
-    ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]]
-    ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV16]]
-    ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]]
-    ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
-    ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]]
+    ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV14]]
+    ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV16]]
+    ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV14]]
+    ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]]
+    ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]]
+    ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV16]]
+    ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV14]]
+    ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV16]]
+    ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
+    ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]]
+    ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]]
+    ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]]
+    ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]]
+    ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV16]]
+    ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]]
+    ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]]
+    ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]]
     ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
     ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
-    ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO22]]
-    ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE2]]
-    ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO22]]
-    ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
-    ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]]
-    ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
-    ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE2]]
-    ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO22]]
-    ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE2]]
-    ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
-    ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
-    ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]]
-    ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]]
-    ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE2]]
-    ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]]
+    ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO10]]
+    ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE26]]
+    ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO10]]
+    ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
+    ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]]
+    ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]]
+    ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE26]]
+    ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO10]]
+    ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE26]]
+    ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
+    ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]]
+    ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]]
+    ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]]
+    ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]]
+    ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE26]]
+    ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]]
     ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDO32]], [[C4]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[UADDE36]], [[C4]]
     ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64)
     ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV25]](s32)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[ADD11]], [[ANYEXT2]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV23]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV22]](s32), [[ADD2]], [[ANYEXT2]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV23]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]]
     ; GFX9-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64)
     ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[UV24]]
     ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[UV26]], [[USUBO3]]
@@ -1661,84 +1526,69 @@ body: |
     ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV36]]
     ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[UV38]]
     ; GFX9-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV36]]
-    ; GFX9-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
-    ; GFX9-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
-    ; GFX9-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UMULH12]]
-    ; GFX9-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
-    ; GFX9-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]]
+    ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX9-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH12]], [[UADDO17]]
+    ; GFX9-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE41]]
     ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV38]]
     ; GFX9-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV36]]
     ; GFX9-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV38]]
-    ; GFX9-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]]
-    ; GFX9-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1)
-    ; GFX9-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH14]]
-    ; GFX9-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1)
-    ; GFX9-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]]
-    ; GFX9-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UADDO40]], [[ADD12]]
-    ; GFX9-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1)
-    ; GFX9-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT19]]
+    ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH13]]
+    ; GFX9-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH14]], [[UADDO19]]
+    ; GFX9-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE45]]
+    ; GFX9-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UADDE42]], [[UADDO19]]
+    ; GFX9-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[UADDE46]], [[C5]], [[UADDE49]]
     ; GFX9-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV38]]
-    ; GFX9-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD14]]
-    ; GFX9-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO42]]
-    ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD15]], [[UADDO45]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO44]], [[C4]]
+    ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE50]]
+    ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE48]]
+    ; GFX9-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD3]], [[UADDO21]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_24:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_25:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO20]], [[C4]]
     ; GFX9-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_24]](s64)
     ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[UV41]](s32)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDE4]], [[ANYEXT4]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE12]](s32), [[UADDO44]], [[AMDGPU_MAD_U64_U32_26]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_26:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_27:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDE52]], [[ANYEXT4]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_28:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_29:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE12]](s32), [[UADDO20]], [[AMDGPU_MAD_U64_U32_26]]
     ; GFX9-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_28]](s64)
-    ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV40]]
-    ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO44]], [[UV42]]
-    ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[UV40]]
-    ; GFX9-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
-    ; GFX9-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1)
-    ; GFX9-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH16]]
-    ; GFX9-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1)
-    ; GFX9-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]]
-    ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV42]]
-    ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV40]]
-    ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[UV42]]
-    ; GFX9-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]]
-    ; GFX9-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1)
-    ; GFX9-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH18]]
-    ; GFX9-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1)
-    ; GFX9-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]]
-    ; GFX9-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD16]]
-    ; GFX9-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1)
-    ; GFX9-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ZEXT24]]
-    ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV42]]
-    ; GFX9-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD18]]
-    ; GFX9-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UADDO54]]
-    ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD19]], [[UADDO57]]
+    ; GFX9-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UADDE52]], [[UV40]]
+    ; GFX9-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UADDO20]], [[UV42]]
+    ; GFX9-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO20]], [[UV40]]
+    ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX9-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[UADDO22]], [[UMULH16]], [[UADDO23]]
+    ; GFX9-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE55]]
+    ; GFX9-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UADDE52]], [[UV42]]
+    ; GFX9-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE52]], [[UV40]]
+    ; GFX9-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO20]], [[UV42]]
+    ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH17]]
+    ; GFX9-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDO24]], [[UMULH18]], [[UADDO25]]
+    ; GFX9-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE59]]
+    ; GFX9-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDE58]], [[UADDE56]], [[UADDO25]]
+    ; GFX9-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[UADDE60]], [[C5]], [[UADDE63]]
+    ; GFX9-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE52]], [[UV42]]
+    ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE64]]
+    ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UADDE62]]
+    ; GFX9-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDE52]], [[ADD4]], [[UADDO27]]
     ; GFX9-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
     ; GFX9-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
-    ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO56]]
-    ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE6]]
-    ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO56]]
-    ; GFX9-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]]
-    ; GFX9-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1)
-    ; GFX9-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH20]]
-    ; GFX9-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1)
-    ; GFX9-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]]
-    ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE6]]
-    ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO56]]
-    ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE6]]
-    ; GFX9-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]]
-    ; GFX9-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1)
-    ; GFX9-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH22]]
-    ; GFX9-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1)
-    ; GFX9-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]]
-    ; GFX9-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD20]]
-    ; GFX9-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1)
-    ; GFX9-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT29]]
-    ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDE6]]
-    ; GFX9-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD22]]
+    ; GFX9-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO26]]
+    ; GFX9-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE66]]
+    ; GFX9-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO26]]
+    ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL15]], [[MUL16]]
+    ; GFX9-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH20]], [[UADDO29]]
+    ; GFX9-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE69]]
+    ; GFX9-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE66]]
+    ; GFX9-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO26]]
+    ; GFX9-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE66]]
+    ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[UMULH21]]
+    ; GFX9-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH22]], [[UADDO31]]
+    ; GFX9-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE73]]
+    ; GFX9-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDE72]], [[UADDE70]], [[UADDO31]]
+    ; GFX9-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[UADDE74]], [[C5]], [[UADDE77]]
+    ; GFX9-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDE66]]
+    ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE78]]
     ; GFX9-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV48]](s32), [[UADDO66]], [[C4]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_30:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_31:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV48]](s32), [[UADDE76]], [[C4]]
     ; GFX9-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_30]](s64)
     ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[UV51]](s32)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV48]](s32), [[ADD23]], [[ANYEXT5]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV49]](s32), [[UADDO66]], [[AMDGPU_MAD_U64_U32_32]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_32:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_33:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV48]](s32), [[ADD5]], [[ANYEXT5]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_34:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_35:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV49]](s32), [[UADDE76]], [[AMDGPU_MAD_U64_U32_32]]
     ; GFX9-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_34]](s64)
     ; GFX9-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[UV50]]
     ; GFX9-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[UV52]], [[USUBO11]]
@@ -1807,92 +1657,77 @@ body: |
     ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[MUL]]
     ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
     ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]]
+    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV10]]
     ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
     ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV10]]
     ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]]
-    ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
-    ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]]
+    ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE1]]
     ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
     ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV10]]
     ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
-    ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]]
-    ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]]
-    ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
-    ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]]
+    ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]]
+    ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE5]]
+    ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]]
+    ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C5]], [[UADDE9]]
     ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
-    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]]
-    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
-    ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
-    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C4]]
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]]
+    ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]]
+    ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]]
+    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C4]]
     ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64)
-    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
-    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[MUL5]]
-    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
-    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]]
-    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]]
-    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]]
-    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]]
-    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]]
-    ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]]
-    ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
-    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]]
-    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]]
-    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]]
-    ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]]
-    ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
-    ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]]
-    ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
-    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]]
-    ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]]
-    ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
-    ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]]
+    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]]
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[MUL5]]
+    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]]
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]]
+    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV12]]
+    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]]
+    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV12]]
+    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]]
+    ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]]
+    ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE15]]
+    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]]
+    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV12]]
+    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]]
+    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]]
+    ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]]
+    ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE19]]
+    ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]]
+    ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C5]], [[UADDE23]]
+    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]]
+    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]]
+    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]]
+    ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]]
     ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
     ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
-    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]]
-    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]]
-    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]]
-    ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]]
-    ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]]
-    ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
-    ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]]
-    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]]
-    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]]
-    ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]]
-    ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
-    ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]]
-    ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
-    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]]
-    ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]]
+    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO10]]
+    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE26]]
+    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO10]]
+    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]]
+    ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]]
+    ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE29]]
+    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE26]]
+    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO10]]
+    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE26]]
+    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]]
+    ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]]
+    ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE33]]
+    ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]]
+    ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C5]], [[UADDE37]]
+    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE26]]
+    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]]
     ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
-    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C4]]
+    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDE36]], [[C4]]
     ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
-    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]]
-    ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[MUL13]]
-    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO32]]
-    ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]]
+    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD6]]
+    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[MUL13]]
+    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDE36]]
+    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]]
     ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]]
-    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]]
-    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]]
+    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD8]], [[USUBO3]]
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD8]]
     ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
     ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
     ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV23]]
@@ -1939,94 +1774,79 @@ body: |
     ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[FPTOUI2]], [[C4]]
     ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64)
     ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]]
-    ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV31]], [[MUL15]]
+    ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV31]], [[MUL15]]
     ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[FPTOUI2]]
-    ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[MUL16]]
+    ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[MUL16]]
     ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[UV30]]
-    ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]]
+    ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD10]]
     ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[UV30]]
-    ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[MUL18]]
-    ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
-    ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UMULH12]]
-    ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
-    ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]]
-    ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]]
+    ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL17]], [[MUL18]]
+    ; GFX10-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH12]], [[UADDO17]]
+    ; GFX10-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE41]]
+    ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD10]]
     ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[UV30]]
-    ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]]
-    ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL19]], [[UMULH13]]
-    ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1)
-    ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH14]]
-    ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1)
-    ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]]
-    ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UADDO40]], [[ADD20]]
-    ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1)
-    ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]]
-    ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]]
-    ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[ADD22]]
-    ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO42]]
-    ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO45]]
-    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO44]], [[C4]]
+    ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD10]]
+    ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL19]], [[UMULH13]]
+    ; GFX10-NEXT: [[UADDE44:%[0-9]+]]:_(s32), [[UADDE45:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH14]], [[UADDO19]]
+    ; GFX10-NEXT: [[UADDE46:%[0-9]+]]:_(s32), [[UADDE47:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE45]]
+    ; GFX10-NEXT: [[UADDE48:%[0-9]+]]:_(s32), [[UADDE49:%[0-9]+]]:_(s1) = G_UADDE [[UADDE44]], [[UADDE42]], [[UADDO19]]
+    ; GFX10-NEXT: [[UADDE50:%[0-9]+]]:_(s32), [[UADDE51:%[0-9]+]]:_(s1) = G_UADDE [[UADDE46]], [[C5]], [[UADDE49]]
+    ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD10]]
+    ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH15]], [[UADDE50]]
+    ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDE48]]
+    ; GFX10-NEXT: [[UADDE52:%[0-9]+]]:_(s32), [[UADDE53:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD11]], [[UADDO21]]
+    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO8]](s32), [[UADDO20]], [[C4]]
     ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_8]](s64)
-    ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE4]]
-    ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV33]], [[MUL20]]
-    ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[UADDO44]]
-    ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[MUL21]]
-    ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[UV32]]
-    ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[UADDO44]], [[ADD25]]
-    ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[UV32]]
-    ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL22]], [[MUL23]]
-    ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1)
-    ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH16]]
-    ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1)
-    ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]]
-    ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD25]]
-    ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[UV32]]
-    ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[ADD25]]
-    ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL24]], [[UMULH17]]
-    ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1)
-    ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH18]]
-    ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1)
-    ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]]
-    ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD26]]
-    ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1)
-    ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]]
-    ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD25]]
-    ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD28]]
-    ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UADDO54]]
-    ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD29]], [[UADDO57]]
+    ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE52]]
+    ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV33]], [[MUL20]]
+    ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[UADDO20]]
+    ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[MUL21]]
+    ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[UADDE52]], [[UV32]]
+    ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[UADDO20]], [[ADD13]]
+    ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[UADDO20]], [[UV32]]
+    ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[MUL22]], [[MUL23]]
+    ; GFX10-NEXT: [[UADDE54:%[0-9]+]]:_(s32), [[UADDE55:%[0-9]+]]:_(s1) = G_UADDE [[UADDO22]], [[UMULH16]], [[UADDO23]]
+    ; GFX10-NEXT: [[UADDE56:%[0-9]+]]:_(s32), [[UADDE57:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE55]]
+    ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[UADDE52]], [[ADD13]]
+    ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[UADDE52]], [[UV32]]
+    ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[UADDO20]], [[ADD13]]
+    ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL24]], [[UMULH17]]
+    ; GFX10-NEXT: [[UADDE58:%[0-9]+]]:_(s32), [[UADDE59:%[0-9]+]]:_(s1) = G_UADDE [[UADDO24]], [[UMULH18]], [[UADDO25]]
+    ; GFX10-NEXT: [[UADDE60:%[0-9]+]]:_(s32), [[UADDE61:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE59]]
+    ; GFX10-NEXT: [[UADDE62:%[0-9]+]]:_(s32), [[UADDE63:%[0-9]+]]:_(s1) = G_UADDE [[UADDE58]], [[UADDE56]], [[UADDO25]]
+    ; GFX10-NEXT: [[UADDE64:%[0-9]+]]:_(s32), [[UADDE65:%[0-9]+]]:_(s1) = G_UADDE [[UADDE60]], [[C5]], [[UADDE63]]
+    ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[UADDE52]], [[ADD13]]
+    ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[UADDE64]]
+    ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UADDE62]]
+    ; GFX10-NEXT: [[UADDE66:%[0-9]+]]:_(s32), [[UADDE67:%[0-9]+]]:_(s1) = G_UADDE [[UADDE52]], [[ADD14]], [[UADDO27]]
     ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
     ; GFX10-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
-    ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[UV37]], [[UADDO56]]
-    ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[UV36]], [[UADDE6]]
-    ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV36]], [[UADDO56]]
-    ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL25]], [[MUL26]]
-    ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1)
-    ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH20]]
-    ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1)
-    ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]]
-    ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UV37]], [[UADDE6]]
-    ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV37]], [[UADDO56]]
-    ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV36]], [[UADDE6]]
-    ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[UMULH21]]
-    ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1)
-    ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH22]]
-    ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1)
-    ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]]
-    ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD30]]
-    ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1)
-    ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]]
-    ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV37]], [[UADDE6]]
-    ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[ADD32]]
+    ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[UV37]], [[UADDO26]]
+    ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[UV36]], [[UADDE66]]
+    ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[UV36]], [[UADDO26]]
+    ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL25]], [[MUL26]]
+    ; GFX10-NEXT: [[UADDE68:%[0-9]+]]:_(s32), [[UADDE69:%[0-9]+]]:_(s1) = G_UADDE [[UADDO28]], [[UMULH20]], [[UADDO29]]
+    ; GFX10-NEXT: [[UADDE70:%[0-9]+]]:_(s32), [[UADDE71:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE69]]
+    ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UV37]], [[UADDE66]]
+    ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UV37]], [[UADDO26]]
+    ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UV36]], [[UADDE66]]
+    ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[UMULH21]]
+    ; GFX10-NEXT: [[UADDE72:%[0-9]+]]:_(s32), [[UADDE73:%[0-9]+]]:_(s1) = G_UADDE [[UADDO30]], [[UMULH22]], [[UADDO31]]
+    ; GFX10-NEXT: [[UADDE74:%[0-9]+]]:_(s32), [[UADDE75:%[0-9]+]]:_(s1) = G_UADDE [[C5]], [[C5]], [[UADDE73]]
+    ; GFX10-NEXT: [[UADDE76:%[0-9]+]]:_(s32), [[UADDE77:%[0-9]+]]:_(s1) = G_UADDE [[UADDE72]], [[UADDE70]], [[UADDO31]]
+    ; GFX10-NEXT: [[UADDE78:%[0-9]+]]:_(s32), [[UADDE79:%[0-9]+]]:_(s1) = G_UADDE [[UADDE74]], [[C5]], [[UADDE77]]
+    ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UV37]], [[UADDE66]]
+    ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH23]], [[UADDE78]]
     ; GFX10-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
-    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV38]](s32), [[UADDO66]], [[C4]]
+    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV38]](s32), [[UADDE76]], [[C4]]
     ; GFX10-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64)
-    ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UV38]], [[ADD33]]
-    ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[UV41]], [[MUL28]]
-    ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UV39]], [[UADDO66]]
-    ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[MUL29]]
+    ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UV38]], [[ADD15]]
+    ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV41]], [[MUL28]]
+    ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UV39]], [[UADDE76]]
+    ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL29]]
     ; GFX10-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV34]], [[UV40]]
-    ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV35]], [[ADD35]], [[USUBO11]]
-    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV35]], [[ADD35]]
+    ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV35]], [[ADD17]], [[USUBO11]]
+    ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV35]], [[ADD17]]
     ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO10]](s32), [[USUBE14]](s32)
     ; GFX10-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
     ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV43]]
@@ -2723,6 +2543,7 @@ body: |
     ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]]
     ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]]
     ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
     ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
     ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
@@ -2732,88 +2553,72 @@ body: |
     ; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
     ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
     ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]]
-    ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH1]], [[UADDO1]]
+    ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE1]]
     ; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
     ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
     ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
-    ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
-    ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]]
-    ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
-    ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH3]], [[UADDO3]]
+    ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]]
+    ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]]
+    ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDE9]]
     ; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
-    ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
-    ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
-    ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
-    ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]]
-    ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
-    ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
-    ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]]
-    ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
-    ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]]
-    ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]]
-    ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]]
-    ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]]
-    ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
-    ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]]
-    ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
-    ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]]
-    ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]]
-    ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]]
-    ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
-    ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]]
-    ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]]
-    ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
-    ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]]
-    ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]]
-    ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
-    ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]]
+    ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE10]]
+    ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]]
+    ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]]
+    ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO4]]
+    ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]]
+    ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]]
+    ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO4]]
+    ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]]
+    ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[MUL6]]
+    ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]]
+    ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[MUL6]]
+    ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH6]], [[UADDO7]]
+    ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE15]]
+    ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]]
+    ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[MUL6]]
+    ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]]
+    ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH8]], [[UADDO9]]
+    ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]]
+    ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]]
+    ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C6]], [[UADDE23]]
+    ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]]
+    ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE24]]
+    ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]]
+    ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]]
     ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
     ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
-    ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]]
-    ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]]
-    ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]]
-    ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
-    ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]]
-    ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
-    ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]]
-    ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]]
-    ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]]
-    ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
-    ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]]
-    ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]]
-    ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
-    ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]]
-    ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]]
+    ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO10]]
+    ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE26]]
+    ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO10]]
+    ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH10]], [[UADDO13]]
+    ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE29]]
+    ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE26]]
+    ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO10]]
+    ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE26]]
+    ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH12]], [[UADDO15]]
+    ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]]
+    ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]]
+    ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C6]], [[UADDE37]]
+    ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE26]]
+    ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE38]]
     ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
-    ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]]
-    ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]]
-    ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]]
-    ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]]
-    ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
-    ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]]
+    ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDE36]]
+    ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDE36]]
+    ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD6]]
+    ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDE36]]
+    ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]]
     ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]]
-    ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]]
-    ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]]
+    ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD8]], [[USUBO3]]
+    ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD8]]
     ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
     ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
     ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]]
@@ -2878,88 +2683,73 @@ body: |
     ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]]
     ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]]
     ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
+    ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]]
     ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]]
     ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]]
     ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
-    ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]]
+    ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE1]]
     ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]]
     ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]]
     ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]]
-    ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]]
-    ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]]
-    ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]]
+    ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]]
+    ; GFX8-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]]
+    ; GFX8-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDE9]]
     ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]]
-    ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
-    ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C5]]
+    ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]]
+    ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]]
+    ; GFX8-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C5]]
     ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64)
     ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]]
     ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64)
-    ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]]
-    ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]]
-    ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]]
-    ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]]
-    ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
-    ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]]
-    ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]]
-    ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]]
-    ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
-    ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
-    ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX8-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]]
-    ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]]
-    ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]]
-    ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]]
-    ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX8-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
-    ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]]
+    ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV10]]
+    ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV12]]
+    ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV10]]
+    ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX8-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]]
+    ; GFX8-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE15]]
+    ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV12]]
+    ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV10]]
+    ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV12]]
+    ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
+    ; GFX8-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]]
+    ; GFX8-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]]
+    ; GFX8-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]]
+    ; GFX8-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C6]], [[UADDE23]]
+    ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV12]]
+    ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]]
+    ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]]
+    ; GFX8-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]]
     ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
     ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
-    ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]]
-    ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]]
-    ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]]
-    ; GFX8-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
-    ; GFX8-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX8-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]]
-    ; GFX8-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
-    ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]]
-    ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]]
-    ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]]
-    ; GFX8-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
-    ; GFX8-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX8-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
-    ; GFX8-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX8-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]]
-    ; GFX8-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]]
-    ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]]
-    ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]]
+    ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO10]]
+    ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE26]]
+    ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO10]]
+    ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
+    ; GFX8-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]]
+    ; GFX8-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE29]]
+    ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE26]]
+    ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO10]]
+    ; GFX8-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE26]]
+    ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
+    ; GFX8-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]]
+    ; GFX8-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]]
+    ; GFX8-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]]
+    ; GFX8-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C6]], [[UADDE37]]
+    ; GFX8-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE26]]
+    ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]]
     ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C5]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDE36]], [[C5]]
     ; GFX8-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64)
     ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32)
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]]
-    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD2]], [[ANYEXT2]]
+    ; GFX8-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]]
     ; GFX8-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64)
     ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]]
     ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]]
@@ -3028,88 +2818,73 @@ body: |
     ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[FPTOUI1]], [[ANYEXT]]
     ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[FPTOUI]], [[AMDGPU_MAD_U64_U32_2]]
     ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
+    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]]
     ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV8]]
     ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]]
     ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]]
-    ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
-    ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]]
+    ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE1]]
     ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV8]]
     ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]]
     ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV8]]
-    ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
-    ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]]
-    ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]]
-    ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]]
+    ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]]
+    ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]]
+    ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]]
+    ; GFX9-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]]
+    ; GFX9-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDE9]]
     ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV8]]
-    ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]]
-    ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
-    ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD3]], [[UADDO11]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C5]]
+    ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]]
+    ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]]
+    ; GFX9-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD]], [[UADDO5]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_6:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_7:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C5]]
     ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_6]](s64)
     ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV11]](s32)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE]], [[ANYEXT1]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO10]], [[AMDGPU_MAD_U64_U32_8]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_8:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_9:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDE12]], [[ANYEXT1]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_10:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_11:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBE]](s32), [[UADDO4]], [[AMDGPU_MAD_U64_U32_8]]
     ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_10]](s64)
-    ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV10]]
-    ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[UV12]]
-    ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV10]]
-    ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
-    ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]]
-    ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
-    ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV12]]
-    ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV10]]
-    ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV12]]
-    ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
-    ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
-    ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX9-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD4]]
-    ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]]
-    ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV12]]
-    ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]]
-    ; GFX9-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX9-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
-    ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD7]], [[UADDO23]]
+    ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV10]]
+    ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[UV12]]
+    ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV10]]
+    ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX9-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]]
+    ; GFX9-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE15]]
+    ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV12]]
+    ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV10]]
+    ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV12]]
+    ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]]
+    ; GFX9-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]]
+    ; GFX9-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]]
+    ; GFX9-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]]
+    ; GFX9-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C6]], [[UADDE23]]
+    ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV12]]
+    ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]]
+    ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]]
+    ; GFX9-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD1]], [[UADDO11]]
     ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
     ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
-    ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO22]]
-    ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE2]]
-    ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO22]]
-    ; GFX9-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
-    ; GFX9-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX9-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]]
-    ; GFX9-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
-    ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE2]]
-    ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO22]]
-    ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE2]]
-    ; GFX9-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
-    ; GFX9-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX9-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
-    ; GFX9-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX9-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD8]]
-    ; GFX9-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT14]]
-    ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE2]]
-    ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD10]]
+    ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO10]]
+    ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE26]]
+    ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO10]]
+    ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]]
+    ; GFX9-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]]
+    ; GFX9-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE29]]
+    ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE26]]
+    ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO10]]
+    ; GFX9-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE26]]
+    ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH9]]
+    ; GFX9-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]]
+    ; GFX9-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]]
+    ; GFX9-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]]
+    ; GFX9-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C6]], [[UADDE37]]
+    ; GFX9-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE26]]
+    ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]]
     ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDO32]], [[C5]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_12:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_13:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[UADDE36]], [[C5]]
     ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_12]](s64)
     ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV21]](s32)
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD11]], [[ANYEXT2]]
-    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDO32]], [[AMDGPU_MAD_U64_U32_14]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_14:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_15:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV18]](s32), [[ADD2]], [[ANYEXT2]]
+    ; GFX9-NEXT: [[AMDGPU_MAD_U64_U32_16:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_17:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV19]](s32), [[UADDE36]], [[AMDGPU_MAD_U64_U32_14]]
     ; GFX9-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_16]](s64)
     ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV20]]
     ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV22]], [[USUBO3]]
@@ -3178,92 +2953,77 @@ body: |
     ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[MUL]]
     ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
     ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL1]]
+    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV6]]
     ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
     ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV6]]
     ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[MUL3]]
-    ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
-    ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
-    ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
-    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]]
+    ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE1]]
     ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
     ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV6]]
     ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
-    ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]]
-    ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
-    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]]
-    ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
-    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
-    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
-    ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
-    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[MUL4]], [[UMULH1]]
+    ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO2]], [[UMULH2]], [[UADDO3]]
+    ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]]
+    ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UADDE2]], [[UADDO3]]
+    ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDE9]]
     ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
-    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD4]]
-    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
-    ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
-    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO10]], [[C5]]
+    ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[UADDE10]]
+    ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE8]]
+    ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO5]]
+    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_2:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_3:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[USUBO]](s32), [[UADDO4]], [[C5]]
     ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_2]](s64)
-    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
-    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[MUL5]]
-    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
-    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[MUL6]]
-    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[UV8]]
-    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]]
-    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[UV8]]
-    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]]
-    ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
-    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH4]]
-    ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
-    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
-    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]]
-    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[UV8]]
-    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]]
-    ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]]
-    ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
-    ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
-    ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
-    ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
-    ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]]
-    ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
-    ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
-    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]]
-    ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD10]]
-    ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
-    ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]]
+    ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE12]]
+    ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[MUL5]]
+    ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO4]]
+    ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL6]]
+    ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[UV8]]
+    ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UADDO4]], [[ADD4]]
+    ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[UV8]]
+    ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL7]], [[MUL8]]
+    ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH4]], [[UADDO7]]
+    ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE15]]
+    ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD4]]
+    ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[UV8]]
+    ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO4]], [[ADD4]]
+    ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[UMULH5]]
+    ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO8]], [[UMULH6]], [[UADDO9]]
+    ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]]
+    ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE18]], [[UADDE16]], [[UADDO9]]
+    ; GFX10-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C6]], [[UADDE23]]
+    ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD4]]
+    ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[UADDE24]]
+    ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UADDE22]]
+    ; GFX10-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD5]], [[UADDO11]]
     ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
     ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
-    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]]
-    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]]
-    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]]
-    ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]]
-    ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
-    ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH8]]
-    ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
-    ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
-    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]]
-    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]]
-    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]]
-    ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]]
-    ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
-    ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
-    ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
-    ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
-    ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]]
-    ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
-    ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
-    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]]
-    ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[ADD14]]
+    ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO10]]
+    ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE26]]
+    ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO10]]
+    ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL10]], [[MUL11]]
+    ; GFX10-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]]
+    ; GFX10-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE29]]
+    ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE26]]
+    ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO10]]
+    ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE26]]
+    ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[UMULH9]]
+    ; GFX10-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO14]], [[UMULH10]], [[UADDO15]]
+    ; GFX10-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]]
+    ; GFX10-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDE32]], [[UADDE30]], [[UADDO15]]
+    ; GFX10-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[UADDE34]], [[C6]], [[UADDE37]]
+    ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE26]]
+    ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH11]], [[UADDE38]]
     ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
-    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV14]](s32), [[UADDO32]], [[C5]]
+    ; GFX10-NEXT: [[AMDGPU_MAD_U64_U32_4:%[0-9]+]]:_(s64), [[AMDGPU_MAD_U64_U32_5:%[0-9]+]]:_(s1) = G_AMDGPU_MAD_U64_U32 [[UV14]](s32), [[UADDE36]], [[C5]]
     ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_MAD_U64_U32_4]](s64)
-    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]]
-    ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL13]]
-    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]]
-    ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[MUL14]]
+    ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD6]]
+    ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[MUL13]]
+    ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDE36]]
+    ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[MUL14]]
     ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV16]]
-    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]]
-    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]]
+    ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD8]], [[USUBO3]]
+    ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD8]]
     ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
     ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
     ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV19]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
index 377fa24cb4755..cafd4c1c5c813 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
@@ -26,133 +26,116 @@ define i64 @v_sdiv_i64(i64 %num, i64 %den) {
 ; CHECK-NEXT:  .LBB0_3:
 ; CHECK-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
 ; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v2, v0
-; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v3, v0, vcc
-; CHECK-NEXT:    v_xor_b32_e32 v2, v1, v0
-; CHECK-NEXT:    v_xor_b32_e32 v1, v3, v0
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, v2
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v6, v1
-; CHECK-NEXT:    v_sub_i32_e32 v10, vcc, 0, v2
-; CHECK-NEXT:    v_subb_u32_e32 v11, vcc, 0, v1, vcc
-; CHECK-NEXT:    v_mac_f32_e32 v3, 0x4f800000, v6
-; CHECK-NEXT:    v_rcp_iflag_f32_e32 v3, v3
-; CHECK-NEXT:    v_mul_f32_e32 v3, 0x5f7ffffc, v3
-; CHECK-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v3
-; CHECK-NEXT:    v_trunc_f32_e32 v8, v6
-; CHECK-NEXT:    v_mac_f32_e32 v3, 0xcf800000, v8
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v9, v3
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v12, v8
-; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v10, v9, 0
-; CHECK-NEXT:    v_mov_b32_e32 v3, v7
-; CHECK-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v10, v12, v[3:4]
-; CHECK-NEXT:    v_mul_lo_u32 v3, v12, v6
-; CHECK-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v11, v9, v[7:8]
-; CHECK-NEXT:    v_mul_hi_u32 v8, v9, v6
-; CHECK-NEXT:    v_mul_hi_u32 v6, v12, v6
-; CHECK-NEXT:    v_mul_lo_u32 v13, v9, v7
-; CHECK-NEXT:    v_mul_lo_u32 v14, v12, v7
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v3, v0, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v6, v1, v0
+; CHECK-NEXT:    v_xor_b32_e32 v7, v2, v0
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v1, v6
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, v7
+; CHECK-NEXT:    v_sub_i32_e32 v9, vcc, 0, v6
+; CHECK-NEXT:    v_subb_u32_e32 v10, vcc, 0, v7, vcc
+; CHECK-NEXT:    v_mac_f32_e32 v1, 0x4f800000, v2
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v1, v1
+; CHECK-NEXT:    v_mul_f32_e32 v1, 0x5f7ffffc, v1
+; CHECK-NEXT:    v_mul_f32_e32 v2, 0x2f800000, v1
+; CHECK-NEXT:    v_trunc_f32_e32 v3, v2
+; CHECK-NEXT:    v_mac_f32_e32 v1, 0xcf800000, v3
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v8, v1
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v11, v3
+; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v9, v8, 0
+; CHECK-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v9, v11, v[2:3]
+; CHECK-NEXT:    v_mul_hi_u32 v12, v8, v1
+; CHECK-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v10, v8, v[2:3]
+; CHECK-NEXT:    v_mul_lo_u32 v3, v11, v1
+; CHECK-NEXT:    v_mul_hi_u32 v1, v11, v1
+; CHECK-NEXT:    v_mul_lo_u32 v13, v8, v2
+; CHECK-NEXT:    v_mul_lo_u32 v14, v11, v2
 ; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v13
-; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v3, v12, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v12, v8, v2
+; CHECK-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, vcc
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v14, v1
+; CHECK-NEXT:    v_mul_hi_u32 v2, v11, v2
+; CHECK-NEXT:    v_addc_u32_e64 v1, s[4:5], v1, v12, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v12, s[4:5], 0, 0, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, 0, v12, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v1
+; CHECK-NEXT:    v_addc_u32_e32 v11, vcc, v11, v2, vcc
+; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v9, v8, 0
+; CHECK-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v9, v11, v[2:3]
+; CHECK-NEXT:    v_ashrrev_i32_e32 v9, 31, v5
+; CHECK-NEXT:    v_mul_hi_u32 v12, v8, v1
+; CHECK-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v10, v8, v[2:3]
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v4, v9
+; CHECK-NEXT:    v_addc_u32_e32 v4, vcc, v5, v9, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v5, v3, v9
+; CHECK-NEXT:    v_mul_lo_u32 v3, v11, v1
+; CHECK-NEXT:    v_mul_lo_u32 v10, v8, v2
+; CHECK-NEXT:    v_mul_hi_u32 v1, v11, v1
+; CHECK-NEXT:    v_xor_b32_e32 v4, v4, v9
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v10
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v3, v12, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v3, v11, v2
+; CHECK-NEXT:    v_mul_hi_u32 v12, v8, v2
+; CHECK-NEXT:    v_addc_u32_e64 v10, s[4:5], 0, 0, vcc
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v3, v1
+; CHECK-NEXT:    v_mul_hi_u32 v2, v11, v2
+; CHECK-NEXT:    v_addc_u32_e64 v1, s[4:5], v1, v12, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v1, v10, vcc
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v11, v2, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v3, v4, v1
+; CHECK-NEXT:    v_mul_lo_u32 v8, v5, v2
+; CHECK-NEXT:    v_mul_hi_u32 v10, v5, v1
+; CHECK-NEXT:    v_mul_hi_u32 v1, v4, v1
 ; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v8
-; CHECK-NEXT:    v_mul_hi_u32 v8, v9, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v13, v3
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v14, v6
-; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v13, v8
-; CHECK-NEXT:    v_mul_hi_u32 v7, v12, v7
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
-; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v3
-; CHECK-NEXT:    v_addc_u32_e32 v12, vcc, v12, v6, vcc
-; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v10, v9, 0
-; CHECK-NEXT:    v_mov_b32_e32 v3, v7
-; CHECK-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v10, v12, v[3:4]
-; CHECK-NEXT:    v_ashrrev_i32_e32 v10, 31, v5
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v4, v10
-; CHECK-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v11, v9, v[7:8]
-; CHECK-NEXT:    v_addc_u32_e32 v4, vcc, v5, v10, vcc
-; CHECK-NEXT:    v_xor_b32_e32 v8, v3, v10
-; CHECK-NEXT:    v_mul_lo_u32 v3, v12, v6
-; CHECK-NEXT:    v_mul_lo_u32 v5, v9, v7
-; CHECK-NEXT:    v_xor_b32_e32 v11, v4, v10
-; CHECK-NEXT:    v_mul_hi_u32 v4, v9, v6
-; CHECK-NEXT:    v_mul_hi_u32 v6, v12, v6
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
-; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; CHECK-NEXT:    v_mul_lo_u32 v4, v12, v7
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
-; CHECK-NEXT:    v_mul_hi_u32 v5, v9, v7
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
-; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; CHECK-NEXT:    v_mul_hi_u32 v6, v12, v7
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
-; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v9, v3
-; CHECK-NEXT:    v_addc_u32_e32 v4, vcc, v12, v4, vcc
-; CHECK-NEXT:    v_mul_lo_u32 v5, v11, v3
-; CHECK-NEXT:    v_mul_lo_u32 v6, v8, v4
-; CHECK-NEXT:    v_mul_hi_u32 v7, v8, v3
-; CHECK-NEXT:    v_mul_hi_u32 v3, v11, v3
-; CHECK-NEXT:    v_mul_hi_u32 v9, v11, v4
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
-; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT:    v_mul_lo_u32 v7, v11, v4
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; CHECK-NEXT:    v_mul_hi_u32 v6, v8, v4
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v7, v3
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
-; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v3, v5
-; CHECK-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v2, v7, 0
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v9, v5
-; CHECK-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v2, v6, v[4:5]
-; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v8, v3
-; CHECK-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v1, v7, v[4:5]
-; CHECK-NEXT:    v_subb_u32_e64 v5, s[4:5], v11, v4, vcc
-; CHECK-NEXT:    v_sub_i32_e64 v4, s[4:5], v11, v4
-; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v1
-; CHECK-NEXT:    v_subb_u32_e32 v4, vcc, v4, v1, vcc
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
-; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v3, v2
-; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v3, v2
-; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
-; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], v5, v1
-; CHECK-NEXT:    v_subbrev_u32_e32 v4, vcc, 0, v4, vcc
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, v8, v9, s[4:5]
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, 1, v7
-; CHECK-NEXT:    v_addc_u32_e32 v9, vcc, 0, v6, vcc
-; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v1
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v3, v10, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v3, v4, v2
+; CHECK-NEXT:    v_mul_hi_u32 v10, v5, v2
+; CHECK-NEXT:    v_addc_u32_e64 v8, s[4:5], 0, 0, vcc
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v3, v1
+; CHECK-NEXT:    v_addc_u32_e64 v1, s[4:5], v1, v10, vcc
+; CHECK-NEXT:    v_addc_u32_e32 v8, vcc, v1, v8, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v10, v4, v2
+; CHECK-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5]
+; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v6, v8, 0
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v3
+; CHECK-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v6, v10, v[2:3]
+; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v5, v1
+; CHECK-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v7, v8, v[2:3]
+; CHECK-NEXT:    v_subb_u32_e64 v3, s[4:5], v4, v2, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v2, s[4:5], v4, v2
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v3, v7
+; CHECK-NEXT:    v_subb_u32_e32 v2, vcc, v2, v7, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v6
+; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v7
+; CHECK-NEXT:    v_subbrev_u32_e32 v2, vcc, 0, v2, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v3, v4, v5, s[4:5]
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, 1, v8
+; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, 0, v10, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v7
 ; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
-; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v2
-; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
-; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1
-; CHECK-NEXT:    v_cndmask_b32_e32 v1, v11, v2, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 1, v8
-; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, 0, v9, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v7
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v11, v1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 1, v4
+; CHECK-NEXT:    v_addc_u32_e32 v6, vcc, 0, v5, vcc
 ; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
-; CHECK-NEXT:    v_cndmask_b32_e32 v1, v8, v2, vcc
-; CHECK-NEXT:    v_cndmask_b32_e32 v2, v9, v3, vcc
-; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
-; CHECK-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
-; CHECK-NEXT:    v_xor_b32_e32 v3, v10, v0
-; CHECK-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v4, v2, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v2, v5, v6, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v3, v9, v0
+; CHECK-NEXT:    v_cndmask_b32_e32 v2, v10, v2, vcc
 ; CHECK-NEXT:    v_xor_b32_e32 v0, v1, v3
 ; CHECK-NEXT:    v_xor_b32_e32 v1, v2, v3
 ; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
@@ -235,18 +218,13 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:    v_mul_hi_u32 v8, v3, v1
 ; CHECK-NEXT:    v_mul_hi_u32 v1, v4, v1
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
-; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
-; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v2, v5, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v2, s[0:1], 0, 0, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v8
-; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
+; CHECK-NEXT:    v_addc_u32_e64 v0, s[0:1], v0, v8, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v5, s[0:1], 0, 0, s[0:1]
+; CHECK-NEXT:    v_addc_u32_e32 v0, vcc, v0, v2, vcc
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, 0, v5, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
 ; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v0
 ; CHECK-NEXT:    v_addc_u32_e32 v4, vcc, v4, v1, vcc
@@ -258,21 +236,16 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:    v_mul_hi_u32 v0, v4, v0
 ; CHECK-NEXT:    v_mul_lo_u32 v5, v3, v1
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
-; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT:    v_mul_lo_u32 v6, v4, v1
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
-; CHECK-NEXT:    v_mul_hi_u32 v5, v3, v1
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v6, v0
-; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v5
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v2, v6, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v2, v4, v1
+; CHECK-NEXT:    v_mul_hi_u32 v6, v3, v1
+; CHECK-NEXT:    v_addc_u32_e64 v5, s[0:1], 0, 0, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
 ; CHECK-NEXT:    v_mul_hi_u32 v1, v4, v1
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
+; CHECK-NEXT:    v_addc_u32_e64 v0, s[0:1], v0, v6, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1]
+; CHECK-NEXT:    v_addc_u32_e32 v0, vcc, v0, v5, vcc
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
 ; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v4, v1, vcc
@@ -280,38 +253,33 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:    v_mul_lo_u32 v3, s12, v1
 ; CHECK-NEXT:    v_mul_hi_u32 v4, s12, v0
 ; CHECK-NEXT:    v_mul_hi_u32 v0, s13, v0
-; CHECK-NEXT:    v_mul_hi_u32 v5, s13, v1
+; CHECK-NEXT:    v_mov_b32_e32 v5, s13
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
-; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT:    v_mul_lo_u32 v4, s13, v1
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; CHECK-NEXT:    v_mul_hi_u32 v3, s12, v1
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
-; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v3
-; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v0, v2
-; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s10, v4, 0
-; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v2, v4, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v2, s13, v1
+; CHECK-NEXT:    v_mul_hi_u32 v4, s12, v1
+; CHECK-NEXT:    v_addc_u32_e64 v3, s[0:1], 0, 0, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
+; CHECK-NEXT:    v_addc_u32_e64 v0, s[0:1], v0, v4, vcc
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v0, v3, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v4, s13, v1
+; CHECK-NEXT:    v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1]
+; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s10, v3, 0
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
 ; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s10, v2, v[1:2]
-; CHECK-NEXT:    v_mov_b32_e32 v5, s13
 ; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, s12, v0
-; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s11, v4, v[1:2]
-; CHECK-NEXT:    v_mov_b32_e32 v3, s11
+; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s11, v3, v[1:2]
+; CHECK-NEXT:    v_mov_b32_e32 v4, s11
 ; CHECK-NEXT:    v_subb_u32_e64 v2, s[0:1], v5, v1, vcc
 ; CHECK-NEXT:    v_sub_i32_e64 v1, s[0:1], s13, v1
 ; CHECK-NEXT:    v_cmp_le_u32_e64 s[0:1], s11, v2
-; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v3, vcc
+; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v4, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[0:1]
 ; CHECK-NEXT:    v_cmp_le_u32_e64 s[0:1], s10, v0
 ; CHECK-NEXT:    v_subrev_i32_e32 v0, vcc, s10, v0
 ; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v4
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, 1, v3
 ; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[0:1]
 ; CHECK-NEXT:    v_cmp_eq_u32_e64 s[0:1], s11, v2
 ; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s11, v1
@@ -321,11 +289,11 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
 ; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s11, v1
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, 1, v3
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, 1, v4
 ; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
 ; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
 ; CHECK-NEXT:    s_xor_b64 s[0:1], s[6:7], s[8:9]
 ; CHECK-NEXT:    v_xor_b32_e32 v0, s0, v0
 ; CHECK-NEXT:    v_subrev_i32_e32 v0, vcc, s0, v0
@@ -404,105 +372,90 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_mul_hi_u32 v11, v9, v11
 ; GISEL-NEXT:    v_mul_lo_u32 v13, v14, v12
 ; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v17
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v17, v9, v12
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v13, v5
-; GISEL-NEXT:    v_mul_hi_u32 v13, v14, v12
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v17, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v17, v13
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v17, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v5, v9, v12
+; GISEL-NEXT:    v_mul_hi_u32 v17, v14, v12
+; GISEL-NEXT:    v_addc_u32_e64 v13, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v11
 ; GISEL-NEXT:    v_mul_hi_u32 v12, v9, v12
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v11, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v13, v11
+; GISEL-NEXT:    v_addc_u32_e64 v5, s[4:5], v5, v17, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v13, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v11, vcc, 0, v11, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
 ; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v5
-; GISEL-NEXT:    v_addc_u32_e32 v17, vcc, v9, v11, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v9, v11, vcc
 ; GISEL-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v15, v14, 0
-; GISEL-NEXT:    v_ashrrev_i32_e32 v9, 31, v1
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
 ; GISEL-NEXT:    v_mov_b32_e32 v5, v12
-; GISEL-NEXT:    v_mad_u64_u32 v[12:13], s[4:5], v15, v17, v[5:6]
-; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v9, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[12:13], s[4:5], v15, v9, v[5:6]
+; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v5
 ; GISEL-NEXT:    v_mad_u64_u32 v[12:13], s[4:5], v16, v14, v[12:13]
-; GISEL-NEXT:    v_xor_b32_e32 v5, v0, v9
-; GISEL-NEXT:    v_mul_lo_u32 v0, v17, v11
-; GISEL-NEXT:    v_mul_lo_u32 v13, v14, v12
-; GISEL-NEXT:    v_xor_b32_e32 v15, v1, v9
+; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v5, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v13, v0, v5
+; GISEL-NEXT:    v_mul_lo_u32 v0, v9, v11
+; GISEL-NEXT:    v_mul_lo_u32 v15, v14, v12
+; GISEL-NEXT:    v_xor_b32_e32 v16, v1, v5
 ; GISEL-NEXT:    v_mul_hi_u32 v1, v14, v11
-; GISEL-NEXT:    v_mul_hi_u32 v11, v17, v11
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v15
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, v0, v1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v0, v9, v12
+; GISEL-NEXT:    v_mul_hi_u32 v1, v9, v11
+; GISEL-NEXT:    v_mul_hi_u32 v15, v14, v12
+; GISEL-NEXT:    v_addc_u32_e64 v11, s[4:5], 0, 0, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v1, v17, v12
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v13, v0
-; GISEL-NEXT:    v_mul_hi_u32 v13, v14, v12
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
-; GISEL-NEXT:    v_mul_hi_u32 v12, v17, v12
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v11, v1
+; GISEL-NEXT:    v_mul_hi_u32 v12, v9, v12
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v15, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, v0, v11, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v12, v1
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v14, v0
-; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v17, v1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v11, v15, v0
-; GISEL-NEXT:    v_mul_lo_u32 v12, v5, v1
-; GISEL-NEXT:    v_mul_hi_u32 v13, v5, v0
-; GISEL-NEXT:    v_mul_hi_u32 v0, v15, v0
-; GISEL-NEXT:    v_mul_hi_u32 v14, v15, v1
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v13, v15, v1
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
-; GISEL-NEXT:    v_mul_hi_u32 v12, v5, v1
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v13, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v0, v11
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v10, v13, 0
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v11
-; GISEL-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v10, v14, v[1:2]
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v5, v0
-; GISEL-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v4, v13, v[11:12]
-; GISEL-NEXT:    v_subb_u32_e64 v1, s[4:5], v15, v11, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v5, s[4:5], v15, v11
+; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v9, v1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v9, v16, v0
+; GISEL-NEXT:    v_mul_lo_u32 v11, v13, v1
+; GISEL-NEXT:    v_mul_hi_u32 v12, v13, v0
+; GISEL-NEXT:    v_mul_hi_u32 v0, v16, v0
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v9, v12, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v9, v16, v1
+; GISEL-NEXT:    v_mul_hi_u32 v12, v13, v1
+; GISEL-NEXT:    v_addc_u32_e64 v11, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v12, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v14, vcc, v0, v11, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v11, v16, v1
+; GISEL-NEXT:    v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v10, v14, 0
+; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, 0, v9, vcc
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v11, v9
+; GISEL-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v10, v15, v[1:2]
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v13, v0
+; GISEL-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v4, v14, v[11:12]
+; GISEL-NEXT:    v_subb_u32_e64 v1, s[4:5], v16, v11, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v9, s[4:5], v16, v11
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v4
 ; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v10
 ; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[4:5]
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v4
-; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v5, v4, vcc
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v9, v4, vcc
 ; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v10
 ; GISEL-NEXT:    v_cndmask_b32_e64 v12, v11, v12, s[4:5]
 ; GISEL-NEXT:    v_subbrev_u32_e32 v11, vcc, 0, v1, vcc
-; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v7
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v6, v5
-; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v7, v5, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v7, v1, v5
-; GISEL-NEXT:    v_xor_b32_e32 v6, v6, v5
+; GISEL-NEXT:    v_ashrrev_i32_e32 v9, 31, v7
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v6, v9
+; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v7, v9, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v7, v1, v9
+; GISEL-NEXT:    v_xor_b32_e32 v6, v6, v9
 ; GISEL-NEXT:    v_cvt_f32_u32_e32 v1, v7
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v15, v6
-; GISEL-NEXT:    v_add_i32_e32 v16, vcc, 1, v13
-; GISEL-NEXT:    v_addc_u32_e32 v17, vcc, 0, v14, vcc
-; GISEL-NEXT:    v_mac_f32_e32 v1, 0x4f800000, v15
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v13, v6
+; GISEL-NEXT:    v_add_i32_e32 v16, vcc, 1, v14
+; GISEL-NEXT:    v_addc_u32_e32 v17, vcc, 0, v15, vcc
+; GISEL-NEXT:    v_mac_f32_e32 v1, 0x4f800000, v13
 ; GISEL-NEXT:    v_rcp_iflag_f32_e32 v1, v1
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v11, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, -1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, -1, vcc
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v10
 ; GISEL-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v1
 ; GISEL-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
@@ -515,130 +468,115 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v18, v18
 ; GISEL-NEXT:    v_subb_u32_e32 v21, vcc, 0, v6, vcc
 ; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v11, v4
-; GISEL-NEXT:    v_cndmask_b32_e32 v4, v15, v10, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v13, v10, vcc
 ; GISEL-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v20, v18, v[1:2]
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v16
 ; GISEL-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v21, v19, v[10:11]
-; GISEL-NEXT:    v_addc_u32_e32 v15, vcc, 0, v17, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v13, vcc, 0, v17, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
 ; GISEL-NEXT:    v_cndmask_b32_e32 v4, v16, v1, vcc
 ; GISEL-NEXT:    v_mul_lo_u32 v1, v18, v0
 ; GISEL-NEXT:    v_mul_lo_u32 v11, v19, v10
 ; GISEL-NEXT:    v_mul_hi_u32 v16, v19, v0
-; GISEL-NEXT:    v_cndmask_b32_e32 v15, v17, v15, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v13, v17, v13, vcc
 ; GISEL-NEXT:    v_mul_hi_u32 v0, v18, v0
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v16
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v16, v18, v10
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v11, v1
-; GISEL-NEXT:    v_mul_hi_u32 v11, v19, v10
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v16, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
+; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v16, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v1, v18, v10
+; GISEL-NEXT:    v_mul_hi_u32 v16, v19, v10
+; GISEL-NEXT:    v_addc_u32_e64 v11, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
 ; GISEL-NEXT:    v_mul_hi_u32 v10, v18, v10
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v11, v1
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v16, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, v0, v11, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v10, v1
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v19, v0
-; GISEL-NEXT:    v_addc_u32_e32 v11, vcc, v18, v1, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v20, v10, 0
+; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v19, v0
+; GISEL-NEXT:    v_addc_u32_e32 v17, vcc, v18, v1, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v20, v16, 0
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
-; GISEL-NEXT:    v_cndmask_b32_e32 v4, v13, v4, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v13, v9, v8
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v20, v11, v[1:2]
-; GISEL-NEXT:    v_cndmask_b32_e32 v12, v14, v15, vcc
-; GISEL-NEXT:    v_ashrrev_i32_e32 v14, 31, v3
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v21, v10, v[8:9]
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v14
-; GISEL-NEXT:    v_xor_b32_e32 v1, v4, v13
-; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v14, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v9, v2, v14
-; GISEL-NEXT:    v_mul_lo_u32 v2, v11, v0
-; GISEL-NEXT:    v_mul_lo_u32 v4, v10, v8
-; GISEL-NEXT:    v_xor_b32_e32 v15, v3, v14
-; GISEL-NEXT:    v_mul_hi_u32 v3, v10, v0
-; GISEL-NEXT:    v_mul_hi_u32 v0, v11, v0
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v3, v11, v8
+; GISEL-NEXT:    v_cndmask_b32_e32 v12, v14, v4, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v20, v17, v[1:2]
+; GISEL-NEXT:    v_xor_b32_e32 v1, v5, v8
+; GISEL-NEXT:    v_ashrrev_i32_e32 v8, 31, v3
+; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v21, v16, v[10:11]
+; GISEL-NEXT:    v_cndmask_b32_e32 v13, v15, v13, vcc
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v8
+; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v8, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v5, v2, v8
+; GISEL-NEXT:    v_mul_lo_u32 v2, v17, v0
+; GISEL-NEXT:    v_mul_lo_u32 v10, v16, v4
+; GISEL-NEXT:    v_xor_b32_e32 v11, v3, v8
+; GISEL-NEXT:    v_mul_hi_u32 v3, v16, v0
+; GISEL-NEXT:    v_mul_hi_u32 v0, v17, v0
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v10
+; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, v2, v3, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v2, v17, v4
+; GISEL-NEXT:    v_mul_hi_u32 v10, v16, v4
+; GISEL-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
+; GISEL-NEXT:    v_mul_hi_u32 v4, v17, v4
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v10, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, v0, v3, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
-; GISEL-NEXT:    v_mul_hi_u32 v4, v10, v8
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
-; GISEL-NEXT:    v_mul_hi_u32 v4, v11, v8
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v10, v0
-; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, v11, v2, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v3, v15, v0
-; GISEL-NEXT:    v_mul_lo_u32 v4, v9, v2
-; GISEL-NEXT:    v_mul_hi_u32 v10, v9, v0
-; GISEL-NEXT:    v_mul_hi_u32 v0, v15, v0
-; GISEL-NEXT:    v_mul_hi_u32 v11, v15, v2
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v16, v0
+; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, v17, v2, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v3, v11, v0
+; GISEL-NEXT:    v_mul_lo_u32 v4, v5, v2
+; GISEL-NEXT:    v_xor_b32_e32 v10, v12, v1
+; GISEL-NEXT:    v_mul_hi_u32 v12, v5, v0
+; GISEL-NEXT:    v_mul_hi_u32 v0, v11, v0
 ; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v10, v15, v2
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
-; GISEL-NEXT:    v_mul_hi_u32 v4, v9, v2
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v10, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v10, v4
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v0, v3
-; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v7, v10, 0
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v0
+; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v12, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v3, v11, v2
+; GISEL-NEXT:    v_mul_hi_u32 v12, v5, v2
+; GISEL-NEXT:    v_addc_u32_e64 v4, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v12, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v12, vcc, v0, v4, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v4, v11, v2
+; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v7, v12, 0
+; GISEL-NEXT:    v_xor_b32_e32 v13, v13, v1
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v4, v0
 ; GISEL-NEXT:    v_mov_b32_e32 v0, v3
-; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v7, v11, v[0:1]
-; GISEL-NEXT:    v_xor_b32_e32 v8, v12, v13
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v1, v13
-; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v6, v10, v[3:4]
-; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v8, v13, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v9, v2
-; GISEL-NEXT:    v_subb_u32_e64 v4, s[4:5], v15, v3, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v15, v3
+; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v7, v14, v[0:1]
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v10, v1
+; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v6, v12, v[3:4]
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v13, v1, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v5, v2
+; GISEL-NEXT:    v_subb_u32_e64 v4, s[4:5], v11, v3, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v11, v3
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v6
 ; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v6, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[4:5]
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v7
 ; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v4, v6
 ; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, v8, v9, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, 1, v10
-; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, 0, v11, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, v5, v10, s[4:5]
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v12
+; GISEL-NEXT:    v_addc_u32_e32 v10, vcc, 0, v14, vcc
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v6
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v7
 ; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
 ; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v6
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, v12, v2, vcc
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v8
-; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, 0, v9, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v11, v2, vcc
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v5
+; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, 0, v10, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, v8, v3, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v9, v6, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v5, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v10, v6, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, v10, v2, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v4, v14, v5
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v11, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v12, v2, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v4, v8, v9
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v14, v3, vcc
 ; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v4
 ; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v4
 ; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v4
@@ -662,131 +600,116 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:  ; %bb.1:
 ; CGP-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
 ; CGP-NEXT:    v_add_i32_e32 v1, vcc, v4, v0
-; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v5, v0, vcc
-; CGP-NEXT:    v_xor_b32_e32 v2, v1, v0
-; CGP-NEXT:    v_xor_b32_e32 v1, v3, v0
-; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v2
-; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v1
-; CGP-NEXT:    v_sub_i32_e32 v13, vcc, 0, v2
-; CGP-NEXT:    v_subb_u32_e32 v14, vcc, 0, v1, vcc
-; CGP-NEXT:    v_mac_f32_e32 v3, 0x4f800000, v4
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, v3
-; CGP-NEXT:    v_mul_f32_e32 v3, 0x5f7ffffc, v3
-; CGP-NEXT:    v_mul_f32_e32 v4, 0x2f800000, v3
-; CGP-NEXT:    v_trunc_f32_e32 v5, v4
-; CGP-NEXT:    v_mac_f32_e32 v3, 0xcf800000, v5
-; CGP-NEXT:    v_cvt_u32_f32_e32 v12, v3
-; CGP-NEXT:    v_cvt_u32_f32_e32 v15, v5
-; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v13, v12, 0
-; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v13, v15, v[4:5]
-; CGP-NEXT:    v_mul_hi_u32 v16, v12, v3
-; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v14, v12, v[4:5]
-; CGP-NEXT:    v_mul_lo_u32 v5, v15, v3
-; CGP-NEXT:    v_mul_hi_u32 v3, v15, v3
-; CGP-NEXT:    v_mul_lo_u32 v17, v12, v4
-; CGP-NEXT:    v_mul_lo_u32 v18, v15, v4
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v17
-; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v16
-; CGP-NEXT:    v_mul_hi_u32 v16, v12, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v17, v5
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v18, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v16
-; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v16, vcc, v17, v16
-; CGP-NEXT:    v_mul_hi_u32 v4, v15, v4
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v16, v5
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v3
-; CGP-NEXT:    v_addc_u32_e32 v15, vcc, v15, v4, vcc
-; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v13, v12, 0
-; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v13, v15, v[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v2, vcc, v5, v0, vcc
+; CGP-NEXT:    v_xor_b32_e32 v4, v1, v0
+; CGP-NEXT:    v_xor_b32_e32 v5, v2, v0
+; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v4
+; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v5
+; CGP-NEXT:    v_sub_i32_e32 v13, vcc, 0, v4
+; CGP-NEXT:    v_subb_u32_e32 v14, vcc, 0, v5, vcc
+; CGP-NEXT:    v_mac_f32_e32 v1, 0x4f800000, v2
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v1, v1
+; CGP-NEXT:    v_mul_f32_e32 v1, 0x5f7ffffc, v1
+; CGP-NEXT:    v_mul_f32_e32 v2, 0x2f800000, v1
+; CGP-NEXT:    v_trunc_f32_e32 v3, v2
+; CGP-NEXT:    v_mac_f32_e32 v1, 0xcf800000, v3
+; CGP-NEXT:    v_cvt_u32_f32_e32 v12, v1
+; CGP-NEXT:    v_cvt_u32_f32_e32 v15, v3
+; CGP-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v13, v12, 0
+; CGP-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v13, v15, v[2:3]
+; CGP-NEXT:    v_mul_hi_u32 v16, v12, v1
+; CGP-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v14, v12, v[2:3]
+; CGP-NEXT:    v_mul_lo_u32 v3, v15, v1
+; CGP-NEXT:    v_mul_hi_u32 v1, v15, v1
+; CGP-NEXT:    v_mul_lo_u32 v17, v12, v2
+; CGP-NEXT:    v_mul_lo_u32 v18, v15, v2
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v17
+; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v16, vcc
+; CGP-NEXT:    v_mul_hi_u32 v16, v12, v2
+; CGP-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v18, v1
+; CGP-NEXT:    v_mul_hi_u32 v2, v15, v2
+; CGP-NEXT:    v_addc_u32_e64 v1, s[4:5], v1, v16, vcc
+; CGP-NEXT:    v_addc_u32_e64 v16, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; CGP-NEXT:    v_addc_u32_e32 v3, vcc, 0, v16, vcc
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v1
+; CGP-NEXT:    v_addc_u32_e32 v15, vcc, v15, v2, vcc
+; CGP-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v13, v12, 0
+; CGP-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v13, v15, v[2:3]
 ; CGP-NEXT:    v_ashrrev_i32_e32 v13, 31, v11
-; CGP-NEXT:    v_mul_hi_u32 v16, v12, v3
-; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v14, v12, v[4:5]
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v10, v13
+; CGP-NEXT:    v_mul_hi_u32 v16, v12, v1
+; CGP-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v14, v12, v[2:3]
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v10, v13
 ; CGP-NEXT:    v_addc_u32_e32 v10, vcc, v11, v13, vcc
-; CGP-NEXT:    v_xor_b32_e32 v11, v5, v13
-; CGP-NEXT:    v_mul_lo_u32 v5, v15, v3
-; CGP-NEXT:    v_mul_lo_u32 v14, v12, v4
-; CGP-NEXT:    v_mul_hi_u32 v3, v15, v3
+; CGP-NEXT:    v_xor_b32_e32 v11, v3, v13
+; CGP-NEXT:    v_mul_lo_u32 v3, v15, v1
+; CGP-NEXT:    v_mul_lo_u32 v14, v12, v2
+; CGP-NEXT:    v_mul_hi_u32 v1, v15, v1
 ; CGP-NEXT:    v_xor_b32_e32 v10, v10, v13
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v16
-; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v16, v15, v4
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v14, v5
-; CGP-NEXT:    v_mul_hi_u32 v14, v12, v4
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v16, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
 ; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v16, v14
-; CGP-NEXT:    v_mul_hi_u32 v4, v15, v4
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v14, v5
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v12, v3
-; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v15, v4, vcc
-; CGP-NEXT:    v_mul_lo_u32 v5, v10, v3
-; CGP-NEXT:    v_mul_lo_u32 v12, v11, v4
-; CGP-NEXT:    v_mul_hi_u32 v14, v11, v3
-; CGP-NEXT:    v_mul_hi_u32 v3, v10, v3
-; CGP-NEXT:    v_mul_hi_u32 v15, v10, v4
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v14, v10, v4
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v12, v5
-; CGP-NEXT:    v_mul_hi_u32 v12, v11, v4
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v14, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v16, vcc
+; CGP-NEXT:    v_mul_lo_u32 v3, v15, v2
+; CGP-NEXT:    v_mul_hi_u32 v16, v12, v2
+; CGP-NEXT:    v_addc_u32_e64 v14, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v3, v1
+; CGP-NEXT:    v_mul_hi_u32 v2, v15, v2
+; CGP-NEXT:    v_addc_u32_e64 v1, s[4:5], v1, v16, vcc
+; CGP-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v14, vcc
+; CGP-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v12, v1
+; CGP-NEXT:    v_addc_u32_e32 v2, vcc, v15, v2, vcc
+; CGP-NEXT:    v_mul_lo_u32 v3, v10, v1
+; CGP-NEXT:    v_mul_lo_u32 v12, v11, v2
+; CGP-NEXT:    v_mul_hi_u32 v14, v11, v1
+; CGP-NEXT:    v_mul_hi_u32 v1, v10, v1
 ; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v14, v12
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v3, v5
-; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v2, v14, 0
-; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v12, v5
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v15, v5
-; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v2, v12, v[4:5]
-; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v11, v3
-; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v1, v14, v[4:5]
-; CGP-NEXT:    v_subb_u32_e64 v5, s[4:5], v10, v4, vcc
-; CGP-NEXT:    v_sub_i32_e64 v4, s[4:5], v10, v4
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v1
-; CGP-NEXT:    v_subb_u32_e32 v4, vcc, v4, v1, vcc
+; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v14, vcc
+; CGP-NEXT:    v_mul_lo_u32 v3, v10, v2
+; CGP-NEXT:    v_mul_hi_u32 v14, v11, v2
+; CGP-NEXT:    v_addc_u32_e64 v12, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v3, v1
+; CGP-NEXT:    v_addc_u32_e64 v1, s[4:5], v1, v14, vcc
+; CGP-NEXT:    v_addc_u32_e32 v12, vcc, v1, v12, vcc
+; CGP-NEXT:    v_mul_hi_u32 v14, v10, v2
+; CGP-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v4, v12, 0
+; CGP-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v3
+; CGP-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v4, v14, v[2:3]
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v11, v1
+; CGP-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v5, v12, v[2:3]
+; CGP-NEXT:    v_subb_u32_e64 v3, s[4:5], v10, v2, vcc
+; CGP-NEXT:    v_sub_i32_e64 v2, s[4:5], v10, v2
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v3, v5
+; CGP-NEXT:    v_subb_u32_e32 v2, vcc, v2, v5, vcc
 ; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v3, v2
-; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v3, v2
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v4
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
 ; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v5, v1
-; CGP-NEXT:    v_subbrev_u32_e32 v4, vcc, 0, v4, vcc
-; CGP-NEXT:    v_cndmask_b32_e64 v5, v10, v11, s[4:5]
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, 1, v14
-; CGP-NEXT:    v_addc_u32_e32 v11, vcc, 0, v12, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v1
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v5
+; CGP-NEXT:    v_subbrev_u32_e32 v2, vcc, 0, v2, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v3, v10, v11, s[4:5]
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, 1, v12
+; CGP-NEXT:    v_addc_u32_e32 v11, vcc, 0, v14, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v5
 ; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, -1, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v2
-; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
-; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v15, v2, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v15, v1, vcc
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, 1, v10
-; CGP-NEXT:    v_addc_u32_e32 v3, vcc, 0, v11, vcc
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, 0, v11, vcc
 ; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
 ; CGP-NEXT:    v_cndmask_b32_e32 v1, v10, v2, vcc
-; CGP-NEXT:    v_cndmask_b32_e32 v2, v11, v3, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v14, v1, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v11, v4, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v12, v1, vcc
 ; CGP-NEXT:    v_xor_b32_e32 v3, v13, v0
-; CGP-NEXT:    v_cndmask_b32_e32 v2, v12, v2, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v14, v2, vcc
 ; CGP-NEXT:    v_xor_b32_e32 v0, v1, v3
 ; CGP-NEXT:    v_xor_b32_e32 v1, v2, v3
 ; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
@@ -835,131 +758,116 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:  .LBB2_7:
 ; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v7
 ; CGP-NEXT:    v_add_i32_e32 v3, vcc, v6, v2
-; CGP-NEXT:    v_addc_u32_e32 v5, vcc, v7, v2, vcc
-; CGP-NEXT:    v_xor_b32_e32 v4, v3, v2
-; CGP-NEXT:    v_xor_b32_e32 v3, v5, v2
-; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v4
-; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v3
-; CGP-NEXT:    v_sub_i32_e32 v11, vcc, 0, v4
-; CGP-NEXT:    v_subb_u32_e32 v12, vcc, 0, v3, vcc
-; CGP-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v6
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v5, v5
-; CGP-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
-; CGP-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v5
-; CGP-NEXT:    v_trunc_f32_e32 v7, v6
-; CGP-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v7
-; CGP-NEXT:    v_cvt_u32_f32_e32 v10, v5
-; CGP-NEXT:    v_cvt_u32_f32_e32 v13, v7
-; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v11, v10, 0
-; CGP-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v11, v13, v[6:7]
-; CGP-NEXT:    v_mul_hi_u32 v14, v10, v5
-; CGP-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v12, v10, v[6:7]
-; CGP-NEXT:    v_mul_lo_u32 v7, v13, v5
-; CGP-NEXT:    v_mul_hi_u32 v5, v13, v5
-; CGP-NEXT:    v_mul_lo_u32 v15, v10, v6
-; CGP-NEXT:    v_mul_lo_u32 v16, v13, v6
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v15
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v14
-; CGP-NEXT:    v_mul_hi_u32 v14, v10, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v15, v7
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v16, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
-; CGP-NEXT:    v_mul_hi_u32 v6, v13, v6
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
-; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v14, v7
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v5
-; CGP-NEXT:    v_addc_u32_e32 v13, vcc, v13, v6, vcc
-; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v11, v10, 0
-; CGP-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v11, v13, v[6:7]
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v7, v2, vcc
+; CGP-NEXT:    v_xor_b32_e32 v6, v3, v2
+; CGP-NEXT:    v_xor_b32_e32 v7, v4, v2
+; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v6
+; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v7
+; CGP-NEXT:    v_sub_i32_e32 v11, vcc, 0, v6
+; CGP-NEXT:    v_subb_u32_e32 v12, vcc, 0, v7, vcc
+; CGP-NEXT:    v_mac_f32_e32 v3, 0x4f800000, v4
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, v3
+; CGP-NEXT:    v_mul_f32_e32 v3, 0x5f7ffffc, v3
+; CGP-NEXT:    v_mul_f32_e32 v4, 0x2f800000, v3
+; CGP-NEXT:    v_trunc_f32_e32 v5, v4
+; CGP-NEXT:    v_mac_f32_e32 v3, 0xcf800000, v5
+; CGP-NEXT:    v_cvt_u32_f32_e32 v10, v3
+; CGP-NEXT:    v_cvt_u32_f32_e32 v13, v5
+; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v11, v10, 0
+; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v11, v13, v[4:5]
+; CGP-NEXT:    v_mul_hi_u32 v14, v10, v3
+; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v12, v10, v[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v5, v13, v3
+; CGP-NEXT:    v_mul_hi_u32 v3, v13, v3
+; CGP-NEXT:    v_mul_lo_u32 v15, v10, v4
+; CGP-NEXT:    v_mul_lo_u32 v16, v13, v4
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v15
+; CGP-NEXT:    v_addc_u32_e32 v5, vcc, v5, v14, vcc
+; CGP-NEXT:    v_mul_hi_u32 v14, v10, v4
+; CGP-NEXT:    v_addc_u32_e64 v5, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v16, v3
+; CGP-NEXT:    v_mul_hi_u32 v4, v13, v4
+; CGP-NEXT:    v_addc_u32_e64 v3, s[4:5], v3, v14, vcc
+; CGP-NEXT:    v_addc_u32_e64 v14, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v5, vcc
+; CGP-NEXT:    v_addc_u32_e32 v5, vcc, 0, v14, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v3
+; CGP-NEXT:    v_addc_u32_e32 v13, vcc, v13, v4, vcc
+; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v11, v10, 0
+; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v11, v13, v[4:5]
 ; CGP-NEXT:    v_ashrrev_i32_e32 v11, 31, v9
-; CGP-NEXT:    v_mul_hi_u32 v14, v10, v5
-; CGP-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v12, v10, v[6:7]
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v8, v11
+; CGP-NEXT:    v_mul_hi_u32 v14, v10, v3
+; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v12, v10, v[4:5]
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v8, v11
 ; CGP-NEXT:    v_addc_u32_e32 v8, vcc, v9, v11, vcc
-; CGP-NEXT:    v_xor_b32_e32 v9, v7, v11
-; CGP-NEXT:    v_mul_lo_u32 v7, v13, v5
-; CGP-NEXT:    v_mul_lo_u32 v12, v10, v6
-; CGP-NEXT:    v_mul_hi_u32 v5, v13, v5
+; CGP-NEXT:    v_xor_b32_e32 v9, v5, v11
+; CGP-NEXT:    v_mul_lo_u32 v5, v13, v3
+; CGP-NEXT:    v_mul_lo_u32 v12, v10, v4
+; CGP-NEXT:    v_mul_hi_u32 v3, v13, v3
 ; CGP-NEXT:    v_xor_b32_e32 v8, v8, v11
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v14, v13, v6
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v12, v7
-; CGP-NEXT:    v_mul_hi_u32 v12, v10, v6
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v14, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
 ; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v14, v12
-; CGP-NEXT:    v_mul_hi_u32 v6, v13, v6
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
-; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v12, v7
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v10, v5
-; CGP-NEXT:    v_addc_u32_e32 v6, vcc, v13, v6, vcc
-; CGP-NEXT:    v_mul_lo_u32 v7, v8, v5
-; CGP-NEXT:    v_mul_lo_u32 v10, v9, v6
-; CGP-NEXT:    v_mul_hi_u32 v12, v9, v5
-; CGP-NEXT:    v_mul_hi_u32 v5, v8, v5
-; CGP-NEXT:    v_mul_hi_u32 v13, v8, v6
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v12, v8, v6
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v10, v7
-; CGP-NEXT:    v_mul_hi_u32 v10, v9, v6
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v12, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_addc_u32_e32 v5, vcc, v5, v14, vcc
+; CGP-NEXT:    v_mul_lo_u32 v5, v13, v4
+; CGP-NEXT:    v_mul_hi_u32 v14, v10, v4
+; CGP-NEXT:    v_addc_u32_e64 v12, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
+; CGP-NEXT:    v_mul_hi_u32 v4, v13, v4
+; CGP-NEXT:    v_addc_u32_e64 v3, s[4:5], v3, v14, vcc
+; CGP-NEXT:    v_addc_u32_e64 v5, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v12, vcc
+; CGP-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v10, v3
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v13, v4, vcc
+; CGP-NEXT:    v_mul_lo_u32 v5, v8, v3
+; CGP-NEXT:    v_mul_lo_u32 v10, v9, v4
+; CGP-NEXT:    v_mul_hi_u32 v12, v9, v3
+; CGP-NEXT:    v_mul_hi_u32 v3, v8, v3
 ; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v5, v7
-; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v4, v12, 0
-; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v10, v7
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v13, v7
-; CGP-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v4, v10, v[6:7]
-; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v9, v5
-; CGP-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v3, v12, v[6:7]
-; CGP-NEXT:    v_subb_u32_e64 v7, s[4:5], v8, v6, vcc
-; CGP-NEXT:    v_sub_i32_e64 v6, s[4:5], v8, v6
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v3
-; CGP-NEXT:    v_subb_u32_e32 v6, vcc, v6, v3, vcc
+; CGP-NEXT:    v_addc_u32_e32 v5, vcc, v5, v12, vcc
+; CGP-NEXT:    v_mul_lo_u32 v5, v8, v4
+; CGP-NEXT:    v_mul_hi_u32 v12, v9, v4
+; CGP-NEXT:    v_addc_u32_e64 v10, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
+; CGP-NEXT:    v_addc_u32_e64 v3, s[4:5], v3, v12, vcc
+; CGP-NEXT:    v_addc_u32_e32 v10, vcc, v3, v10, vcc
+; CGP-NEXT:    v_mul_hi_u32 v12, v8, v4
+; CGP-NEXT:    v_addc_u32_e64 v5, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v6, v10, 0
+; CGP-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v5
+; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v6, v12, v[4:5]
+; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v9, v3
+; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v7, v10, v[4:5]
+; CGP-NEXT:    v_subb_u32_e64 v5, s[4:5], v8, v4, vcc
+; CGP-NEXT:    v_sub_i32_e64 v4, s[4:5], v8, v4
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v7
+; CGP-NEXT:    v_subb_u32_e32 v4, vcc, v4, v7, vcc
 ; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v4
-; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v5, v4
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v3, v6
+; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v3, v6
 ; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v7, v3
-; CGP-NEXT:    v_subbrev_u32_e32 v6, vcc, 0, v6, vcc
-; CGP-NEXT:    v_cndmask_b32_e64 v7, v8, v9, s[4:5]
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v12
-; CGP-NEXT:    v_addc_u32_e32 v9, vcc, 0, v10, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v6, v3
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v5, v7
+; CGP-NEXT:    v_subbrev_u32_e32 v4, vcc, 0, v4, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v5, v8, v9, s[4:5]
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v10
+; CGP-NEXT:    v_addc_u32_e32 v9, vcc, 0, v12, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v7
 ; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, -1, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v5, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
-; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v6, v3
-; CGP-NEXT:    v_cndmask_b32_e32 v3, v13, v4, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v3, 0, -1, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v13, v3, vcc
 ; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v8
-; CGP-NEXT:    v_addc_u32_e32 v5, vcc, 0, v9, vcc
+; CGP-NEXT:    v_addc_u32_e32 v6, vcc, 0, v9, vcc
 ; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
 ; CGP-NEXT:    v_cndmask_b32_e32 v3, v8, v4, vcc
-; CGP-NEXT:    v_cndmask_b32_e32 v4, v9, v5, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
-; CGP-NEXT:    v_cndmask_b32_e32 v3, v12, v3, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v9, v6, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v10, v3, vcc
 ; CGP-NEXT:    v_xor_b32_e32 v5, v11, v2
-; CGP-NEXT:    v_cndmask_b32_e32 v4, v10, v4, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v12, v4, vcc
 ; CGP-NEXT:    v_xor_b32_e32 v2, v3, v5
 ; CGP-NEXT:    v_xor_b32_e32 v3, v4, v5
 ; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v5
@@ -1069,18 +977,13 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) {
 ; CHECK-NEXT:    v_mul_hi_u32 v11, v5, v3
 ; CHECK-NEXT:    v_mul_hi_u32 v3, v7, v3
 ; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
-; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v10, v2
-; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
-; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v11
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
-; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
+; CHECK-NEXT:    v_addc_u32_e32 v4, vcc, v4, v8, vcc
+; CHECK-NEXT:    v_add_i32_e64 v2, s[4:5], v10, v2
+; CHECK-NEXT:    v_addc_u32_e64 v2, s[6:7], v2, v11, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e64 v4, s[8:9], 0, 0, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v8, s[6:7], 0, 0, s[6:7]
+; CHECK-NEXT:    v_addc_u32_e64 v2, vcc, v2, v4, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v4, vcc, 0, v8, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
 ; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v2
 ; CHECK-NEXT:    v_addc_u32_e32 v7, vcc, v7, v3, vcc
@@ -1095,23 +998,18 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) {
 ; CHECK-NEXT:    v_mul_lo_u32 v8, v5, v3
 ; CHECK-NEXT:    v_xor_b32_e32 v9, v1, v6
 ; CHECK-NEXT:    v_mul_hi_u32 v1, v5, v2
-; CHECK-NEXT:    v_mul_hi_u32 v2, v7, v2
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v8
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; CHECK-NEXT:    v_mul_lo_u32 v1, v7, v3
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
+; CHECK-NEXT:    v_addc_u32_e32 v0, vcc, v0, v1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v0, v7, v3
+; CHECK-NEXT:    v_mul_hi_u32 v1, v7, v2
 ; CHECK-NEXT:    v_mul_hi_u32 v8, v5, v3
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
-; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v8
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v8
+; CHECK-NEXT:    v_addc_u32_e64 v2, s[4:5], 0, 0, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
 ; CHECK-NEXT:    v_mul_hi_u32 v3, v7, v3
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
-; CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
+; CHECK-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v8, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v0, vcc, v0, v2, vcc
+; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v3, v1
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v5, v0
 ; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v7, v1, vcc
@@ -1121,24 +1019,19 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) {
 ; CHECK-NEXT:    v_mul_hi_u32 v0, v9, v0
 ; CHECK-NEXT:    v_mov_b32_e32 v5, 0x12d8fb
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT:    v_mul_lo_u32 v7, v9, v1
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; CHECK-NEXT:    v_mul_hi_u32 v3, v4, v1
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v3
-; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v7, v3
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v0, v2
-; CHECK-NEXT:    v_mul_hi_u32 v8, v9, v1
-; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v7, 0
-; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v8, v2
-; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v5, v3, v[1:2]
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v2, v7, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v2, v9, v1
+; CHECK-NEXT:    v_mul_hi_u32 v7, v4, v1
+; CHECK-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
+; CHECK-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v7, vcc
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v0, v3, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v7, v9, v1
+; CHECK-NEXT:    v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5]
+; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v3, 0
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v2
+; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v5, v7, v[1:2]
 ; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v4, v0
 ; CHECK-NEXT:    v_subb_u32_e64 v2, s[4:5], v9, v1, vcc
 ; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v9, v1
@@ -1149,8 +1042,8 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) {
 ; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v2
 ; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e64 v2, -1, v4, s[4:5]
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, 1, v7
-; CHECK-NEXT:    v_addc_u32_e32 v8, vcc, 0, v3, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, 1, v3
+; CHECK-NEXT:    v_addc_u32_e32 v8, vcc, 0, v7, vcc
 ; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v5
 ; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
 ; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
@@ -1161,8 +1054,8 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) {
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v1, v8, v5, vcc
 ; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v7, v0, vcc
-; CHECK-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
 ; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v6
 ; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v6
 ; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
@@ -1182,6 +1075,8 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    s_subb_u32 s7, 0, 0
 ; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
 ; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT:    s_sub_u32 s8, 0, 0x12d8fb
+; GISEL-NEXT:    s_subb_u32 s9, 0, 0
 ; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
 ; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
 ; GISEL-NEXT:    v_trunc_f32_e32 v7, v5
@@ -1199,18 +1094,13 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_mul_hi_u32 v12, v6, v8
 ; GISEL-NEXT:    v_mul_hi_u32 v8, v7, v8
 ; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v10, v5
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v9, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v5, s[4:5], 0, 0, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
+; GISEL-NEXT:    v_addc_u32_e64 v4, s[4:5], v4, v12, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, v4, v5, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v9, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
 ; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v6, v4
 ; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v11, 0
@@ -1226,23 +1116,18 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_mul_lo_u32 v12, v11, v9
 ; GISEL-NEXT:    v_xor_b32_e32 v13, v1, v4
 ; GISEL-NEXT:    v_mul_hi_u32 v1, v11, v8
-; GISEL-NEXT:    v_mul_hi_u32 v8, v5, v8
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v1, v5, v9
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v12, v0
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, v0, v1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v0, v5, v9
+; GISEL-NEXT:    v_mul_hi_u32 v1, v5, v8
 ; GISEL-NEXT:    v_mul_hi_u32 v12, v11, v9
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; GISEL-NEXT:    v_addc_u32_e64 v8, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
 ; GISEL-NEXT:    v_mul_hi_u32 v9, v5, v9
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v12, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, v0, v8, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v9, v1
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
 ; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v5, v1, vcc
@@ -1252,28 +1137,21 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_mul_hi_u32 v0, v13, v0
 ; GISEL-NEXT:    v_mov_b32_e32 v5, 0x12d8fb
 ; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v11, v13, v1
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; GISEL-NEXT:    v_mul_hi_u32 v9, v10, v1
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v0, v8
-; GISEL-NEXT:    v_mul_hi_u32 v12, v13, v1
+; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, v8, v11, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v8, v13, v1
+; GISEL-NEXT:    v_mul_hi_u32 v11, v10, v1
+; GISEL-NEXT:    v_addc_u32_e64 v9, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v11, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v11, vcc, v0, v9, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v9, v13, v1
+; GISEL-NEXT:    v_addc_u32_e64 v8, s[4:5], 0, 0, s[4:5]
 ; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v11, 0
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v8
+; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, 0, v8, vcc
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v9, v8
 ; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v5, v12, v[1:2]
 ; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v10, v0
 ; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], 0, v11, v[8:9]
-; GISEL-NEXT:    s_sub_u32 s6, 0, 0x12d8fb
-; GISEL-NEXT:    s_subb_u32 s7, 0, 0
 ; GISEL-NEXT:    v_subb_u32_e64 v1, s[4:5], v13, v8, vcc
 ; GISEL-NEXT:    v_sub_i32_e64 v8, s[4:5], v13, v8
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v5
@@ -1282,107 +1160,92 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v8, vcc
 ; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, v0, v5
 ; GISEL-NEXT:    v_cndmask_b32_e64 v10, -1, v9, s[4:5]
-; GISEL-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, 1, v11
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v6, 0
-; GISEL-NEXT:    v_addc_u32_e32 v14, vcc, 0, v12, vcc
+; GISEL-NEXT:    v_subbrev_u32_e32 v13, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s8, v6, 0
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, 1, v11
+; GISEL-NEXT:    v_addc_u32_e32 v15, vcc, 0, v12, vcc
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
-; GISEL-NEXT:    v_cndmask_b32_e32 v15, -1, v8, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[1:2]
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v13
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
-; GISEL-NEXT:    v_addc_u32_e32 v16, vcc, 0, v14, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v15
-; GISEL-NEXT:    v_cndmask_b32_e32 v9, v13, v1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v1, v7, v0
-; GISEL-NEXT:    v_mul_lo_u32 v13, v6, v8
-; GISEL-NEXT:    v_mul_hi_u32 v15, v6, v0
-; GISEL-NEXT:    v_cndmask_b32_e32 v14, v14, v16, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s8, v7, v[1:2]
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v13
+; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s9, v6, v[8:9]
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, -1, v16, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v14
+; GISEL-NEXT:    v_addc_u32_e32 v13, vcc, 0, v15, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v16, v7, v0
+; GISEL-NEXT:    v_mul_lo_u32 v17, v6, v8
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
+; GISEL-NEXT:    v_mul_hi_u32 v1, v6, v0
+; GISEL-NEXT:    v_cndmask_b32_e32 v9, v14, v9, vcc
+; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v16, v17
+; GISEL-NEXT:    v_addc_u32_e64 v1, s[4:5], v14, v1, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v1, v7, v8
 ; GISEL-NEXT:    v_mul_hi_u32 v0, v7, v0
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v15, v7, v8
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
-; GISEL-NEXT:    v_mul_hi_u32 v13, v6, v8
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v15, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v15, v13
+; GISEL-NEXT:    v_mul_hi_u32 v16, v6, v8
+; GISEL-NEXT:    v_addc_u32_e64 v14, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v0, s[4:5], v1, v0
 ; GISEL-NEXT:    v_mul_hi_u32 v8, v7, v8
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v6, v0
-; GISEL-NEXT:    v_addc_u32_e32 v13, vcc, v7, v1, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[6:7], v0, v16, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v1, s[6:7], 0, 0, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v14, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v1, s[4:5], 0, v1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v1, s[4:5], v8, v1
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v6, v0
+; GISEL-NEXT:    v_addc_u32_e64 v14, s[4:5], v7, v1, s[4:5]
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s8, v8, 0
+; GISEL-NEXT:    v_cndmask_b32_e32 v13, v15, v13, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
+; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], s8, v14, v[1:2]
+; GISEL-NEXT:    v_ashrrev_i32_e32 v10, 31, v3
 ; GISEL-NEXT:    v_cndmask_b32_e32 v9, v11, v9, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2]
-; GISEL-NEXT:    v_xor_b32_e32 v1, v9, v4
-; GISEL-NEXT:    v_ashrrev_i32_e32 v9, 31, v3
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e32 v10, v12, v14, vcc
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v9
-; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v9, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v11, v2, v9
-; GISEL-NEXT:    v_mul_lo_u32 v2, v13, v0
+; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], s9, v8, v[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v12, v13, vcc
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v10
+; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v10, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v11, v2, v10
+; GISEL-NEXT:    v_mul_lo_u32 v2, v14, v0
 ; GISEL-NEXT:    v_mul_lo_u32 v7, v8, v6
-; GISEL-NEXT:    v_xor_b32_e32 v12, v3, v9
+; GISEL-NEXT:    v_xor_b32_e32 v12, v3, v10
 ; GISEL-NEXT:    v_mul_hi_u32 v3, v8, v0
-; GISEL-NEXT:    v_mul_hi_u32 v0, v13, v0
+; GISEL-NEXT:    v_mul_hi_u32 v0, v14, v0
 ; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v3, v13, v6
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
+; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, v2, v3, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v2, v14, v6
 ; GISEL-NEXT:    v_mul_hi_u32 v7, v8, v6
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
-; GISEL-NEXT:    v_mul_hi_u32 v6, v13, v6
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; GISEL-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
+; GISEL-NEXT:    v_mul_hi_u32 v6, v14, v6
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v7, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, v0, v3, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
-; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, v13, v2, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, v14, v2, vcc
 ; GISEL-NEXT:    v_mul_lo_u32 v3, v12, v0
 ; GISEL-NEXT:    v_mul_lo_u32 v6, v11, v2
 ; GISEL-NEXT:    v_mul_hi_u32 v7, v11, v0
 ; GISEL-NEXT:    v_mul_hi_u32 v0, v12, v0
-; GISEL-NEXT:    v_xor_b32_e32 v8, v10, v4
+; GISEL-NEXT:    v_xor_b32_e32 v8, v9, v4
 ; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v7, v12, v2
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
-; GISEL-NEXT:    v_mul_hi_u32 v6, v11, v2
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v0, v3
-; GISEL-NEXT:    v_mul_hi_u32 v7, v12, v2
-; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v5, v10, 0
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v6, v0
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v7, v0
+; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v7, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v3, v12, v2
+; GISEL-NEXT:    v_mul_hi_u32 v7, v11, v2
+; GISEL-NEXT:    v_addc_u32_e64 v6, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v7, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v0, v6, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v6, v12, v2
+; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v5, v9, 0
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v4
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v6, v0
 ; GISEL-NEXT:    v_mov_b32_e32 v0, v3
 ; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v5, v13, v[0:1]
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v1, v4
-; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v8, v4, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], 0, v10, v[6:7]
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v8, v4
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v4, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], 0, v9, v[6:7]
 ; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v11, v2
 ; GISEL-NEXT:    v_subb_u32_e64 v4, s[4:5], v12, v3, vcc
 ; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v12, v3
@@ -1393,7 +1256,7 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v4
 ; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v4, -1, v6, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v10
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v9
 ; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, 0, v13, vcc
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v5
 ; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
@@ -1405,12 +1268,12 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_cndmask_b32_e32 v2, v6, v3, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v3, v7, v5, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, v10, v2, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v9, v2, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v3, v13, v3, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v9
-; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v9
-; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v9
-; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v9, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v10
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v10
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
+; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v10, vcc
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_sdiv_v2i64_oddk_denom:
@@ -1423,222 +1286,192 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
 ; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
 ; CGP-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
-; CGP-NEXT:    v_trunc_f32_e32 v7, v5
-; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v7
-; CGP-NEXT:    v_cvt_u32_f32_e32 v8, v4
-; CGP-NEXT:    v_cvt_u32_f32_e32 v9, v7
-; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v6, v8, 0
-; CGP-NEXT:    v_mov_b32_e32 v7, v5
-; CGP-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v6, v9, v[7:8]
-; CGP-NEXT:    v_mul_hi_u32 v12, v9, v4
-; CGP-NEXT:    v_mad_u64_u32 v[13:14], s[4:5], -1, v8, v[10:11]
-; CGP-NEXT:    v_mul_lo_u32 v10, v9, v4
-; CGP-NEXT:    v_mul_hi_u32 v11, v8, v4
-; CGP-NEXT:    v_mul_lo_u32 v4, v8, v13
-; CGP-NEXT:    v_mul_lo_u32 v7, v9, v13
-; CGP-NEXT:    v_mul_hi_u32 v14, v8, v13
-; CGP-NEXT:    v_mul_hi_u32 v13, v9, v13
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v10, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT:    v_trunc_f32_e32 v5, v5
+; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
+; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v4
+; CGP-NEXT:    v_cvt_u32_f32_e32 v8, v5
+; CGP-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v6, v7, 0
+; CGP-NEXT:    v_mov_b32_e32 v4, v10
+; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v6, v8, v[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v14, v8, v9
+; CGP-NEXT:    v_mul_hi_u32 v15, v7, v9
+; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], -1, v7, v[4:5]
+; CGP-NEXT:    v_mul_hi_u32 v9, v8, v9
+; CGP-NEXT:    v_mul_lo_u32 v5, v7, v4
+; CGP-NEXT:    v_mul_lo_u32 v11, v8, v4
+; CGP-NEXT:    v_mul_hi_u32 v12, v7, v4
+; CGP-NEXT:    v_mul_hi_u32 v4, v8, v4
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v14, v5
+; CGP-NEXT:    v_addc_u32_e32 v5, vcc, v5, v15, vcc
+; CGP-NEXT:    v_addc_u32_e64 v5, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v9
+; CGP-NEXT:    v_addc_u32_e64 v11, s[4:5], v11, v12, vcc
+; CGP-NEXT:    v_addc_u32_e64 v12, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v5, vcc, v11, v5, vcc
+; CGP-NEXT:    v_addc_u32_e32 v11, vcc, 0, v12, vcc
 ; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v15, v4
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v14, v7
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v13, v7
-; CGP-NEXT:    v_add_i32_e32 v16, vcc, v8, v4
-; CGP-NEXT:    v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0
-; CGP-NEXT:    v_addc_u32_e32 v17, vcc, v9, v7, vcc
-; CGP-NEXT:    v_mov_b32_e32 v4, v14
-; CGP-NEXT:    v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5]
-; CGP-NEXT:    v_ashrrev_i32_e32 v7, 31, v1
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
-; CGP-NEXT:    v_mad_u64_u32 v[14:15], s[4:5], -1, v16, v[14:15]
-; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v7, vcc
-; CGP-NEXT:    v_xor_b32_e32 v15, v0, v7
-; CGP-NEXT:    v_mul_lo_u32 v0, v17, v13
-; CGP-NEXT:    v_mul_lo_u32 v4, v16, v14
-; CGP-NEXT:    v_xor_b32_e32 v18, v1, v7
-; CGP-NEXT:    v_mul_hi_u32 v1, v16, v13
-; CGP-NEXT:    v_mul_hi_u32 v13, v17, v13
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; CGP-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v6, v5, 0
+; CGP-NEXT:    v_addc_u32_e32 v16, vcc, v8, v4, vcc
+; CGP-NEXT:    v_mov_b32_e32 v4, v12
+; CGP-NEXT:    v_mad_u64_u32 v[12:13], s[4:5], v6, v16, v[4:5]
+; CGP-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
 ; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; CGP-NEXT:    v_mad_u64_u32 v[12:13], s[4:5], -1, v5, v[12:13]
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v4, vcc
+; CGP-NEXT:    v_xor_b32_e32 v13, v0, v4
+; CGP-NEXT:    v_mul_lo_u32 v0, v16, v11
+; CGP-NEXT:    v_mul_lo_u32 v17, v5, v12
+; CGP-NEXT:    v_xor_b32_e32 v18, v1, v4
+; CGP-NEXT:    v_mul_hi_u32 v1, v5, v11
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v17
+; CGP-NEXT:    v_addc_u32_e32 v0, vcc, v0, v1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v0, v16, v12
+; CGP-NEXT:    v_mul_hi_u32 v1, v16, v11
+; CGP-NEXT:    v_mul_hi_u32 v17, v5, v12
+; CGP-NEXT:    v_addc_u32_e64 v11, s[4:5], 0, 0, vcc
 ; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; CGP-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v1, v17, v14
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
-; CGP-NEXT:    v_mul_hi_u32 v4, v16, v14
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
-; CGP-NEXT:    v_mul_hi_u32 v13, v17, v14
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
-; CGP-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v4, v1
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v16, v0
-; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v17, v1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v13, v18, v0
-; CGP-NEXT:    v_mul_lo_u32 v14, v15, v1
-; CGP-NEXT:    v_mul_hi_u32 v16, v15, v0
+; CGP-NEXT:    v_mul_hi_u32 v12, v16, v12
+; CGP-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v17, vcc
+; CGP-NEXT:    v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v0, vcc, v0, v11, vcc
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v12, v1
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v5, v0
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v16, v1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v11, v18, v0
+; CGP-NEXT:    v_mul_lo_u32 v12, v13, v1
+; CGP-NEXT:    v_mul_hi_u32 v16, v13, v0
 ; CGP-NEXT:    v_mul_hi_u32 v0, v18, v0
-; CGP-NEXT:    v_mov_b32_e32 v4, 0x12d8fb
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v16
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v16, v18, v1
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
-; CGP-NEXT:    v_mul_hi_u32 v14, v15, v1
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v16, v0
-; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v16, v14
-; CGP-NEXT:    v_add_i32_e32 v16, vcc, v0, v13
-; CGP-NEXT:    v_mul_hi_u32 v17, v18, v1
-; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v4, v16, 0
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
-; CGP-NEXT:    v_add_i32_e32 v17, vcc, v17, v13
-; CGP-NEXT:    v_mad_u64_u32 v[13:14], s[4:5], v4, v17, v[1:2]
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v15, v0
-; CGP-NEXT:    v_subb_u32_e64 v1, s[4:5], v18, v13, vcc
-; CGP-NEXT:    v_sub_i32_e64 v13, s[4:5], v18, v13
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, -1, s[4:5]
+; CGP-NEXT:    v_mov_b32_e32 v5, 0x12d8fb
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; CGP-NEXT:    v_addc_u32_e32 v11, vcc, v11, v16, vcc
+; CGP-NEXT:    v_mul_lo_u32 v11, v18, v1
+; CGP-NEXT:    v_mul_hi_u32 v16, v13, v1
+; CGP-NEXT:    v_addc_u32_e64 v12, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
+; CGP-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v16, vcc
+; CGP-NEXT:    v_addc_u32_e32 v16, vcc, v0, v12, vcc
+; CGP-NEXT:    v_mul_hi_u32 v12, v18, v1
+; CGP-NEXT:    v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v16, 0
+; CGP-NEXT:    v_addc_u32_e32 v11, vcc, 0, v11, vcc
+; CGP-NEXT:    v_add_i32_e32 v17, vcc, v12, v11
+; CGP-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v5, v17, v[1:2]
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v13, v0
+; CGP-NEXT:    v_subb_u32_e64 v1, s[4:5], v18, v11, vcc
+; CGP-NEXT:    v_sub_i32_e64 v11, s[4:5], v18, v11
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v5
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[4:5]
 ; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v1
-; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v13, vcc
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
-; CGP-NEXT:    v_subbrev_u32_e32 v13, vcc, 0, v1, vcc
-; CGP-NEXT:    v_add_i32_e32 v15, vcc, 1, v16
+; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v11, vcc
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
+; CGP-NEXT:    v_subbrev_u32_e32 v11, vcc, 0, v1, vcc
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, 1, v16
 ; CGP-NEXT:    v_addc_u32_e32 v18, vcc, 0, v17, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
-; CGP-NEXT:    v_mov_b32_e32 v0, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v14, -1, v14, s[4:5]
-; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v9, v[0:1]
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v5
+; CGP-NEXT:    v_mov_b32_e32 v0, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v12, -1, v12, s[4:5]
+; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v8, v[0:1]
 ; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, -1, vcc
-; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v13
-; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], -1, v8, v[0:1]
-; CGP-NEXT:    v_cndmask_b32_e32 v5, -1, v19, vcc
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, 1, v15
-; CGP-NEXT:    v_mul_lo_u32 v19, v8, v0
-; CGP-NEXT:    v_addc_u32_e32 v13, vcc, 0, v18, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
-; CGP-NEXT:    v_cndmask_b32_e32 v5, v15, v1, vcc
-; CGP-NEXT:    v_cndmask_b32_e32 v13, v18, v13, vcc
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v10, v19
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v11, v9, v0
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v10, v1
-; CGP-NEXT:    v_mul_hi_u32 v10, v8, v0
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
-; CGP-NEXT:    v_mul_hi_u32 v0, v9, v0
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v10, v1
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v10
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v1
-; CGP-NEXT:    v_addc_u32_e32 v9, vcc, v9, v0, vcc
-; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v8, 0
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v14
-; CGP-NEXT:    v_cndmask_b32_e32 v5, v16, v5, vcc
-; CGP-NEXT:    v_xor_b32_e32 v11, v5, v7
-; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v6, v9, v[1:2]
-; CGP-NEXT:    v_cndmask_b32_e32 v10, v17, v13, vcc
-; CGP-NEXT:    v_xor_b32_e32 v1, v10, v7
-; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], -1, v8, v[5:6]
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v11
+; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], -1, v7, v[0:1]
+; CGP-NEXT:    v_cndmask_b32_e32 v10, -1, v19, vcc
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, 1, v13
+; CGP-NEXT:    v_mul_lo_u32 v19, v7, v0
+; CGP-NEXT:    v_addc_u32_e32 v11, vcc, 0, v18, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
+; CGP-NEXT:    v_cndmask_b32_e32 v10, v13, v1, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v11, v18, v11, vcc
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v14, v19
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v15, vcc
+; CGP-NEXT:    v_mul_lo_u32 v1, v8, v0
+; CGP-NEXT:    v_mul_hi_u32 v14, v7, v0
+; CGP-NEXT:    v_addc_u32_e64 v13, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v9
+; CGP-NEXT:    v_mul_hi_u32 v0, v8, v0
+; CGP-NEXT:    v_addc_u32_e64 v1, s[4:5], v1, v14, vcc
+; CGP-NEXT:    v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v13, vcc
+; CGP-NEXT:    v_addc_u32_e32 v9, vcc, 0, v9, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v7, v1
+; CGP-NEXT:    v_addc_u32_e32 v8, vcc, v8, v0, vcc
+; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v9, 0
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
+; CGP-NEXT:    v_cndmask_b32_e32 v10, v16, v10, vcc
+; CGP-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v6, v8, v[1:2]
+; CGP-NEXT:    v_xor_b32_e32 v1, v10, v4
 ; CGP-NEXT:    v_ashrrev_i32_e32 v10, 31, v3
+; CGP-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], -1, v9, v[6:7]
+; CGP-NEXT:    v_cndmask_b32_e32 v11, v17, v11, vcc
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v10
 ; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v10, vcc
-; CGP-NEXT:    v_xor_b32_e32 v12, v2, v10
-; CGP-NEXT:    v_mul_lo_u32 v2, v9, v0
-; CGP-NEXT:    v_mul_lo_u32 v6, v8, v5
+; CGP-NEXT:    v_xor_b32_e32 v7, v2, v10
+; CGP-NEXT:    v_mul_lo_u32 v2, v8, v0
+; CGP-NEXT:    v_mul_lo_u32 v12, v9, v6
 ; CGP-NEXT:    v_xor_b32_e32 v13, v3, v10
-; CGP-NEXT:    v_mul_hi_u32 v3, v8, v0
-; CGP-NEXT:    v_mul_hi_u32 v0, v9, v0
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v3, v9, v5
+; CGP-NEXT:    v_mul_hi_u32 v3, v9, v0
+; CGP-NEXT:    v_mul_hi_u32 v0, v8, v0
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v12
+; CGP-NEXT:    v_addc_u32_e32 v2, vcc, v2, v3, vcc
+; CGP-NEXT:    v_mul_lo_u32 v2, v8, v6
+; CGP-NEXT:    v_mul_hi_u32 v12, v9, v6
+; CGP-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
+; CGP-NEXT:    v_mul_hi_u32 v6, v8, v6
+; CGP-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v12, vcc
+; CGP-NEXT:    v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v0, vcc, v0, v3, vcc
+; CGP-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
-; CGP-NEXT:    v_mul_hi_u32 v6, v8, v5
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
-; CGP-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
+; CGP-NEXT:    v_addc_u32_e32 v2, vcc, v8, v2, vcc
+; CGP-NEXT:    v_mul_lo_u32 v3, v13, v0
+; CGP-NEXT:    v_mul_lo_u32 v6, v7, v2
+; CGP-NEXT:    v_mul_hi_u32 v9, v7, v0
+; CGP-NEXT:    v_mul_hi_u32 v0, v13, v0
+; CGP-NEXT:    v_xor_b32_e32 v8, v11, v4
 ; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
-; CGP-NEXT:    v_mul_hi_u32 v5, v9, v5
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v8, v0
-; CGP-NEXT:    v_addc_u32_e32 v2, vcc, v9, v2, vcc
-; CGP-NEXT:    v_mul_lo_u32 v5, v13, v3
-; CGP-NEXT:    v_mul_lo_u32 v6, v12, v2
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v11, v7
-; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v1, v7, vcc
-; CGP-NEXT:    v_mul_hi_u32 v7, v12, v3
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
-; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v7, v13, v2
-; CGP-NEXT:    v_mul_hi_u32 v3, v13, v3
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; CGP-NEXT:    v_mul_hi_u32 v6, v12, v2
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v7, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v3, v5
-; CGP-NEXT:    v_mul_hi_u32 v8, v13, v2
-; CGP-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v4, v7, 0
-; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v5
-; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v4, v8, v[3:4]
-; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v12, v2
-; CGP-NEXT:    v_subb_u32_e64 v3, s[4:5], v13, v5, vcc
-; CGP-NEXT:    v_sub_i32_e64 v5, s[4:5], v13, v5
-; CGP-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v4
-; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
-; CGP-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
-; CGP-NEXT:    v_cndmask_b32_e64 v3, -1, v6, s[4:5]
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v7
+; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v9, vcc
+; CGP-NEXT:    v_mul_lo_u32 v3, v13, v2
+; CGP-NEXT:    v_mul_hi_u32 v9, v7, v2
+; CGP-NEXT:    v_addc_u32_e64 v6, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
+; CGP-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v9, vcc
+; CGP-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v6, vcc, v0, v6, vcc
+; CGP-NEXT:    v_mul_hi_u32 v11, v13, v2
+; CGP-NEXT:    v_addc_u32_e32 v9, vcc, 0, v3, vcc
+; CGP-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v5, v6, 0
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v1, v4
+; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v8, v4, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v11, v9
+; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v5, v8, v[3:4]
+; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v7, v2
+; CGP-NEXT:    v_subb_u32_e64 v4, s[4:5], v13, v3, vcc
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v13, v3
+; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v5
+; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v5
+; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v4
+; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v4, -1, v7, s[4:5]
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v6
 ; CGP-NEXT:    v_addc_u32_e32 v9, vcc, 0, v8, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v4
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v5
 ; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
-; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
 ; CGP-NEXT:    v_cndmask_b32_e32 v2, -1, v2, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v6
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, 1, v7
 ; CGP-NEXT:    v_addc_u32_e32 v5, vcc, 0, v9, vcc
 ; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
-; CGP-NEXT:    v_cndmask_b32_e32 v2, v6, v4, vcc
-; CGP-NEXT:    v_cndmask_b32_e32 v4, v9, v5, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
-; CGP-NEXT:    v_cndmask_b32_e32 v2, v7, v2, vcc
-; CGP-NEXT:    v_cndmask_b32_e32 v3, v8, v4, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v9, v5, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v8, v3, vcc
 ; CGP-NEXT:    v_xor_b32_e32 v2, v2, v10
 ; CGP-NEXT:    v_xor_b32_e32 v3, v3, v10
 ; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
@@ -1673,130 +1506,115 @@ define i64 @v_sdiv_i64_pow2_shl_denom(i64 %x, i64 %y) {
 ; CHECK-NEXT:  .LBB7_3:
 ; CHECK-NEXT:    v_ashrrev_i32_e32 v0, 31, v6
 ; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v5, v0
-; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, v6, v0, vcc
-; CHECK-NEXT:    v_xor_b32_e32 v2, v1, v0
-; CHECK-NEXT:    v_xor_b32_e32 v1, v5, v0
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v5, v2
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v6, v1
-; CHECK-NEXT:    v_sub_i32_e32 v9, vcc, 0, v2
-; CHECK-NEXT:    v_subb_u32_e32 v10, vcc, 0, v1, vcc
-; CHECK-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v6
-; CHECK-NEXT:    v_rcp_iflag_f32_e32 v5, v5
-; CHECK-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
-; CHECK-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v5
-; CHECK-NEXT:    v_trunc_f32_e32 v7, v6
-; CHECK-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v7
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v8, v5
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v11, v7
-; CHECK-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v9, v8, 0
-; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v9, v11, v[6:7]
-; CHECK-NEXT:    v_mul_hi_u32 v12, v8, v5
-; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v10, v8, v[6:7]
-; CHECK-NEXT:    v_mul_lo_u32 v7, v11, v5
-; CHECK-NEXT:    v_mul_hi_u32 v5, v11, v5
-; CHECK-NEXT:    v_mul_lo_u32 v13, v8, v6
-; CHECK-NEXT:    v_mul_lo_u32 v14, v11, v6
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v13
-; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v12
-; CHECK-NEXT:    v_mul_hi_u32 v12, v8, v6
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v13, v7
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v14, v5
-; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v12
-; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
-; CHECK-NEXT:    v_mul_hi_u32 v6, v11, v6
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v12, v7
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v5
-; CHECK-NEXT:    v_addc_u32_e32 v11, vcc, v11, v6, vcc
-; CHECK-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v9, v8, 0
-; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v9, v11, v[6:7]
-; CHECK-NEXT:    v_ashrrev_i32_e32 v9, 31, v4
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v9
-; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v10, v8, v[6:7]
-; CHECK-NEXT:    v_addc_u32_e32 v4, vcc, v4, v9, vcc
-; CHECK-NEXT:    v_xor_b32_e32 v7, v3, v9
-; CHECK-NEXT:    v_mul_lo_u32 v3, v11, v5
-; CHECK-NEXT:    v_mul_lo_u32 v10, v8, v6
-; CHECK-NEXT:    v_xor_b32_e32 v12, v4, v9
-; CHECK-NEXT:    v_mul_hi_u32 v4, v8, v5
-; CHECK-NEXT:    v_mul_hi_u32 v5, v11, v5
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v10
-; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
-; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; CHECK-NEXT:    v_mul_lo_u32 v4, v11, v6
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v10, v3
-; CHECK-NEXT:    v_mul_hi_u32 v10, v8, v6
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
-; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
-; CHECK-NEXT:    v_mul_hi_u32 v6, v11, v6
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
-; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v8, v3
-; CHECK-NEXT:    v_addc_u32_e32 v4, vcc, v11, v4, vcc
-; CHECK-NEXT:    v_mul_lo_u32 v5, v12, v3
-; CHECK-NEXT:    v_mul_lo_u32 v6, v7, v4
-; CHECK-NEXT:    v_mul_hi_u32 v8, v7, v3
-; CHECK-NEXT:    v_mul_hi_u32 v3, v12, v3
-; CHECK-NEXT:    v_mul_hi_u32 v10, v12, v4
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
-; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT:    v_mul_lo_u32 v8, v12, v4
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; CHECK-NEXT:    v_mul_hi_u32 v6, v7, v4
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v8, v3
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
-; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v3, v5
-; CHECK-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v2, v8, 0
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v10, v5
-; CHECK-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v2, v6, v[4:5]
-; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v7, v3
-; CHECK-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v1, v8, v[4:5]
-; CHECK-NEXT:    v_subb_u32_e64 v5, s[4:5], v12, v4, vcc
-; CHECK-NEXT:    v_sub_i32_e64 v4, s[4:5], v12, v4
-; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v1
-; CHECK-NEXT:    v_subb_u32_e32 v4, vcc, v4, v1, vcc
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
-; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v3, v2
-; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v3, v2
-; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
-; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], v5, v1
-; CHECK-NEXT:    v_subbrev_u32_e32 v4, vcc, 0, v4, vcc
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, v7, v10, s[4:5]
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, 1, v8
-; CHECK-NEXT:    v_addc_u32_e32 v10, vcc, 0, v6, vcc
-; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v1
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v6, v0, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v7, v1, v0
+; CHECK-NEXT:    v_xor_b32_e32 v8, v2, v0
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v1, v7
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, v8
+; CHECK-NEXT:    v_sub_i32_e32 v10, vcc, 0, v7
+; CHECK-NEXT:    v_subb_u32_e32 v11, vcc, 0, v8, vcc
+; CHECK-NEXT:    v_mac_f32_e32 v1, 0x4f800000, v2
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v1, v1
+; CHECK-NEXT:    v_mul_f32_e32 v1, 0x5f7ffffc, v1
+; CHECK-NEXT:    v_mul_f32_e32 v2, 0x2f800000, v1
+; CHECK-NEXT:    v_trunc_f32_e32 v5, v2
+; CHECK-NEXT:    v_mac_f32_e32 v1, 0xcf800000, v5
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v9, v1
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v12, v5
+; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v10, v9, 0
+; CHECK-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v10, v12, v[2:3]
+; CHECK-NEXT:    v_mul_lo_u32 v2, v12, v1
+; CHECK-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v11, v9, v[5:6]
+; CHECK-NEXT:    v_mul_hi_u32 v6, v9, v1
+; CHECK-NEXT:    v_mul_hi_u32 v1, v12, v1
+; CHECK-NEXT:    v_mul_lo_u32 v13, v9, v5
+; CHECK-NEXT:    v_mul_lo_u32 v14, v12, v5
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v13
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v2, v6, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v6, v9, v5
+; CHECK-NEXT:    v_addc_u32_e64 v2, s[4:5], 0, 0, vcc
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v14, v1
+; CHECK-NEXT:    v_mul_hi_u32 v5, v12, v5
+; CHECK-NEXT:    v_addc_u32_e64 v1, s[4:5], v1, v6, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v6, s[4:5], 0, 0, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v1, v2, vcc
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, 0, v6, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v1
+; CHECK-NEXT:    v_addc_u32_e32 v12, vcc, v12, v2, vcc
+; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v10, v9, 0
+; CHECK-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v10, v12, v[2:3]
+; CHECK-NEXT:    v_ashrrev_i32_e32 v10, 31, v4
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v10
+; CHECK-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v11, v9, v[5:6]
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v4, v10, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v4, v2, v10
+; CHECK-NEXT:    v_mul_lo_u32 v2, v12, v1
+; CHECK-NEXT:    v_mul_lo_u32 v6, v9, v5
+; CHECK-NEXT:    v_xor_b32_e32 v11, v3, v10
+; CHECK-NEXT:    v_mul_hi_u32 v3, v9, v1
+; CHECK-NEXT:    v_mul_hi_u32 v1, v12, v1
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v2, v3, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v2, v12, v5
+; CHECK-NEXT:    v_mul_hi_u32 v6, v9, v5
+; CHECK-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, vcc
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
+; CHECK-NEXT:    v_mul_hi_u32 v5, v12, v5
+; CHECK-NEXT:    v_addc_u32_e64 v1, s[4:5], v1, v6, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v9, v1
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v12, v2, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v3, v11, v1
+; CHECK-NEXT:    v_mul_lo_u32 v5, v4, v2
+; CHECK-NEXT:    v_mul_hi_u32 v6, v4, v1
+; CHECK-NEXT:    v_mul_hi_u32 v1, v11, v1
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v3, v6, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v3, v11, v2
+; CHECK-NEXT:    v_mul_hi_u32 v6, v4, v2
+; CHECK-NEXT:    v_addc_u32_e64 v5, s[4:5], 0, 0, vcc
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v3, v1
+; CHECK-NEXT:    v_addc_u32_e64 v1, s[4:5], v1, v6, vcc
+; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, v1, v5, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v6, v11, v2
+; CHECK-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5]
+; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v7, v5, 0
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v3
+; CHECK-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v7, v6, v[2:3]
+; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v4, v1
+; CHECK-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v8, v5, v[2:3]
+; CHECK-NEXT:    v_subb_u32_e64 v3, s[4:5], v11, v2, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v2, s[4:5], v11, v2
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v3, v8
+; CHECK-NEXT:    v_subb_u32_e32 v2, vcc, v2, v8, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v7
+; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v8
+; CHECK-NEXT:    v_subbrev_u32_e32 v2, vcc, 0, v2, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v3, v4, v9, s[4:5]
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, 1, v5
+; CHECK-NEXT:    v_addc_u32_e32 v9, vcc, 0, v6, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v8
 ; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
-; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v2
-; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
-; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1
-; CHECK-NEXT:    v_cndmask_b32_e32 v1, v11, v2, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 1, v7
-; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, 0, v10, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v8
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v11, v1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 1, v4
+; CHECK-NEXT:    v_addc_u32_e32 v7, vcc, 0, v9, vcc
 ; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
-; CHECK-NEXT:    v_cndmask_b32_e32 v1, v7, v2, vcc
-; CHECK-NEXT:    v_cndmask_b32_e32 v2, v10, v3, vcc
-; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
-; CHECK-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
-; CHECK-NEXT:    v_xor_b32_e32 v3, v9, v0
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v4, v2, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v2, v9, v7, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v3, v10, v0
 ; CHECK-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc
 ; CHECK-NEXT:    v_xor_b32_e32 v0, v1, v3
 ; CHECK-NEXT:    v_xor_b32_e32 v1, v2, v3
@@ -1868,21 +1686,16 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; GISEL-NEXT:    v_mul_hi_u32 v11, v17, v11
 ; GISEL-NEXT:    v_mul_lo_u32 v13, v14, v12
 ; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v18, v17, v12
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v13, v7
-; GISEL-NEXT:    v_mul_hi_u32 v13, v14, v12
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v18, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v18, v13
+; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, v7, v18, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v7, v17, v12
+; GISEL-NEXT:    v_mul_hi_u32 v18, v14, v12
+; GISEL-NEXT:    v_addc_u32_e64 v13, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v11
 ; GISEL-NEXT:    v_mul_hi_u32 v12, v17, v12
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v11, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v13, v11
+; GISEL-NEXT:    v_addc_u32_e64 v7, s[4:5], v7, v18, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, v7, v13, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v11, vcc, 0, v11, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
 ; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v7
 ; GISEL-NEXT:    v_addc_u32_e32 v17, vcc, v17, v11, vcc
@@ -1898,23 +1711,18 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; GISEL-NEXT:    v_mul_lo_u32 v15, v14, v12
 ; GISEL-NEXT:    v_xor_b32_e32 v16, v1, v7
 ; GISEL-NEXT:    v_mul_hi_u32 v1, v14, v11
-; GISEL-NEXT:    v_mul_hi_u32 v11, v17, v11
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v1, v17, v12
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v15, v0
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, v0, v1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v0, v17, v12
+; GISEL-NEXT:    v_mul_hi_u32 v1, v17, v11
 ; GISEL-NEXT:    v_mul_hi_u32 v15, v14, v12
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v15
+; GISEL-NEXT:    v_addc_u32_e64 v11, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
 ; GISEL-NEXT:    v_mul_hi_u32 v12, v17, v12
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v11, v1
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v15, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, v0, v11, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v12, v1
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v14, v0
 ; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v17, v1, vcc
@@ -1922,28 +1730,22 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; GISEL-NEXT:    v_mul_lo_u32 v12, v13, v1
 ; GISEL-NEXT:    v_mul_hi_u32 v14, v13, v0
 ; GISEL-NEXT:    v_mul_hi_u32 v0, v16, v0
-; GISEL-NEXT:    v_mul_hi_u32 v15, v16, v1
 ; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v14
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v14, v16, v1
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
-; GISEL-NEXT:    v_mul_hi_u32 v12, v13, v1
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v14, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v14, v12
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v0, v11
+; GISEL-NEXT:    v_addc_u32_e32 v11, vcc, v11, v14, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v11, v16, v1
+; GISEL-NEXT:    v_mul_hi_u32 v14, v13, v1
+; GISEL-NEXT:    v_addc_u32_e64 v12, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v14, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v14, vcc, v0, v12, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v12, v16, v1
+; GISEL-NEXT:    v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5]
 ; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v8, v14, 0
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
-; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v11
+; GISEL-NEXT:    v_addc_u32_e32 v11, vcc, 0, v11, vcc
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v12, v11
 ; GISEL-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v8, v15, v[1:2]
 ; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v13, v0
 ; GISEL-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v5, v14, v[11:12]
-; GISEL-NEXT:    v_xor_b32_e32 v7, v7, v4
 ; GISEL-NEXT:    v_subb_u32_e64 v1, s[4:5], v16, v11, vcc
 ; GISEL-NEXT:    v_sub_i32_e64 v6, s[4:5], v16, v11
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v5
@@ -1993,116 +1795,102 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; GISEL-NEXT:    v_cndmask_b32_e32 v8, v17, v8, vcc
 ; GISEL-NEXT:    v_mul_hi_u32 v0, v18, v0
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v16
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v16, v18, v11
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v12, v1
-; GISEL-NEXT:    v_mul_hi_u32 v12, v19, v11
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v16, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v16, v12
+; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v16, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v1, v18, v11
+; GISEL-NEXT:    v_mul_hi_u32 v16, v19, v11
+; GISEL-NEXT:    v_addc_u32_e64 v12, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
 ; GISEL-NEXT:    v_mul_hi_u32 v11, v18, v11
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v12, v1
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v16, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, v0, v12, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v11, v1
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v19, v0
-; GISEL-NEXT:    v_addc_u32_e32 v12, vcc, v18, v1, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v20, v11, 0
+; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v19, v0
+; GISEL-NEXT:    v_addc_u32_e32 v17, vcc, v18, v1, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v20, v16, 0
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v13
 ; GISEL-NEXT:    v_cndmask_b32_e32 v13, v14, v5, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v20, v12, v[1:2]
-; GISEL-NEXT:    v_xor_b32_e32 v1, v13, v7
-; GISEL-NEXT:    v_ashrrev_i32_e32 v13, 31, v3
-; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v21, v11, v[4:5]
+; GISEL-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v20, v17, v[1:2]
+; GISEL-NEXT:    v_xor_b32_e32 v1, v7, v4
+; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
+; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v21, v16, v[11:12]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v8, v15, v8, vcc
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v13
-; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v13, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v5, v2, v13
-; GISEL-NEXT:    v_mul_lo_u32 v2, v12, v0
-; GISEL-NEXT:    v_mul_lo_u32 v14, v11, v4
-; GISEL-NEXT:    v_xor_b32_e32 v15, v3, v13
-; GISEL-NEXT:    v_mul_hi_u32 v3, v11, v0
-; GISEL-NEXT:    v_mul_hi_u32 v0, v12, v0
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v14
-; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v3, v12, v4
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v14, v2
-; GISEL-NEXT:    v_mul_hi_u32 v14, v11, v4
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v14
-; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v14
-; GISEL-NEXT:    v_mul_hi_u32 v4, v12, v4
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
+; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v7, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v5, v2, v7
+; GISEL-NEXT:    v_mul_lo_u32 v2, v17, v0
+; GISEL-NEXT:    v_mul_lo_u32 v11, v16, v4
+; GISEL-NEXT:    v_xor_b32_e32 v12, v3, v7
+; GISEL-NEXT:    v_mul_hi_u32 v3, v16, v0
+; GISEL-NEXT:    v_mul_hi_u32 v0, v17, v0
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v11
+; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, v2, v3, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v2, v17, v4
+; GISEL-NEXT:    v_mul_hi_u32 v11, v16, v4
+; GISEL-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
+; GISEL-NEXT:    v_mul_hi_u32 v4, v17, v4
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v11, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, v0, v3, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
-; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, v12, v2, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v3, v15, v0
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v16, v0
+; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, v17, v2, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v3, v12, v0
 ; GISEL-NEXT:    v_mul_lo_u32 v4, v5, v2
-; GISEL-NEXT:    v_mul_hi_u32 v11, v5, v0
-; GISEL-NEXT:    v_mul_hi_u32 v0, v15, v0
-; GISEL-NEXT:    v_mul_hi_u32 v12, v15, v2
+; GISEL-NEXT:    v_xor_b32_e32 v11, v13, v1
+; GISEL-NEXT:    v_mul_hi_u32 v13, v5, v0
+; GISEL-NEXT:    v_mul_hi_u32 v0, v12, v0
 ; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v11, v15, v2
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
-; GISEL-NEXT:    v_mul_hi_u32 v4, v5, v2
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v0, v3
-; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v10, v11, 0
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v0
+; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v13, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v3, v12, v2
+; GISEL-NEXT:    v_mul_hi_u32 v13, v5, v2
+; GISEL-NEXT:    v_addc_u32_e64 v4, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v13, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v13, vcc, v0, v4, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v4, v12, v2
+; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v10, v13, 0
+; GISEL-NEXT:    v_xor_b32_e32 v8, v8, v1
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v4, v0
 ; GISEL-NEXT:    v_mov_b32_e32 v0, v3
-; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v10, v12, v[0:1]
-; GISEL-NEXT:    v_xor_b32_e32 v8, v8, v7
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v1, v7
-; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v9, v11, v[3:4]
-; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v8, v7, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v10, v14, v[0:1]
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v11, v1
+; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v9, v13, v[3:4]
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v8, v1, vcc
 ; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v5, v2
-; GISEL-NEXT:    v_subb_u32_e64 v4, s[4:5], v15, v3, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v15, v3
+; GISEL-NEXT:    v_subb_u32_e64 v4, s[4:5], v12, v3, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v12, v3
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v9
 ; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v9, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[4:5]
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v10
 ; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v4, v9
 ; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, v5, v7, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v11
-; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, 0, v12, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, v5, v8, s[4:5]
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v13
+; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, 0, v14, vcc
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v10
 ; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
 ; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v9
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, v8, v2, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v11, v2, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v5
-; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, 0, v7, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, 0, v8, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
 ; GISEL-NEXT:    v_cndmask_b32_e32 v2, v5, v3, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v7, v8, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v8, v9, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, v11, v2, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v4, v13, v6
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v12, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v13, v2, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v4, v7, v6
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v14, v3, vcc
 ; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v4
 ; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v4
 ; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v4
@@ -2129,131 +1917,119 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:  ; %bb.1:
 ; CGP-NEXT:    v_ashrrev_i32_e32 v0, 31, v12
 ; CGP-NEXT:    v_add_i32_e32 v1, vcc, v11, v0
-; CGP-NEXT:    v_addc_u32_e32 v10, vcc, v12, v0, vcc
-; CGP-NEXT:    v_xor_b32_e32 v4, v1, v0
-; CGP-NEXT:    v_xor_b32_e32 v1, v10, v0
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v12, v0, vcc
+; CGP-NEXT:    v_xor_b32_e32 v13, v1, v0
+; CGP-NEXT:    v_xor_b32_e32 v4, v4, v0
+; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v13
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v10, v4
-; CGP-NEXT:    v_cvt_f32_u32_e32 v11, v1
-; CGP-NEXT:    v_sub_i32_e32 v14, vcc, 0, v4
-; CGP-NEXT:    v_subb_u32_e32 v15, vcc, 0, v1, vcc
-; CGP-NEXT:    v_mac_f32_e32 v10, 0x4f800000, v11
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v10, v10
-; CGP-NEXT:    v_mul_f32_e32 v10, 0x5f7ffffc, v10
-; CGP-NEXT:    v_mul_f32_e32 v11, 0x2f800000, v10
-; CGP-NEXT:    v_trunc_f32_e32 v12, v11
-; CGP-NEXT:    v_mac_f32_e32 v10, 0xcf800000, v12
-; CGP-NEXT:    v_cvt_u32_f32_e32 v13, v10
-; CGP-NEXT:    v_cvt_u32_f32_e32 v16, v12
-; CGP-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v14, v13, 0
-; CGP-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v14, v16, v[11:12]
-; CGP-NEXT:    v_mul_hi_u32 v17, v13, v10
-; CGP-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v15, v13, v[11:12]
-; CGP-NEXT:    v_mul_lo_u32 v12, v16, v10
-; CGP-NEXT:    v_mul_hi_u32 v10, v16, v10
-; CGP-NEXT:    v_mul_lo_u32 v18, v13, v11
-; CGP-NEXT:    v_mul_lo_u32 v19, v16, v11
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v18
-; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v17
-; CGP-NEXT:    v_mul_hi_u32 v17, v13, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v18, v12
+; CGP-NEXT:    v_sub_i32_e32 v15, vcc, 0, v13
+; CGP-NEXT:    v_subb_u32_e32 v16, vcc, 0, v4, vcc
+; CGP-NEXT:    v_mac_f32_e32 v1, 0x4f800000, v10
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v1, v1
+; CGP-NEXT:    v_mul_f32_e32 v1, 0x5f7ffffc, v1
+; CGP-NEXT:    v_mul_f32_e32 v10, 0x2f800000, v1
+; CGP-NEXT:    v_trunc_f32_e32 v12, v10
+; CGP-NEXT:    v_mac_f32_e32 v1, 0xcf800000, v12
+; CGP-NEXT:    v_cvt_u32_f32_e32 v14, v1
+; CGP-NEXT:    v_cvt_u32_f32_e32 v17, v12
+; CGP-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v15, v14, 0
+; CGP-NEXT:    v_mov_b32_e32 v1, v11
+; CGP-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v15, v17, v[1:2]
+; CGP-NEXT:    v_mul_lo_u32 v1, v17, v10
+; CGP-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v16, v14, v[11:12]
+; CGP-NEXT:    v_mul_hi_u32 v12, v14, v10
+; CGP-NEXT:    v_mul_hi_u32 v10, v17, v10
+; CGP-NEXT:    v_mul_lo_u32 v18, v14, v11
+; CGP-NEXT:    v_mul_lo_u32 v19, v17, v11
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v18
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v12, vcc
+; CGP-NEXT:    v_mul_hi_u32 v12, v14, v11
+; CGP-NEXT:    v_addc_u32_e64 v1, s[4:5], 0, 0, vcc
 ; CGP-NEXT:    v_add_i32_e32 v10, vcc, v19, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v17
-; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v17, vcc, v18, v17
-; CGP-NEXT:    v_mul_hi_u32 v11, v16, v11
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v17, v12
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v10
-; CGP-NEXT:    v_addc_u32_e32 v16, vcc, v16, v11, vcc
-; CGP-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v14, v13, 0
-; CGP-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v14, v16, v[11:12]
-; CGP-NEXT:    v_ashrrev_i32_e32 v14, 31, v9
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v14
-; CGP-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v15, v13, v[11:12]
-; CGP-NEXT:    v_addc_u32_e32 v9, vcc, v9, v14, vcc
-; CGP-NEXT:    v_xor_b32_e32 v12, v8, v14
-; CGP-NEXT:    v_mul_lo_u32 v8, v16, v10
-; CGP-NEXT:    v_mul_lo_u32 v15, v13, v11
-; CGP-NEXT:    v_xor_b32_e32 v17, v9, v14
-; CGP-NEXT:    v_mul_hi_u32 v9, v13, v10
-; CGP-NEXT:    v_mul_hi_u32 v10, v16, v10
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v15
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
-; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v9, v16, v11
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v15, v8
-; CGP-NEXT:    v_mul_hi_u32 v15, v13, v11
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v15
-; CGP-NEXT:    v_mul_hi_u32 v11, v16, v11
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v13, v8
-; CGP-NEXT:    v_addc_u32_e32 v9, vcc, v16, v9, vcc
-; CGP-NEXT:    v_mul_lo_u32 v10, v17, v8
-; CGP-NEXT:    v_mul_lo_u32 v11, v12, v9
-; CGP-NEXT:    v_mul_hi_u32 v13, v12, v8
-; CGP-NEXT:    v_mul_hi_u32 v8, v17, v8
-; CGP-NEXT:    v_mul_hi_u32 v15, v17, v9
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v13, v17, v9
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT:    v_mul_hi_u32 v11, v12, v9
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v13, v8
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v13, v11
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v8, v10
-; CGP-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v4, v13, 0
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT:    v_mul_hi_u32 v11, v17, v11
+; CGP-NEXT:    v_addc_u32_e64 v10, s[4:5], v10, v12, vcc
+; CGP-NEXT:    v_addc_u32_e64 v12, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v10, v1, vcc
+; CGP-NEXT:    v_addc_u32_e32 v10, vcc, 0, v12, vcc
 ; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v15, v10
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v1
+; CGP-NEXT:    v_addc_u32_e32 v17, vcc, v17, v10, vcc
+; CGP-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v15, v14, 0
+; CGP-NEXT:    v_mov_b32_e32 v1, v11
+; CGP-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v15, v17, v[1:2]
+; CGP-NEXT:    v_ashrrev_i32_e32 v15, 31, v9
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v8, v15
+; CGP-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v16, v14, v[11:12]
+; CGP-NEXT:    v_addc_u32_e32 v8, vcc, v9, v15, vcc
+; CGP-NEXT:    v_xor_b32_e32 v12, v1, v15
+; CGP-NEXT:    v_mul_lo_u32 v1, v17, v10
+; CGP-NEXT:    v_mul_lo_u32 v9, v14, v11
+; CGP-NEXT:    v_xor_b32_e32 v16, v8, v15
+; CGP-NEXT:    v_mul_hi_u32 v8, v14, v10
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v9
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v8, vcc
+; CGP-NEXT:    v_mul_lo_u32 v1, v17, v11
+; CGP-NEXT:    v_mul_hi_u32 v8, v17, v10
+; CGP-NEXT:    v_mul_hi_u32 v10, v14, v11
+; CGP-NEXT:    v_addc_u32_e64 v9, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v8
+; CGP-NEXT:    v_addc_u32_e64 v1, s[4:5], v1, v10, vcc
+; CGP-NEXT:    v_mul_hi_u32 v10, v17, v11
+; CGP-NEXT:    v_addc_u32_e64 v8, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v9, vcc
+; CGP-NEXT:    v_addc_u32_e32 v8, vcc, 0, v8, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v14, v1
+; CGP-NEXT:    v_addc_u32_e32 v8, vcc, v17, v8, vcc
+; CGP-NEXT:    v_mul_lo_u32 v9, v16, v1
+; CGP-NEXT:    v_mul_lo_u32 v10, v12, v8
+; CGP-NEXT:    v_mul_hi_u32 v11, v12, v1
+; CGP-NEXT:    v_mul_hi_u32 v1, v16, v1
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; CGP-NEXT:    v_addc_u32_e32 v9, vcc, v9, v11, vcc
+; CGP-NEXT:    v_mul_lo_u32 v9, v16, v8
+; CGP-NEXT:    v_mul_hi_u32 v11, v12, v8
+; CGP-NEXT:    v_addc_u32_e64 v10, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v9, v1
+; CGP-NEXT:    v_addc_u32_e64 v1, s[4:5], v1, v11, vcc
+; CGP-NEXT:    v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v11, vcc, v1, v10, vcc
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, 0, v9, vcc
+; CGP-NEXT:    v_mul_hi_u32 v10, v16, v8
+; CGP-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v13, v11, 0
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v10, v1
+; CGP-NEXT:    v_mov_b32_e32 v1, v9
+; CGP-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v13, v14, v[1:2]
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v12, v8
 ; CGP-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v4, v11, v[9:10]
-; CGP-NEXT:    v_sub_i32_e32 v8, vcc, v12, v8
-; CGP-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v1, v13, v[9:10]
-; CGP-NEXT:    v_subb_u32_e64 v10, s[4:5], v17, v9, vcc
-; CGP-NEXT:    v_sub_i32_e64 v9, s[4:5], v17, v9
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v1
-; CGP-NEXT:    v_subb_u32_e32 v9, vcc, v9, v1, vcc
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[4:5]
+; CGP-NEXT:    v_subb_u32_e64 v8, s[4:5], v16, v9, vcc
+; CGP-NEXT:    v_sub_i32_e64 v9, s[4:5], v16, v9
 ; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v4
-; CGP-NEXT:    v_sub_i32_e32 v8, vcc, v8, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v10, v1
+; CGP-NEXT:    v_subb_u32_e32 v9, vcc, v9, v4, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v13
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v8, v4
 ; CGP-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v9, vcc
-; CGP-NEXT:    v_cndmask_b32_e64 v10, v12, v15, s[4:5]
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, 1, v13
-; CGP-NEXT:    v_addc_u32_e32 v15, vcc, 0, v11, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v9, v1
+; CGP-NEXT:    v_cndmask_b32_e64 v8, v10, v12, s[4:5]
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, 1, v11
+; CGP-NEXT:    v_addc_u32_e32 v12, vcc, 0, v14, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v9, v4
 ; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, -1, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
-; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v1
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v16, v4, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v12
-; CGP-NEXT:    v_addc_u32_e32 v8, vcc, 0, v15, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v16, v1, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v10
+; CGP-NEXT:    v_addc_u32_e32 v9, vcc, 0, v12, vcc
 ; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v12, v4, vcc
-; CGP-NEXT:    v_cndmask_b32_e32 v4, v15, v8, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v13, v1, vcc
-; CGP-NEXT:    v_xor_b32_e32 v8, v14, v0
-; CGP-NEXT:    v_cndmask_b32_e32 v4, v11, v4, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v10, v4, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v12, v9, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v11, v1, vcc
+; CGP-NEXT:    v_xor_b32_e32 v8, v15, v0
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v14, v4, vcc
 ; CGP-NEXT:    v_xor_b32_e32 v0, v1, v8
 ; CGP-NEXT:    v_xor_b32_e32 v1, v4, v8
 ; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
@@ -2304,133 +2080,116 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:  .LBB8_7:
 ; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v10
 ; CGP-NEXT:    v_add_i32_e32 v3, vcc, v9, v2
-; CGP-NEXT:    v_addc_u32_e32 v6, vcc, v10, v2, vcc
-; CGP-NEXT:    v_xor_b32_e32 v4, v3, v2
-; CGP-NEXT:    v_xor_b32_e32 v3, v6, v2
-; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v4
-; CGP-NEXT:    v_cvt_f32_u32_e32 v8, v3
-; CGP-NEXT:    v_sub_i32_e32 v12, vcc, 0, v4
-; CGP-NEXT:    v_subb_u32_e32 v13, vcc, 0, v3, vcc
-; CGP-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v8
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v6, v6
-; CGP-NEXT:    v_mul_f32_e32 v6, 0x5f7ffffc, v6
-; CGP-NEXT:    v_mul_f32_e32 v8, 0x2f800000, v6
-; CGP-NEXT:    v_trunc_f32_e32 v10, v8
-; CGP-NEXT:    v_mac_f32_e32 v6, 0xcf800000, v10
-; CGP-NEXT:    v_cvt_u32_f32_e32 v11, v6
-; CGP-NEXT:    v_cvt_u32_f32_e32 v14, v10
-; CGP-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v12, v11, 0
-; CGP-NEXT:    v_mov_b32_e32 v6, v9
-; CGP-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v12, v14, v[6:7]
-; CGP-NEXT:    v_mul_lo_u32 v6, v14, v8
-; CGP-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v13, v11, v[9:10]
-; CGP-NEXT:    v_mul_hi_u32 v10, v11, v8
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v10, v2, vcc
+; CGP-NEXT:    v_xor_b32_e32 v6, v3, v2
+; CGP-NEXT:    v_xor_b32_e32 v10, v4, v2
+; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v6
+; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v10
+; CGP-NEXT:    v_sub_i32_e32 v12, vcc, 0, v6
+; CGP-NEXT:    v_subb_u32_e32 v13, vcc, 0, v10, vcc
+; CGP-NEXT:    v_mac_f32_e32 v3, 0x4f800000, v4
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, v3
+; CGP-NEXT:    v_mul_f32_e32 v3, 0x5f7ffffc, v3
+; CGP-NEXT:    v_mul_f32_e32 v4, 0x2f800000, v3
+; CGP-NEXT:    v_trunc_f32_e32 v8, v4
+; CGP-NEXT:    v_mac_f32_e32 v3, 0xcf800000, v8
+; CGP-NEXT:    v_cvt_u32_f32_e32 v11, v3
+; CGP-NEXT:    v_cvt_u32_f32_e32 v14, v8
+; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v12, v11, 0
+; CGP-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v12, v14, v[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v4, v14, v3
+; CGP-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v13, v11, v[8:9]
+; CGP-NEXT:    v_mul_hi_u32 v9, v11, v3
+; CGP-NEXT:    v_mul_hi_u32 v3, v14, v3
+; CGP-NEXT:    v_mul_lo_u32 v15, v11, v8
+; CGP-NEXT:    v_mul_lo_u32 v16, v14, v8
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v15
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v4, v9, vcc
+; CGP-NEXT:    v_mul_hi_u32 v9, v11, v8
+; CGP-NEXT:    v_addc_u32_e64 v4, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v16, v3
 ; CGP-NEXT:    v_mul_hi_u32 v8, v14, v8
-; CGP-NEXT:    v_mul_lo_u32 v15, v11, v9
-; CGP-NEXT:    v_mul_lo_u32 v16, v14, v9
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v15
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v10
-; CGP-NEXT:    v_mul_hi_u32 v10, v11, v9
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v15, v6
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v16, v8
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v15, v10
-; CGP-NEXT:    v_mul_hi_u32 v9, v14, v9
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v6
-; CGP-NEXT:    v_addc_u32_e32 v14, vcc, v14, v8, vcc
-; CGP-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v12, v11, 0
-; CGP-NEXT:    v_mov_b32_e32 v6, v9
-; CGP-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v12, v14, v[6:7]
+; CGP-NEXT:    v_addc_u32_e64 v3, s[4:5], v3, v9, vcc
+; CGP-NEXT:    v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v4, vcc
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, 0, v9, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v3
+; CGP-NEXT:    v_addc_u32_e32 v14, vcc, v14, v4, vcc
+; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v12, v11, 0
+; CGP-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v12, v14, v[4:5]
 ; CGP-NEXT:    v_ashrrev_i32_e32 v12, 31, v7
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v12
-; CGP-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v13, v11, v[9:10]
-; CGP-NEXT:    v_addc_u32_e32 v6, vcc, v7, v12, vcc
-; CGP-NEXT:    v_xor_b32_e32 v10, v5, v12
-; CGP-NEXT:    v_mul_lo_u32 v5, v14, v8
-; CGP-NEXT:    v_mul_lo_u32 v7, v11, v9
-; CGP-NEXT:    v_xor_b32_e32 v13, v6, v12
-; CGP-NEXT:    v_mul_hi_u32 v6, v11, v8
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v5, v12
+; CGP-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v13, v11, v[8:9]
+; CGP-NEXT:    v_addc_u32_e32 v5, vcc, v7, v12, vcc
+; CGP-NEXT:    v_xor_b32_e32 v7, v4, v12
+; CGP-NEXT:    v_mul_lo_u32 v4, v14, v3
+; CGP-NEXT:    v_mul_lo_u32 v9, v11, v8
+; CGP-NEXT:    v_xor_b32_e32 v13, v5, v12
+; CGP-NEXT:    v_mul_hi_u32 v5, v11, v3
+; CGP-NEXT:    v_mul_hi_u32 v3, v14, v3
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v4, v5, vcc
+; CGP-NEXT:    v_mul_lo_u32 v4, v14, v8
+; CGP-NEXT:    v_mul_hi_u32 v9, v11, v8
+; CGP-NEXT:    v_addc_u32_e64 v5, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
 ; CGP-NEXT:    v_mul_hi_u32 v8, v14, v8
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
-; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v6, v14, v9
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
-; CGP-NEXT:    v_mul_hi_u32 v7, v11, v9
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
-; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
-; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
-; CGP-NEXT:    v_mul_hi_u32 v8, v14, v9
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v11, v5
-; CGP-NEXT:    v_addc_u32_e32 v6, vcc, v14, v6, vcc
-; CGP-NEXT:    v_mul_lo_u32 v7, v13, v5
-; CGP-NEXT:    v_mul_lo_u32 v8, v10, v6
-; CGP-NEXT:    v_mul_hi_u32 v9, v10, v5
-; CGP-NEXT:    v_mul_hi_u32 v5, v13, v5
-; CGP-NEXT:    v_mul_hi_u32 v11, v13, v6
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
-; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
-; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v9, v13, v6
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
-; CGP-NEXT:    v_mul_hi_u32 v8, v10, v6
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT:    v_addc_u32_e64 v3, s[4:5], v3, v9, vcc
+; CGP-NEXT:    v_addc_u32_e64 v4, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v5, vcc
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, 0, v4, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v11, v3
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v14, v4, vcc
+; CGP-NEXT:    v_mul_lo_u32 v5, v13, v3
+; CGP-NEXT:    v_mul_lo_u32 v8, v7, v4
+; CGP-NEXT:    v_mul_hi_u32 v9, v7, v3
+; CGP-NEXT:    v_mul_hi_u32 v3, v13, v3
 ; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
-; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v5, v7
-; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v4, v9, 0
-; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v11, v7
-; CGP-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v4, v8, v[6:7]
-; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v10, v5
-; CGP-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v3, v9, v[6:7]
-; CGP-NEXT:    v_subb_u32_e64 v7, s[4:5], v13, v6, vcc
-; CGP-NEXT:    v_sub_i32_e64 v6, s[4:5], v13, v6
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v3
-; CGP-NEXT:    v_subb_u32_e32 v6, vcc, v6, v3, vcc
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v4
-; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v5, v4
+; CGP-NEXT:    v_addc_u32_e32 v5, vcc, v5, v9, vcc
+; CGP-NEXT:    v_mul_lo_u32 v5, v13, v4
+; CGP-NEXT:    v_mul_hi_u32 v9, v7, v4
+; CGP-NEXT:    v_addc_u32_e64 v8, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
+; CGP-NEXT:    v_addc_u32_e64 v3, s[4:5], v3, v9, vcc
+; CGP-NEXT:    v_addc_u32_e32 v8, vcc, v3, v8, vcc
+; CGP-NEXT:    v_mul_hi_u32 v9, v13, v4
+; CGP-NEXT:    v_addc_u32_e64 v5, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v6, v8, 0
+; CGP-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v5
+; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v6, v9, v[4:5]
+; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v7, v3
+; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v10, v8, v[4:5]
+; CGP-NEXT:    v_subb_u32_e64 v5, s[4:5], v13, v4, vcc
+; CGP-NEXT:    v_sub_i32_e64 v4, s[4:5], v13, v4
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v10
+; CGP-NEXT:    v_subb_u32_e32 v4, vcc, v4, v10, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v3, v6
+; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v3, v6
 ; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v7, v3
-; CGP-NEXT:    v_subbrev_u32_e32 v6, vcc, 0, v6, vcc
-; CGP-NEXT:    v_cndmask_b32_e64 v7, v10, v11, s[4:5]
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, 1, v9
-; CGP-NEXT:    v_addc_u32_e32 v11, vcc, 0, v8, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v6, v3
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v5, v10
+; CGP-NEXT:    v_subbrev_u32_e32 v4, vcc, 0, v4, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v5, v7, v11, s[4:5]
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v8
+; CGP-NEXT:    v_addc_u32_e32 v11, vcc, 0, v9, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v10
 ; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, -1, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v5, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
-; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v6, v3
-; CGP-NEXT:    v_cndmask_b32_e32 v3, v13, v4, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v10
-; CGP-NEXT:    v_addc_u32_e32 v5, vcc, 0, v11, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v3, 0, -1, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v10
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v13, v3, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v7
+; CGP-NEXT:    v_addc_u32_e32 v6, vcc, 0, v11, vcc
 ; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
-; CGP-NEXT:    v_cndmask_b32_e32 v3, v10, v4, vcc
-; CGP-NEXT:    v_cndmask_b32_e32 v4, v11, v5, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
-; CGP-NEXT:    v_cndmask_b32_e32 v3, v9, v3, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v7, v4, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v11, v6, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v8, v3, vcc
 ; CGP-NEXT:    v_xor_b32_e32 v5, v12, v2
-; CGP-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v9, v4, vcc
 ; CGP-NEXT:    v_xor_b32_e32 v2, v3, v5
 ; CGP-NEXT:    v_xor_b32_e32 v3, v4, v5
 ; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v5
@@ -2536,235 +2295,204 @@ define <2 x i64> @v_sdiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_and_b32_e32 v1, 0xffffff, v4
 ; GISEL-NEXT:    v_add_i32_e64 v3, s[4:5], 0, 0
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, 0, v1
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v1
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v3
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v1
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v3
 ; GISEL-NEXT:    v_sub_i32_e32 v11, vcc, 0, v1
 ; GISEL-NEXT:    v_subb_u32_e32 v12, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v4
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v5
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
 ; GISEL-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
-; GISEL-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
-; GISEL-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v5
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; GISEL-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v4
 ; GISEL-NEXT:    v_trunc_f32_e32 v9, v7
-; GISEL-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v9
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v10, v5
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v9
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v10, v4
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v13, v9
 ; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v11, v10, 0
-; GISEL-NEXT:    v_mov_b32_e32 v5, v8
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v11, v13, v[5:6]
-; GISEL-NEXT:    v_mul_lo_u32 v5, v13, v7
+; GISEL-NEXT:    v_mov_b32_e32 v4, v8
+; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v11, v13, v[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v4, v13, v7
 ; GISEL-NEXT:    v_mul_hi_u32 v14, v10, v7
 ; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v12, v10, v[8:9]
-; GISEL-NEXT:    v_mul_hi_u32 v7, v13, v7
 ; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v8
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v14
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v14, v13, v8
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
+; GISEL-NEXT:    v_mul_lo_u32 v15, v13, v8
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
+; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, v4, v14, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v4, v13, v7
 ; GISEL-NEXT:    v_mul_hi_u32 v9, v10, v8
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v14, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
+; GISEL-NEXT:    v_addc_u32_e64 v7, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v15, v4
 ; GISEL-NEXT:    v_mul_hi_u32 v8, v13, v8
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
+; GISEL-NEXT:    v_addc_u32_e64 v4, s[4:5], v4, v9, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, v4, v7, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, 0, v9, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v5
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v4
 ; GISEL-NEXT:    v_addc_u32_e32 v13, vcc, v13, v7, vcc
 ; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v11, v10, 0
-; GISEL-NEXT:    v_mov_b32_e32 v5, v8
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v11, v13, v[5:6]
-; GISEL-NEXT:    v_mul_lo_u32 v5, v13, v7
+; GISEL-NEXT:    v_mov_b32_e32 v4, v8
+; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v11, v13, v[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v4, v13, v7
 ; GISEL-NEXT:    v_add_i32_e32 v11, vcc, 0, v0
 ; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v12, v10, v[8:9]
 ; GISEL-NEXT:    v_mul_hi_u32 v0, v10, v7
-; GISEL-NEXT:    v_mul_hi_u32 v7, v13, v7
 ; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v8
-; GISEL-NEXT:    v_and_b32_e32 v12, 0xffffff, v2
-; GISEL-NEXT:    v_and_b32_e32 v2, 0xffffff, v6
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v5, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v5, v13, v8
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, v4, v0, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v0, v13, v8
+; GISEL-NEXT:    v_mul_hi_u32 v4, v13, v7
 ; GISEL-NEXT:    v_mul_hi_u32 v9, v10, v8
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; GISEL-NEXT:    v_addc_u32_e64 v7, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
 ; GISEL-NEXT:    v_mul_hi_u32 v8, v13, v8
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v5, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v9, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v4, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, v0, v7, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, 0, v4, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v10, v0
-; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v13, v5, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v7, v3, v0
-; GISEL-NEXT:    v_mul_lo_u32 v8, v11, v5
+; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, v13, v4, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v4, v3, v0
+; GISEL-NEXT:    v_mul_lo_u32 v8, v11, v7
 ; GISEL-NEXT:    v_mul_hi_u32 v9, v11, v0
 ; GISEL-NEXT:    v_mul_hi_u32 v0, v3, v0
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v9, v3, v5
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
-; GISEL-NEXT:    v_mul_hi_u32 v8, v11, v5
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v8
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v0, v7
-; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v5
-; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v1, v10, 0
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GISEL-NEXT:    v_and_b32_e32 v10, 0xffffff, v2
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, v4, v9, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v4, v3, v7
+; GISEL-NEXT:    v_mul_hi_u32 v9, v11, v7
+; GISEL-NEXT:    v_addc_u32_e64 v8, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v9, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, v0, v8, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, 0, v9, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v9, v3, v7
+; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v1, v4, 0
+; GISEL-NEXT:    v_and_b32_e32 v2, 0xffffff, v6
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v5, v0
-; GISEL-NEXT:    v_mov_b32_e32 v5, v8
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v1, v0, v[5:6]
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v3, v10, v[8:9]
-; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v11, v7
-; GISEL-NEXT:    v_subb_u32_e64 v7, s[4:5], v3, v5, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v5, s[4:5], v3, v5
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v1, v0, v[8:9]
 ; GISEL-NEXT:    v_add_i32_e64 v2, s[4:5], 0, v2
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v11, v2
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v7, v3
-; GISEL-NEXT:    v_subb_u32_e32 v5, vcc, v5, v3, vcc
-; GISEL-NEXT:    v_mac_f32_e32 v11, 0x4f800000, v4
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, v8, v9, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, v6, v1
-; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
-; GISEL-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v5, vcc
-; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
-; GISEL-NEXT:    v_trunc_f32_e32 v5, v5
-; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v11, v4
-; GISEL-NEXT:    v_sub_i32_e32 v14, vcc, 0, v2
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v13, v5
-; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v14, v11, 0
-; GISEL-NEXT:    v_subb_u32_e32 v15, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v14, v13, v[5:6]
-; GISEL-NEXT:    v_add_i32_e32 v16, vcc, 1, v10
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v15, v11, v[5:6]
+; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v3, v4, v[8:9]
+; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, v11, v7
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v7, v2
+; GISEL-NEXT:    v_subb_u32_e64 v6, s[4:5], v3, v8, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v8, s[4:5], v3, v8
+; GISEL-NEXT:    v_mac_f32_e32 v7, 0x4f800000, v5
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v7
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v3
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v1
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v6, v3
+; GISEL-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, v11, v7, s[4:5]
+; GISEL-NEXT:    v_trunc_f32_e32 v7, v6
+; GISEL-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v7
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v12, v5
+; GISEL-NEXT:    v_sub_i32_e64 v13, s[4:5], 0, v2
+; GISEL-NEXT:    v_subb_u32_e64 v14, s[4:5], 0, v3, s[4:5]
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v13, v12, 0
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v15, v7
+; GISEL-NEXT:    v_subb_u32_e32 v8, vcc, v8, v3, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, v9, v1
+; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v13, v15, v[6:7]
+; GISEL-NEXT:    v_subbrev_u32_e32 v8, vcc, 0, v8, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v14, v12, v[6:7]
+; GISEL-NEXT:    v_mul_lo_u32 v7, v15, v5
+; GISEL-NEXT:    v_add_i32_e32 v16, vcc, 1, v4
+; GISEL-NEXT:    v_mul_lo_u32 v18, v12, v6
+; GISEL-NEXT:    v_mul_hi_u32 v20, v12, v5
 ; GISEL-NEXT:    v_addc_u32_e32 v17, vcc, 0, v0, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v9, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, -1, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v1
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v3
+; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, -1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v18
+; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, v7, v20, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v7, v15, v6
+; GISEL-NEXT:    v_mul_hi_u32 v5, v15, v5
+; GISEL-NEXT:    v_mul_hi_u32 v20, v12, v6
+; GISEL-NEXT:    v_addc_u32_e64 v18, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; GISEL-NEXT:    v_mul_hi_u32 v6, v15, v6
+; GISEL-NEXT:    v_addc_u32_e64 v5, s[4:5], v5, v20, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v7, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v18, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v5
+; GISEL-NEXT:    v_addc_u32_e32 v15, vcc, v15, v6, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v13, v12, 0
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v9, v1
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v6, v13, v4
-; GISEL-NEXT:    v_mul_lo_u32 v8, v11, v5
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v9, v18, v1, vcc
-; GISEL-NEXT:    v_mul_hi_u32 v1, v11, v4
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v6, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v6, v13, v5
-; GISEL-NEXT:    v_mul_hi_u32 v4, v13, v4
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
-; GISEL-NEXT:    v_mul_hi_u32 v8, v11, v5
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v8, v3
+; GISEL-NEXT:    v_cndmask_b32_e32 v8, v19, v1, vcc
+; GISEL-NEXT:    v_mov_b32_e32 v1, v6
+; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v13, v15, v[1:2]
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v16
+; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v14, v12, v[6:7]
+; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, 0, v17, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
+; GISEL-NEXT:    v_cndmask_b32_e32 v7, v17, v9, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v8, v15, v5
+; GISEL-NEXT:    v_mul_lo_u32 v9, v12, v6
+; GISEL-NEXT:    v_mul_hi_u32 v13, v12, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v16, v1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, 0, v10
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, v8, v13, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v8, v15, v6
+; GISEL-NEXT:    v_mul_hi_u32 v5, v15, v5
+; GISEL-NEXT:    v_mul_hi_u32 v13, v12, v6
+; GISEL-NEXT:    v_addc_u32_e64 v9, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
+; GISEL-NEXT:    v_mul_hi_u32 v6, v15, v6
+; GISEL-NEXT:    v_addc_u32_e64 v5, s[4:5], v5, v13, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v8, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v9, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, 0, v8, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
-; GISEL-NEXT:    v_mul_hi_u32 v5, v13, v5
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v4, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v11, v1
-; GISEL-NEXT:    v_addc_u32_e32 v11, vcc, v13, v4, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v14, v8, 0
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, 1, v16
-; GISEL-NEXT:    v_mov_b32_e32 v1, v5
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v14, v11, v[1:2]
-; GISEL-NEXT:    v_addc_u32_e32 v18, vcc, 0, v17, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v15, v8, v[5:6]
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v9
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v16, v13, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v9, v17, v18, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
-; GISEL-NEXT:    v_mul_lo_u32 v6, v11, v4
-; GISEL-NEXT:    v_mul_lo_u32 v7, v8, v5
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v10, v1, vcc
-; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], 0, v12
-; GISEL-NEXT:    v_mul_hi_u32 v12, v8, v4
-; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
-; GISEL-NEXT:    v_mul_lo_u32 v12, v11, v5
-; GISEL-NEXT:    v_mul_hi_u32 v4, v11, v4
-; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
-; GISEL-NEXT:    v_mul_hi_u32 v7, v8, v5
-; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v12, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v12, v7
-; GISEL-NEXT:    v_mul_hi_u32 v5, v11, v5
-; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v6
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
-; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v6
-; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v8, v4
-; GISEL-NEXT:    v_addc_u32_e64 v5, s[4:5], v11, v5, s[4:5]
-; GISEL-NEXT:    v_mul_lo_u32 v6, v3, v4
-; GISEL-NEXT:    v_mul_lo_u32 v7, v10, v5
-; GISEL-NEXT:    v_cndmask_b32_e32 v8, v0, v9, vcc
-; GISEL-NEXT:    v_mul_hi_u32 v0, v10, v4
-; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v6, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v6, v3, v5
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
-; GISEL-NEXT:    v_mul_hi_u32 v7, v10, v5
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v4, v0
-; GISEL-NEXT:    v_mul_hi_u32 v9, v3, v5
-; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v2, v7, 0
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v6, v0
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v0
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v12, v5
+; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v15, v6, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v5
+; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v6
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v11
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v4, v10, v5
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v9
+; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v5
+; GISEL-NEXT:    v_addc_u32_e64 v4, s[4:5], v8, v4, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v4, v3, v6
+; GISEL-NEXT:    v_mul_hi_u32 v9, v10, v6
+; GISEL-NEXT:    v_addc_u32_e64 v8, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v5
+; GISEL-NEXT:    v_addc_u32_e64 v4, s[6:7], v4, v9, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v5, s[6:7], 0, 0, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v8, s[4:5], v4, v8, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v9, s[4:5], 0, v5, s[4:5]
+; GISEL-NEXT:    v_mul_hi_u32 v6, v3, v6
+; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v2, v8, 0
+; GISEL-NEXT:    v_cndmask_b32_e32 v7, v0, v7, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v6, v9
 ; GISEL-NEXT:    v_mov_b32_e32 v0, v5
 ; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v2, v9, v[0:1]
 ; GISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0, v1
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v3, v7, v[5:6]
-; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v8, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v3, v8, v[5:6]
+; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v7, vcc
 ; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v10, v4
 ; GISEL-NEXT:    v_subb_u32_e64 v6, s[4:5], v3, v5, vcc
 ; GISEL-NEXT:    v_sub_i32_e64 v5, s[4:5], v3, v5
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v3
 ; GISEL-NEXT:    v_subb_u32_e32 v5, vcc, v5, v3, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v2
 ; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v4, v2
 ; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v6, v3
 ; GISEL-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, v8, v10, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, 1, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, v7, v10, s[4:5]
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v8
 ; GISEL-NEXT:    v_addc_u32_e32 v10, vcc, 0, v9, vcc
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v5, v3
 ; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
@@ -2772,13 +2500,13 @@ define <2 x i64> @v_sdiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
 ; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v3
 ; GISEL-NEXT:    v_cndmask_b32_e32 v2, v11, v2, vcc
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v8
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v7
 ; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, 0, v10, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, v8, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v7, v3, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v3, v10, v4, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, v7, v2, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v8, v2, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v3, v9, v3, vcc
 ; GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, 0, v2
 ; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll
index b666f45521661..73157c28ca82b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll
@@ -147,23 +147,23 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_load_dwordx8 s[4:11], s[4:5], 0x0
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    s_ashr_i32 s2, s9, 31
-; GFX8-NEXT:    s_ashr_i32 s12, s11, 31
-; GFX8-NEXT:    s_add_u32 s0, s8, s2
-; GFX8-NEXT:    s_addc_u32 s1, s9, s2
-; GFX8-NEXT:    s_add_u32 s8, s10, s12
-; GFX8-NEXT:    s_mov_b32 s13, s12
-; GFX8-NEXT:    s_addc_u32 s9, s11, s12
-; GFX8-NEXT:    s_xor_b64 s[8:9], s[8:9], s[12:13]
+; GFX8-NEXT:    s_ashr_i32 s12, s9, 31
+; GFX8-NEXT:    s_ashr_i32 s14, s11, 31
+; GFX8-NEXT:    s_add_u32 s0, s8, s12
+; GFX8-NEXT:    s_addc_u32 s1, s9, s12
+; GFX8-NEXT:    s_add_u32 s2, s10, s14
+; GFX8-NEXT:    s_mov_b32 s15, s14
+; GFX8-NEXT:    s_addc_u32 s3, s11, s14
+; GFX8-NEXT:    s_xor_b64 s[8:9], s[2:3], s[14:15]
 ; GFX8-NEXT:    v_cvt_f32_u32_e32 v0, s9
 ; GFX8-NEXT:    v_cvt_f32_u32_e32 v1, s8
-; GFX8-NEXT:    s_mov_b32 s3, s2
-; GFX8-NEXT:    s_xor_b64 s[10:11], s[0:1], s[2:3]
+; GFX8-NEXT:    s_mov_b32 s13, s12
+; GFX8-NEXT:    s_xor_b64 s[10:11], s[0:1], s[12:13]
 ; GFX8-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v0
 ; GFX8-NEXT:    v_add_f32_e32 v0, v0, v1
 ; GFX8-NEXT:    v_rcp_iflag_f32_e32 v0, v0
-; GFX8-NEXT:    s_sub_u32 s14, 0, s8
-; GFX8-NEXT:    s_subb_u32 s15, 0, s9
+; GFX8-NEXT:    s_sub_u32 s18, 0, s8
+; GFX8-NEXT:    s_subb_u32 s19, 0, s9
 ; GFX8-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GFX8-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GFX8-NEXT:    v_trunc_f32_e32 v2, v1
@@ -171,10 +171,10 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX8-NEXT:    v_add_f32_e32 v0, v1, v0
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v3, v0
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v4, v2
-; GFX8-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s14, v3, 0
-; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s14, v4, v[1:2]
+; GFX8-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s18, v3, 0
+; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s18, v4, v[1:2]
 ; GFX8-NEXT:    v_mul_hi_u32 v5, v3, v0
-; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s15, v3, v[1:2]
+; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s19, v3, v[1:2]
 ; GFX8-NEXT:    v_mul_lo_u32 v2, v4, v0
 ; GFX8-NEXT:    v_mul_hi_u32 v0, v4, v0
 ; GFX8-NEXT:    v_mul_lo_u32 v6, v3, v1
@@ -182,44 +182,34 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX8-NEXT:    v_mul_hi_u32 v8, v3, v1
 ; GFX8-NEXT:    v_mul_hi_u32 v1, v4, v1
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
-; GFX8-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v7, v0
-; GFX8-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v5
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v6, v2
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v8
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v5, vcc, v7, v5
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v5, v2
+; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, v2, v5, vcc
+; GFX8-NEXT:    v_add_u32_e64 v0, s[0:1], v7, v0
+; GFX8-NEXT:    v_addc_u32_e64 v0, s[2:3], v0, v8, s[0:1]
+; GFX8-NEXT:    v_addc_u32_e64 v2, s[16:17], 0, 0, vcc
+; GFX8-NEXT:    v_addc_u32_e64 v5, s[2:3], 0, 0, s[2:3]
+; GFX8-NEXT:    v_addc_u32_e64 v0, vcc, v0, v2, s[0:1]
+; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, 0, v5, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v1, v2
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v0
 ; GFX8-NEXT:    v_addc_u32_e32 v4, vcc, v4, v1, vcc
-; GFX8-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s14, v3, 0
-; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s14, v4, v[1:2]
+; GFX8-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s18, v3, 0
+; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s18, v4, v[1:2]
 ; GFX8-NEXT:    v_mul_hi_u32 v6, v3, v0
-; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s15, v3, v[1:2]
+; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s19, v3, v[1:2]
 ; GFX8-NEXT:    v_mul_lo_u32 v2, v4, v0
 ; GFX8-NEXT:    v_mul_hi_u32 v0, v4, v0
 ; GFX8-NEXT:    v_mul_lo_u32 v5, v3, v1
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v5
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT:    v_mul_lo_u32 v6, v4, v1
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v5, v2
-; GFX8-NEXT:    v_mul_hi_u32 v5, v3, v1
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v6, v0
-; GFX8-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v5
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v5, vcc, v6, v5
+; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, v2, v6, vcc
+; GFX8-NEXT:    v_mul_lo_u32 v2, v4, v1
+; GFX8-NEXT:    v_mul_hi_u32 v6, v3, v1
+; GFX8-NEXT:    v_addc_u32_e64 v5, s[0:1], 0, 0, vcc
+; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
 ; GFX8-NEXT:    v_mul_hi_u32 v1, v4, v1
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v5, v2
+; GFX8-NEXT:    v_addc_u32_e64 v0, s[0:1], v0, v6, vcc
+; GFX8-NEXT:    v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1]
+; GFX8-NEXT:    v_addc_u32_e32 v0, vcc, v0, v5, vcc
+; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v1, v2
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v3, v0
 ; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v4, v1, vcc
@@ -227,28 +217,23 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX8-NEXT:    v_mul_lo_u32 v3, s10, v1
 ; GFX8-NEXT:    v_mul_hi_u32 v4, s10, v0
 ; GFX8-NEXT:    v_mul_hi_u32 v0, s11, v0
-; GFX8-NEXT:    v_mul_hi_u32 v5, s11, v1
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v3
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v4
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT:    v_mul_lo_u32 v4, s11, v1
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v3, v2
-; GFX8-NEXT:    v_mul_hi_u32 v3, s10, v1
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v4, v0
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v3
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v4, v3
-; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v0, v2
-; GFX8-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s8, v4, 0
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v3, v2
-; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v5, v2
-; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s8, v3, v[1:2]
 ; GFX8-NEXT:    v_mov_b32_e32 v6, s11
+; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v3
+; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, v2, v4, vcc
+; GFX8-NEXT:    v_mul_lo_u32 v2, s11, v1
+; GFX8-NEXT:    v_mul_hi_u32 v4, s10, v1
+; GFX8-NEXT:    v_addc_u32_e64 v3, s[0:1], 0, 0, vcc
+; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
+; GFX8-NEXT:    v_addc_u32_e64 v0, s[0:1], v0, v4, vcc
+; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, v0, v3, vcc
+; GFX8-NEXT:    v_mul_hi_u32 v4, s11, v1
+; GFX8-NEXT:    v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1]
+; GFX8-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s8, v3, 0
+; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
+; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v4, v2
+; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s8, v4, v[1:2]
 ; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, s10, v0
-; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s9, v4, v[1:2]
+; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s9, v3, v[1:2]
 ; GFX8-NEXT:    v_mov_b32_e32 v5, s9
 ; GFX8-NEXT:    v_subb_u32_e64 v2, s[0:1], v6, v1, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v1, s[0:1], s11, v1
@@ -261,8 +246,8 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX8-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[0:1]
 ; GFX8-NEXT:    v_subrev_u32_e32 v7, vcc, s8, v0
 ; GFX8-NEXT:    v_subbrev_u32_e64 v8, s[0:1], 0, v1, vcc
-; GFX8-NEXT:    v_add_u32_e64 v9, s[0:1], 1, v4
-; GFX8-NEXT:    v_addc_u32_e64 v10, s[0:1], 0, v3, s[0:1]
+; GFX8-NEXT:    v_add_u32_e64 v9, s[0:1], 1, v3
+; GFX8-NEXT:    v_addc_u32_e64 v10, s[0:1], 0, v4, s[0:1]
 ; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s9, v8
 ; GFX8-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[0:1]
 ; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s8, v7
@@ -280,20 +265,20 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX8-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v6
 ; GFX8-NEXT:    v_cndmask_b32_e32 v5, v7, v5, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, v4, v9, s[0:1]
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v3, v10, s[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e64 v3, v3, v9, s[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[0:1]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v5, v0, v5, s[0:1]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v2, v2, v1, s[0:1]
-; GFX8-NEXT:    s_xor_b64 s[0:1], s[2:3], s[12:13]
-; GFX8-NEXT:    v_xor_b32_e32 v0, s0, v4
-; GFX8-NEXT:    v_xor_b32_e32 v1, s1, v3
+; GFX8-NEXT:    s_xor_b64 s[0:1], s[12:13], s[14:15]
+; GFX8-NEXT:    v_xor_b32_e32 v0, s0, v3
+; GFX8-NEXT:    v_xor_b32_e32 v1, s1, v4
 ; GFX8-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX8-NEXT:    v_subrev_u32_e32 v0, vcc, s0, v0
 ; GFX8-NEXT:    v_subb_u32_e32 v1, vcc, v1, v3, vcc
-; GFX8-NEXT:    v_xor_b32_e32 v3, s2, v5
-; GFX8-NEXT:    v_xor_b32_e32 v4, s2, v2
-; GFX8-NEXT:    v_mov_b32_e32 v5, s2
-; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, s2, v3
+; GFX8-NEXT:    v_xor_b32_e32 v3, s12, v5
+; GFX8-NEXT:    v_xor_b32_e32 v4, s12, v2
+; GFX8-NEXT:    v_mov_b32_e32 v5, s12
+; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, s12, v3
 ; GFX8-NEXT:    v_subb_u32_e32 v3, vcc, v4, v5, vcc
 ; GFX8-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX8-NEXT:    v_mov_b32_e32 v5, s5
@@ -307,23 +292,23 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_load_dwordx8 s[4:11], s[4:5], 0x0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_ashr_i32 s2, s9, 31
-; GFX9-NEXT:    s_ashr_i32 s12, s11, 31
-; GFX9-NEXT:    s_add_u32 s0, s8, s2
-; GFX9-NEXT:    s_addc_u32 s1, s9, s2
-; GFX9-NEXT:    s_add_u32 s8, s10, s12
-; GFX9-NEXT:    s_mov_b32 s13, s12
-; GFX9-NEXT:    s_addc_u32 s9, s11, s12
-; GFX9-NEXT:    s_xor_b64 s[8:9], s[8:9], s[12:13]
+; GFX9-NEXT:    s_ashr_i32 s12, s9, 31
+; GFX9-NEXT:    s_ashr_i32 s14, s11, 31
+; GFX9-NEXT:    s_add_u32 s0, s8, s12
+; GFX9-NEXT:    s_addc_u32 s1, s9, s12
+; GFX9-NEXT:    s_add_u32 s2, s10, s14
+; GFX9-NEXT:    s_mov_b32 s15, s14
+; GFX9-NEXT:    s_addc_u32 s3, s11, s14
+; GFX9-NEXT:    s_xor_b64 s[8:9], s[2:3], s[14:15]
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, s9
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, s8
-; GFX9-NEXT:    s_mov_b32 s3, s2
-; GFX9-NEXT:    s_xor_b64 s[10:11], s[0:1], s[2:3]
+; GFX9-NEXT:    s_mov_b32 s13, s12
+; GFX9-NEXT:    s_xor_b64 s[10:11], s[0:1], s[12:13]
 ; GFX9-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v0
 ; GFX9-NEXT:    v_add_f32_e32 v0, v0, v1
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v0, v0
-; GFX9-NEXT:    s_sub_u32 s14, 0, s8
-; GFX9-NEXT:    s_subb_u32 s15, 0, s9
+; GFX9-NEXT:    s_sub_u32 s18, 0, s8
+; GFX9-NEXT:    s_subb_u32 s19, 0, s9
 ; GFX9-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GFX9-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GFX9-NEXT:    v_trunc_f32_e32 v2, v1
@@ -331,10 +316,10 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX9-NEXT:    v_add_f32_e32 v0, v1, v0
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v0
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v4, v2
-; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s14, v3, 0
-; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s14, v4, v[1:2]
+; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s18, v3, 0
+; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s18, v4, v[1:2]
 ; GFX9-NEXT:    v_mul_hi_u32 v5, v3, v0
-; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s15, v3, v[1:2]
+; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s19, v3, v[1:2]
 ; GFX9-NEXT:    v_mul_lo_u32 v2, v4, v0
 ; GFX9-NEXT:    v_mul_hi_u32 v0, v4, v0
 ; GFX9-NEXT:    v_mul_lo_u32 v6, v3, v1
@@ -342,92 +327,79 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX9-NEXT:    v_mul_hi_u32 v8, v3, v1
 ; GFX9-NEXT:    v_mul_hi_u32 v1, v4, v1
 ; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v6
-; GFX9-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v7, v0
-; GFX9-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v5
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT:    v_add_u32_e32 v2, v6, v2
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v8
-; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT:    v_add_u32_e32 v5, v7, v5
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT:    v_add3_u32 v1, v5, v2, v1
+; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, v2, v5, vcc
+; GFX9-NEXT:    v_add_co_u32_e64 v0, s[0:1], v7, v0
+; GFX9-NEXT:    v_addc_co_u32_e64 v0, s[2:3], v0, v8, s[0:1]
+; GFX9-NEXT:    v_addc_co_u32_e64 v2, s[16:17], 0, 0, vcc
+; GFX9-NEXT:    v_addc_co_u32_e64 v5, s[2:3], 0, 0, s[2:3]
+; GFX9-NEXT:    v_addc_co_u32_e64 v0, vcc, v0, v2, s[0:1]
+; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v5, vcc
+; GFX9-NEXT:    v_add_u32_e32 v1, v1, v2
 ; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, v3, v0
 ; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v4, v1, vcc
-; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s14, v3, 0
-; GFX9-NEXT:    v_mov_b32_e32 v7, s9
-; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s14, v4, v[1:2]
+; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s18, v3, 0
+; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s18, v4, v[1:2]
 ; GFX9-NEXT:    v_mul_hi_u32 v6, v3, v0
-; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s15, v3, v[1:2]
+; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s19, v3, v[1:2]
 ; GFX9-NEXT:    v_mul_lo_u32 v2, v4, v0
 ; GFX9-NEXT:    v_mul_hi_u32 v0, v4, v0
 ; GFX9-NEXT:    v_mul_lo_u32 v5, v3, v1
 ; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v5
-; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v6
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT:    v_mul_lo_u32 v6, v4, v1
-; GFX9-NEXT:    v_add_u32_e32 v2, v5, v2
-; GFX9-NEXT:    v_mul_hi_u32 v5, v3, v1
+; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, v2, v6, vcc
+; GFX9-NEXT:    v_mul_lo_u32 v2, v4, v1
+; GFX9-NEXT:    v_mul_hi_u32 v6, v3, v1
+; GFX9-NEXT:    v_addc_co_u32_e64 v5, s[0:1], 0, 0, vcc
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
 ; GFX9-NEXT:    v_mul_hi_u32 v1, v4, v1
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v6, v0
-; GFX9-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v5
-; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT:    v_add_u32_e32 v5, v6, v5
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT:    v_add3_u32 v1, v5, v2, v1
+; GFX9-NEXT:    v_addc_co_u32_e64 v0, s[0:1], v0, v6, vcc
+; GFX9-NEXT:    v_addc_co_u32_e64 v2, s[0:1], 0, 0, s[0:1]
+; GFX9-NEXT:    v_addc_co_u32_e32 v0, vcc, v0, v5, vcc
+; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
+; GFX9-NEXT:    v_add_u32_e32 v1, v1, v2
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v3, v0
 ; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v4, v1, vcc
 ; GFX9-NEXT:    v_mul_lo_u32 v2, s11, v0
 ; GFX9-NEXT:    v_mul_lo_u32 v3, s10, v1
 ; GFX9-NEXT:    v_mul_hi_u32 v4, s10, v0
 ; GFX9-NEXT:    v_mul_hi_u32 v0, s11, v0
-; GFX9-NEXT:    v_mul_hi_u32 v6, s11, v1
-; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v3
-; GFX9-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v4
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT:    v_mul_lo_u32 v4, s11, v1
-; GFX9-NEXT:    v_add_u32_e32 v2, v3, v2
-; GFX9-NEXT:    v_mul_hi_u32 v3, s10, v1
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v4, v0
-; GFX9-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v3
-; GFX9-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v5, vcc, v0, v2
-; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s8, v5, 0
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT:    v_add_u32_e32 v3, v4, v3
-; GFX9-NEXT:    v_add3_u32 v3, v3, v2, v6
-; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s8, v3, v[1:2]
 ; GFX9-NEXT:    v_mov_b32_e32 v6, s11
+; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v3
+; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, v2, v4, vcc
+; GFX9-NEXT:    v_mul_lo_u32 v2, s11, v1
+; GFX9-NEXT:    v_mul_hi_u32 v4, s10, v1
+; GFX9-NEXT:    v_addc_co_u32_e64 v3, s[0:1], 0, 0, vcc
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX9-NEXT:    v_addc_co_u32_e64 v0, s[0:1], v0, v4, vcc
+; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v0, v3, vcc
+; GFX9-NEXT:    v_mul_hi_u32 v4, s11, v1
+; GFX9-NEXT:    v_addc_co_u32_e64 v2, s[0:1], 0, 0, s[0:1]
+; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s8, v3, 0
+; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
+; GFX9-NEXT:    v_add_u32_e32 v4, v4, v2
+; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s8, v4, v[1:2]
 ; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, s10, v0
-; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s9, v5, v[1:2]
-; GFX9-NEXT:    v_mov_b32_e32 v4, 0
+; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s9, v3, v[1:2]
+; GFX9-NEXT:    v_mov_b32_e32 v5, s9
 ; GFX9-NEXT:    v_subb_co_u32_e64 v2, s[0:1], v6, v1, vcc
 ; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s9, v2
 ; GFX9-NEXT:    v_sub_u32_e32 v1, s11, v1
-; GFX9-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[0:1]
+; GFX9-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[0:1]
 ; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s8, v0
 ; GFX9-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[0:1]
 ; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s9, v2
-; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v7, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v6, v6, v8, s[0:1]
+; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v5, vcc
+; GFX9-NEXT:    v_cndmask_b32_e64 v7, v7, v8, s[0:1]
 ; GFX9-NEXT:    v_subrev_co_u32_e32 v8, vcc, s8, v0
 ; GFX9-NEXT:    v_subbrev_co_u32_e64 v9, s[0:1], 0, v1, vcc
-; GFX9-NEXT:    v_add_co_u32_e64 v10, s[0:1], 1, v5
-; GFX9-NEXT:    v_addc_co_u32_e64 v11, s[0:1], 0, v3, s[0:1]
+; GFX9-NEXT:    v_add_co_u32_e64 v10, s[0:1], 1, v3
+; GFX9-NEXT:    v_addc_co_u32_e64 v11, s[0:1], 0, v4, s[0:1]
 ; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s9, v9
 ; GFX9-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[0:1]
 ; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s8, v8
-; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v7, vcc
+; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v5, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[0:1]
 ; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s9, v9
-; GFX9-NEXT:    v_subrev_co_u32_e32 v7, vcc, s8, v8
+; GFX9-NEXT:    v_subrev_co_u32_e32 v5, vcc, s8, v8
 ; GFX9-NEXT:    v_cndmask_b32_e64 v12, v12, v13, s[0:1]
 ; GFX9-NEXT:    v_add_co_u32_e64 v13, s[0:1], 1, v10
 ; GFX9-NEXT:    v_subbrev_co_u32_e32 v1, vcc, 0, v1, vcc
@@ -435,26 +407,27 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
 ; GFX9-NEXT:    v_cndmask_b32_e32 v10, v10, v13, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v11, v11, v14, vcc
-; GFX9-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v6
-; GFX9-NEXT:    v_cndmask_b32_e32 v6, v8, v7, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v7
+; GFX9-NEXT:    v_cndmask_b32_e32 v5, v8, v5, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v5, v5, v10, s[0:1]
-; GFX9-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s[0:1]
-; GFX9-NEXT:    v_cndmask_b32_e64 v6, v0, v6, s[0:1]
+; GFX9-NEXT:    v_cndmask_b32_e64 v3, v3, v10, s[0:1]
+; GFX9-NEXT:    v_cndmask_b32_e64 v4, v4, v11, s[0:1]
+; GFX9-NEXT:    v_cndmask_b32_e64 v5, v0, v5, s[0:1]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v2, v2, v1, s[0:1]
-; GFX9-NEXT:    s_xor_b64 s[0:1], s[2:3], s[12:13]
-; GFX9-NEXT:    v_xor_b32_e32 v0, s0, v5
-; GFX9-NEXT:    v_xor_b32_e32 v1, s1, v3
+; GFX9-NEXT:    s_xor_b64 s[0:1], s[12:13], s[14:15]
+; GFX9-NEXT:    v_xor_b32_e32 v0, s0, v3
+; GFX9-NEXT:    v_xor_b32_e32 v1, s1, v4
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX9-NEXT:    v_subrev_co_u32_e32 v0, vcc, s0, v0
 ; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
-; GFX9-NEXT:    v_xor_b32_e32 v3, s2, v6
-; GFX9-NEXT:    v_xor_b32_e32 v5, s2, v2
-; GFX9-NEXT:    v_mov_b32_e32 v6, s2
-; GFX9-NEXT:    v_subrev_co_u32_e32 v2, vcc, s2, v3
-; GFX9-NEXT:    v_subb_co_u32_e32 v3, vcc, v5, v6, vcc
-; GFX9-NEXT:    global_store_dwordx2 v4, v[0:1], s[4:5]
-; GFX9-NEXT:    global_store_dwordx2 v4, v[2:3], s[6:7]
+; GFX9-NEXT:    v_xor_b32_e32 v3, s12, v5
+; GFX9-NEXT:    v_mov_b32_e32 v6, 0
+; GFX9-NEXT:    v_xor_b32_e32 v4, s12, v2
+; GFX9-NEXT:    v_mov_b32_e32 v5, s12
+; GFX9-NEXT:    v_subrev_co_u32_e32 v2, vcc, s12, v3
+; GFX9-NEXT:    v_subb_co_u32_e32 v3, vcc, v4, v5, vcc
+; GFX9-NEXT:    global_store_dwordx2 v6, v[0:1], s[4:5]
+; GFX9-NEXT:    global_store_dwordx2 v6, v[2:3], s[6:7]
 ; GFX9-NEXT:    s_endpgm
 ;
 ; GFX10-LABEL: sdivrem_i64:
@@ -470,10 +443,11 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX10-NEXT:    s_addc_u32 s9, s11, s12
 ; GFX10-NEXT:    s_mov_b32 s3, s2
 ; GFX10-NEXT:    s_xor_b64 s[8:9], s[8:9], s[12:13]
-; GFX10-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
+; GFX10-NEXT:    s_xor_b64 s[10:11], s[0:1], s[2:3]
 ; GFX10-NEXT:    v_cvt_f32_u32_e32 v0, s9
 ; GFX10-NEXT:    v_cvt_f32_u32_e32 v1, s8
-; GFX10-NEXT:    s_sub_u32 s10, 0, s8
+; GFX10-NEXT:    s_sub_u32 s14, 0, s8
+; GFX10-NEXT:    s_subb_u32 s15, 0, s9
 ; GFX10-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v0
 ; GFX10-NEXT:    v_add_f32_e32 v0, v0, v1
 ; GFX10-NEXT:    v_rcp_iflag_f32_e32 v0, v0
@@ -484,85 +458,72 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v4, v2
 ; GFX10-NEXT:    v_add_f32_e32 v0, v1, v0
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v3, v0
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s11, s10, v3, 0
-; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s11, s10, v4, v[1:2]
-; GFX10-NEXT:    s_subb_u32 s11, 0, s9
-; GFX10-NEXT:    v_mul_hi_u32 v6, v4, v0
-; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s14, s11, v3, v[1:2]
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, s14, v3, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s0, s14, v4, v[1:2]
+; GFX10-NEXT:    v_mul_hi_u32 v6, v3, v0
+; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s0, s15, v3, v[1:2]
 ; GFX10-NEXT:    v_mul_lo_u32 v2, v4, v0
-; GFX10-NEXT:    v_mul_hi_u32 v0, v3, v0
+; GFX10-NEXT:    v_mul_hi_u32 v0, v4, v0
 ; GFX10-NEXT:    v_mul_lo_u32 v5, v3, v1
 ; GFX10-NEXT:    v_mul_lo_u32 v7, v4, v1
 ; GFX10-NEXT:    v_mul_hi_u32 v8, v3, v1
 ; GFX10-NEXT:    v_mul_hi_u32 v1, v4, v1
-; GFX10-NEXT:    v_add_co_u32 v2, s14, v2, v5
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s14
-; GFX10-NEXT:    v_add_co_u32 v6, s14, v7, v6
-; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s14
-; GFX10-NEXT:    v_add_co_u32 v0, s14, v2, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s14
-; GFX10-NEXT:    v_add_co_u32 v2, s14, v6, v8
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s14
-; GFX10-NEXT:    v_add_nc_u32_e32 v0, v5, v0
-; GFX10-NEXT:    v_add_nc_u32_e32 v5, v7, v6
-; GFX10-NEXT:    v_add_co_u32 v0, s14, v2, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s14
+; GFX10-NEXT:    v_add_co_u32 v2, vcc_lo, v2, v5
+; GFX10-NEXT:    v_add_co_u32 v0, s0, v7, v0
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, v2, v6, vcc_lo
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v0, s1, v0, v8, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v2, s16, 0, 0, vcc_lo
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v5, s1, 0, 0, s1
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v0, vcc_lo, v0, v2, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, 0, v5, vcc_lo
 ; GFX10-NEXT:    v_add_co_u32 v3, vcc_lo, v3, v0
-; GFX10-NEXT:    v_add3_u32 v1, v5, v2, v1
+; GFX10-NEXT:    v_add_nc_u32_e32 v1, v1, v2
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v4, vcc_lo, v4, v1, vcc_lo
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s14, s10, v3, 0
-; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s10, s10, v4, v[1:2]
-; GFX10-NEXT:    v_mul_hi_u32 v6, v4, v0
-; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s10, s11, v3, v[1:2]
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, s14, v3, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s0, s14, v4, v[1:2]
+; GFX10-NEXT:    v_mul_hi_u32 v6, v3, v0
+; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s0, s15, v3, v[1:2]
 ; GFX10-NEXT:    v_mul_lo_u32 v2, v4, v0
-; GFX10-NEXT:    v_mul_hi_u32 v0, v3, v0
+; GFX10-NEXT:    v_mul_hi_u32 v0, v4, v0
 ; GFX10-NEXT:    v_mul_lo_u32 v5, v3, v1
 ; GFX10-NEXT:    v_mul_lo_u32 v7, v4, v1
 ; GFX10-NEXT:    v_mul_hi_u32 v8, v3, v1
 ; GFX10-NEXT:    v_mul_hi_u32 v1, v4, v1
-; GFX10-NEXT:    v_add_co_u32 v2, s10, v2, v5
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s10
-; GFX10-NEXT:    v_add_co_u32 v6, s10, v7, v6
-; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s10
-; GFX10-NEXT:    v_add_co_u32 v0, s10, v2, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s10
-; GFX10-NEXT:    v_add_co_u32 v2, s10, v6, v8
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s10
-; GFX10-NEXT:    v_add_nc_u32_e32 v0, v5, v0
-; GFX10-NEXT:    v_add_nc_u32_e32 v5, v7, v6
-; GFX10-NEXT:    v_add_co_u32 v0, s10, v2, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s10
+; GFX10-NEXT:    v_add_co_u32 v2, vcc_lo, v2, v5
+; GFX10-NEXT:    v_add_co_u32 v0, s0, v7, v0
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, v2, v6, vcc_lo
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v0, s1, v0, v8, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v2, s14, 0, 0, vcc_lo
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v5, s1, 0, 0, s1
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v0, vcc_lo, v0, v2, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, 0, v5, vcc_lo
 ; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v3, v0
-; GFX10-NEXT:    v_add3_u32 v1, v5, v2, v1
-; GFX10-NEXT:    v_mul_lo_u32 v2, s1, v0
+; GFX10-NEXT:    v_add_nc_u32_e32 v1, v1, v2
+; GFX10-NEXT:    v_mul_lo_u32 v2, s11, v0
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v4, v1, vcc_lo
-; GFX10-NEXT:    v_mul_hi_u32 v4, s0, v0
-; GFX10-NEXT:    v_mul_hi_u32 v0, s1, v0
-; GFX10-NEXT:    v_mul_lo_u32 v3, s0, v1
-; GFX10-NEXT:    v_mul_lo_u32 v5, s1, v1
-; GFX10-NEXT:    v_add_co_u32 v2, s10, v2, v3
-; GFX10-NEXT:    v_mul_hi_u32 v3, s0, v1
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s10
-; GFX10-NEXT:    v_add_co_u32 v2, s10, v2, v4
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s10
-; GFX10-NEXT:    v_add_co_u32 v0, s10, v5, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s10
-; GFX10-NEXT:    v_add_nc_u32_e32 v2, v6, v2
-; GFX10-NEXT:    v_add_co_u32 v0, s10, v0, v3
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s10
-; GFX10-NEXT:    v_add_co_u32 v5, s10, v0, v2
-; GFX10-NEXT:    v_mul_hi_u32 v2, s1, v1
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s10
-; GFX10-NEXT:    v_add_nc_u32_e32 v3, v4, v3
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s10, s8, v5, 0
-; GFX10-NEXT:    v_add3_u32 v3, v3, v6, v2
-; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s10, s8, v3, v[1:2]
-; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s10, s9, v5, v[1:2]
-; GFX10-NEXT:    v_add_co_u32 v2, vcc_lo, v5, 1
-; GFX10-NEXT:    v_add_co_ci_u32_e32 v4, vcc_lo, 0, v3, vcc_lo
-; GFX10-NEXT:    v_sub_co_u32 v0, vcc_lo, s0, v0
-; GFX10-NEXT:    v_sub_nc_u32_e32 v6, s1, v1
-; GFX10-NEXT:    v_sub_co_ci_u32_e64 v1, s0, s1, v1, vcc_lo
+; GFX10-NEXT:    v_mul_hi_u32 v4, s10, v0
+; GFX10-NEXT:    v_mul_hi_u32 v0, s11, v0
+; GFX10-NEXT:    v_mul_lo_u32 v3, s10, v1
+; GFX10-NEXT:    v_mul_lo_u32 v5, s11, v1
+; GFX10-NEXT:    v_mul_hi_u32 v6, s10, v1
+; GFX10-NEXT:    v_add_co_u32 v2, vcc_lo, v2, v3
+; GFX10-NEXT:    v_add_co_u32 v0, s0, v5, v0
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, v2, v4, vcc_lo
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v0, s1, v0, v6, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v2, s14, 0, 0, vcc_lo
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v3, s1, 0, 0, s1
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v4, vcc_lo, v0, v2, s0
+; GFX10-NEXT:    v_mul_hi_u32 v2, s11, v1
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, s8, v4, 0
+; GFX10-NEXT:    v_add_nc_u32_e32 v3, v2, v3
+; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s0, s8, v3, v[1:2]
+; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s0, s9, v4, v[1:2]
+; GFX10-NEXT:    v_add_co_u32 v2, vcc_lo, v4, 1
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v5, vcc_lo, 0, v3, vcc_lo
+; GFX10-NEXT:    v_sub_co_u32 v0, vcc_lo, s10, v0
+; GFX10-NEXT:    v_sub_nc_u32_e32 v6, s11, v1
+; GFX10-NEXT:    v_sub_co_ci_u32_e64 v1, s0, s11, v1, vcc_lo
 ; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v6, vcc_lo, s9, v6, vcc_lo
 ; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s8, v0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0, -1, vcc_lo
@@ -576,7 +537,7 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s9, v9
 ; GFX10-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s0
 ; GFX10-NEXT:    v_add_co_u32 v13, s0, v2, 1
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v14, s0, 0, v4, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v14, s0, 0, v5, s0
 ; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s9, v9
 ; GFX10-NEXT:    v_cndmask_b32_e64 v11, v12, v11, s0
 ; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s9, v1
@@ -586,12 +547,12 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v6, s0, 0, v6, s0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v13, vcc_lo
 ; GFX10-NEXT:    v_cmp_ne_u32_e64 s0, 0, v7
-; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v14, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v14, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v7, v8, v10, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v6, v9, v6, vcc_lo
 ; GFX10-NEXT:    s_xor_b64 s[8:9], s[2:3], s[12:13]
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v5, v2, s0
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s0
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, v4, v2, s0
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v5, s0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, v7, s0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, v6, s0
 ; GFX10-NEXT:    v_mov_b32_e32 v4, 0
@@ -1272,25 +1233,25 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX8-LABEL: sdivrem_v2i64:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x0
-; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x20
+; GFX8-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x20
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    s_ashr_i32 s4, s13, 31
-; GFX8-NEXT:    s_ashr_i32 s6, s1, 31
-; GFX8-NEXT:    s_add_u32 s16, s12, s4
-; GFX8-NEXT:    s_addc_u32 s17, s13, s4
-; GFX8-NEXT:    s_add_u32 s0, s0, s6
-; GFX8-NEXT:    s_mov_b32 s7, s6
-; GFX8-NEXT:    s_addc_u32 s1, s1, s6
-; GFX8-NEXT:    s_xor_b64 s[12:13], s[0:1], s[6:7]
+; GFX8-NEXT:    s_ashr_i32 s16, s13, 31
+; GFX8-NEXT:    s_ashr_i32 s18, s5, 31
+; GFX8-NEXT:    s_add_u32 s0, s12, s16
+; GFX8-NEXT:    s_addc_u32 s1, s13, s16
+; GFX8-NEXT:    s_add_u32 s2, s4, s18
+; GFX8-NEXT:    s_mov_b32 s19, s18
+; GFX8-NEXT:    s_addc_u32 s3, s5, s18
+; GFX8-NEXT:    s_xor_b64 s[12:13], s[2:3], s[18:19]
 ; GFX8-NEXT:    v_cvt_f32_u32_e32 v0, s13
 ; GFX8-NEXT:    v_cvt_f32_u32_e32 v1, s12
-; GFX8-NEXT:    s_mov_b32 s5, s4
-; GFX8-NEXT:    s_xor_b64 s[16:17], s[16:17], s[4:5]
+; GFX8-NEXT:    s_mov_b32 s17, s16
+; GFX8-NEXT:    s_xor_b64 s[4:5], s[0:1], s[16:17]
 ; GFX8-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v0
 ; GFX8-NEXT:    v_add_f32_e32 v0, v0, v1
 ; GFX8-NEXT:    v_rcp_iflag_f32_e32 v0, v0
-; GFX8-NEXT:    s_sub_u32 s18, 0, s12
-; GFX8-NEXT:    s_subb_u32 s19, 0, s13
+; GFX8-NEXT:    s_sub_u32 s22, 0, s12
+; GFX8-NEXT:    s_subb_u32 s23, 0, s13
 ; GFX8-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GFX8-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GFX8-NEXT:    v_trunc_f32_e32 v2, v1
@@ -1298,10 +1259,10 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX8-NEXT:    v_add_f32_e32 v0, v1, v0
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v3, v0
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v4, v2
-; GFX8-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s18, v3, 0
-; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s18, v4, v[1:2]
+; GFX8-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s22, v3, 0
+; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s22, v4, v[1:2]
 ; GFX8-NEXT:    v_mul_hi_u32 v5, v3, v0
-; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s19, v3, v[1:2]
+; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s23, v3, v[1:2]
 ; GFX8-NEXT:    v_mul_lo_u32 v2, v4, v0
 ; GFX8-NEXT:    v_mul_hi_u32 v0, v4, v0
 ; GFX8-NEXT:    v_mul_lo_u32 v6, v3, v1
@@ -1309,243 +1270,213 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX8-NEXT:    v_mul_hi_u32 v8, v3, v1
 ; GFX8-NEXT:    v_mul_hi_u32 v1, v4, v1
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
-; GFX8-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v7, v0
-; GFX8-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v5
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v6, v2
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v8
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v5, vcc, v7, v5
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v5, v2
+; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, v2, v5, vcc
+; GFX8-NEXT:    v_add_u32_e64 v0, s[0:1], v7, v0
+; GFX8-NEXT:    v_addc_u32_e64 v0, s[2:3], v0, v8, s[0:1]
+; GFX8-NEXT:    v_addc_u32_e64 v2, s[20:21], 0, 0, vcc
+; GFX8-NEXT:    v_addc_u32_e64 v5, s[2:3], 0, 0, s[2:3]
+; GFX8-NEXT:    v_addc_u32_e64 v0, vcc, v0, v2, s[0:1]
+; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, 0, v5, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v1, v2
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v0
 ; GFX8-NEXT:    v_addc_u32_e32 v4, vcc, v4, v1, vcc
-; GFX8-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s18, v3, 0
-; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s18, v4, v[1:2]
+; GFX8-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s22, v3, 0
+; GFX8-NEXT:    s_xor_b64 s[20:21], s[16:17], s[18:19]
+; GFX8-NEXT:    s_ashr_i32 s18, s7, 31
+; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s22, v4, v[1:2]
 ; GFX8-NEXT:    v_mul_hi_u32 v6, v3, v0
-; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s19, v3, v[1:2]
+; GFX8-NEXT:    s_mov_b32 s19, s18
+; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s23, v3, v[1:2]
 ; GFX8-NEXT:    v_mul_lo_u32 v2, v4, v0
 ; GFX8-NEXT:    v_mul_hi_u32 v0, v4, v0
 ; GFX8-NEXT:    v_mul_lo_u32 v5, v3, v1
-; GFX8-NEXT:    s_xor_b64 s[18:19], s[4:5], s[6:7]
-; GFX8-NEXT:    s_ashr_i32 s6, s15, 31
-; GFX8-NEXT:    s_mov_b32 s7, s6
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v5
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT:    v_mul_lo_u32 v6, v4, v1
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v5, v2
-; GFX8-NEXT:    v_mul_hi_u32 v5, v3, v1
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v6, v0
-; GFX8-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v5
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v5, vcc, v6, v5
+; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, v2, v6, vcc
+; GFX8-NEXT:    v_mul_lo_u32 v2, v4, v1
+; GFX8-NEXT:    v_mul_hi_u32 v6, v3, v1
+; GFX8-NEXT:    v_addc_u32_e64 v5, s[0:1], 0, 0, vcc
+; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
 ; GFX8-NEXT:    v_mul_hi_u32 v1, v4, v1
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v5, v2
+; GFX8-NEXT:    v_addc_u32_e64 v0, s[0:1], v0, v6, vcc
+; GFX8-NEXT:    v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1]
+; GFX8-NEXT:    v_addc_u32_e32 v0, vcc, v0, v5, vcc
+; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v1, v2
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v3, v0
 ; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v4, v1, vcc
-; GFX8-NEXT:    v_mul_lo_u32 v2, s17, v0
-; GFX8-NEXT:    v_mul_lo_u32 v3, s16, v1
-; GFX8-NEXT:    v_mul_hi_u32 v4, s16, v0
-; GFX8-NEXT:    v_mul_hi_u32 v0, s17, v0
-; GFX8-NEXT:    v_mul_hi_u32 v5, s17, v1
+; GFX8-NEXT:    v_mul_lo_u32 v2, s5, v0
+; GFX8-NEXT:    v_mul_lo_u32 v3, s4, v1
+; GFX8-NEXT:    v_mul_hi_u32 v4, s4, v0
+; GFX8-NEXT:    v_mul_hi_u32 v0, s5, v0
+; GFX8-NEXT:    v_mov_b32_e32 v6, s5
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v3
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v4
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT:    v_mul_lo_u32 v4, s17, v1
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v3, v2
-; GFX8-NEXT:    v_mul_hi_u32 v3, s16, v1
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v4, v0
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v3
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v4, v3
-; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v0, v2
-; GFX8-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s12, v4, 0
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v3, v2
-; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v5, v2
-; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s12, v3, v[1:2]
-; GFX8-NEXT:    v_mov_b32_e32 v6, s17
-; GFX8-NEXT:    v_sub_u32_e32 v7, vcc, s16, v0
-; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s13, v4, v[1:2]
+; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, v2, v4, vcc
+; GFX8-NEXT:    v_mul_lo_u32 v2, s5, v1
+; GFX8-NEXT:    v_mul_hi_u32 v4, s4, v1
+; GFX8-NEXT:    v_addc_u32_e64 v3, s[0:1], 0, 0, vcc
+; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
+; GFX8-NEXT:    v_addc_u32_e64 v0, s[0:1], v0, v4, vcc
+; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, v0, v3, vcc
+; GFX8-NEXT:    v_mul_hi_u32 v4, s5, v1
+; GFX8-NEXT:    v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1]
+; GFX8-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s12, v3, 0
+; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
+; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v4, v2
+; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s12, v4, v[1:2]
+; GFX8-NEXT:    v_sub_u32_e32 v7, vcc, s4, v0
+; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s13, v3, v[1:2]
 ; GFX8-NEXT:    v_mov_b32_e32 v5, s13
-; GFX8-NEXT:    s_ashr_i32 s16, s3, 31
+; GFX8-NEXT:    s_ashr_i32 s4, s15, 31
 ; GFX8-NEXT:    v_subb_u32_e64 v6, s[0:1], v6, v1, vcc
-; GFX8-NEXT:    v_sub_u32_e64 v0, s[0:1], s17, v1
+; GFX8-NEXT:    v_sub_u32_e64 v0, s[0:1], s5, v1
 ; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s13, v6
 ; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[0:1]
 ; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s12, v7
 ; GFX8-NEXT:    v_subb_u32_e32 v0, vcc, v0, v5, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[0:1]
 ; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s13, v6
-; GFX8-NEXT:    v_subrev_u32_e32 v8, vcc, s12, v7
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[0:1]
-; GFX8-NEXT:    v_subbrev_u32_e64 v9, s[0:1], 0, v0, vcc
-; GFX8-NEXT:    v_add_u32_e64 v1, s[0:1], 1, v4
-; GFX8-NEXT:    v_addc_u32_e64 v10, s[0:1], 0, v3, s[0:1]
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s13, v9
-; GFX8-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[0:1]
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s12, v8
+; GFX8-NEXT:    v_subrev_u32_e32 v9, vcc, s12, v7
+; GFX8-NEXT:    v_cndmask_b32_e64 v8, v1, v2, s[0:1]
+; GFX8-NEXT:    v_subbrev_u32_e64 v10, s[0:1], 0, v0, vcc
+; GFX8-NEXT:    v_add_u32_e64 v2, s[0:1], 1, v3
+; GFX8-NEXT:    v_addc_u32_e64 v11, s[0:1], 0, v4, s[0:1]
+; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s13, v10
+; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[0:1]
+; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s12, v9
 ; GFX8-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[0:1]
-; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s13, v9
-; GFX8-NEXT:    v_cndmask_b32_e64 v11, v11, v12, s[0:1]
-; GFX8-NEXT:    v_add_u32_e64 v12, s[0:1], 1, v1
-; GFX8-NEXT:    v_addc_u32_e64 v13, s[0:1], 0, v10, s[0:1]
-; GFX8-NEXT:    s_add_u32 s0, s14, s6
-; GFX8-NEXT:    s_addc_u32 s1, s15, s6
-; GFX8-NEXT:    s_add_u32 s2, s2, s16
-; GFX8-NEXT:    s_mov_b32 s17, s16
-; GFX8-NEXT:    s_addc_u32 s3, s3, s16
-; GFX8-NEXT:    s_xor_b64 s[2:3], s[2:3], s[16:17]
-; GFX8-NEXT:    v_cvt_f32_u32_e32 v14, s3
+; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s13, v10
+; GFX8-NEXT:    v_cndmask_b32_e64 v12, v1, v12, s[0:1]
+; GFX8-NEXT:    v_add_u32_e64 v13, s[0:1], 1, v2
+; GFX8-NEXT:    v_addc_u32_e64 v14, s[0:1], 0, v11, s[0:1]
+; GFX8-NEXT:    s_add_u32 s0, s14, s4
+; GFX8-NEXT:    s_addc_u32 s1, s15, s4
+; GFX8-NEXT:    s_add_u32 s2, s6, s18
+; GFX8-NEXT:    s_addc_u32 s3, s7, s18
+; GFX8-NEXT:    s_xor_b64 s[6:7], s[2:3], s[18:19]
+; GFX8-NEXT:    v_cvt_f32_u32_e32 v1, s7
+; GFX8-NEXT:    v_cvt_f32_u32_e32 v15, s6
 ; GFX8-NEXT:    v_subb_u32_e32 v0, vcc, v0, v5, vcc
-; GFX8-NEXT:    v_cvt_f32_u32_e32 v5, s2
-; GFX8-NEXT:    v_subrev_u32_e32 v15, vcc, s12, v8
-; GFX8-NEXT:    v_subbrev_u32_e32 v16, vcc, 0, v0, vcc
-; GFX8-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v14
-; GFX8-NEXT:    v_add_f32_e32 v0, v0, v5
-; GFX8-NEXT:    v_rcp_iflag_f32_e32 v0, v0
-; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v11
-; GFX8-NEXT:    v_cndmask_b32_e32 v5, v1, v12, vcc
-; GFX8-NEXT:    s_xor_b64 s[12:13], s[0:1], s[6:7]
-; GFX8-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
+; GFX8-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v1
+; GFX8-NEXT:    v_add_f32_e32 v1, v1, v15
+; GFX8-NEXT:    v_rcp_iflag_f32_e32 v1, v1
+; GFX8-NEXT:    v_subrev_u32_e32 v5, vcc, s12, v9
+; GFX8-NEXT:    v_subbrev_u32_e32 v15, vcc, 0, v0, vcc
+; GFX8-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v1
 ; GFX8-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
-; GFX8-NEXT:    v_trunc_f32_e32 v11, v1
-; GFX8-NEXT:    v_mul_f32_e32 v1, 0xcf800000, v11
+; GFX8-NEXT:    v_trunc_f32_e32 v16, v1
+; GFX8-NEXT:    v_mul_f32_e32 v1, 0xcf800000, v16
 ; GFX8-NEXT:    v_add_f32_e32 v0, v1, v0
-; GFX8-NEXT:    v_cvt_u32_f32_e32 v12, v0
-; GFX8-NEXT:    s_sub_u32 s5, 0, s2
-; GFX8-NEXT:    s_subb_u32 s20, 0, s3
-; GFX8-NEXT:    v_cndmask_b32_e32 v10, v10, v13, vcc
-; GFX8-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s5, v12, 0
-; GFX8-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v2
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, v4, v5, s[0:1]
-; GFX8-NEXT:    v_cvt_u32_f32_e32 v5, v11
-; GFX8-NEXT:    v_cndmask_b32_e64 v10, v3, v10, s[0:1]
-; GFX8-NEXT:    v_cndmask_b32_e32 v3, v8, v15, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v7, v7, v3, s[0:1]
-; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[14:15], s5, v5, v[1:2]
-; GFX8-NEXT:    v_mul_lo_u32 v3, v5, v0
-; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[14:15], s20, v12, v[1:2]
-; GFX8-NEXT:    v_cndmask_b32_e32 v2, v9, v16, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v6, v6, v2, s[0:1]
-; GFX8-NEXT:    v_mul_lo_u32 v8, v12, v1
-; GFX8-NEXT:    v_mul_hi_u32 v2, v12, v0
-; GFX8-NEXT:    v_mul_hi_u32 v0, v5, v0
-; GFX8-NEXT:    v_xor_b32_e32 v9, s19, v10
-; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v8
-; GFX8-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v3, v2
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT:    v_mul_lo_u32 v3, v5, v1
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v8, v2
-; GFX8-NEXT:    v_mul_hi_u32 v8, v12, v1
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v3, v0
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v8
-; GFX8-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v8
-; GFX8-NEXT:    v_mul_hi_u32 v1, v5, v1
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v3, v2
+; GFX8-NEXT:    v_cvt_u32_f32_e32 v17, v0
+; GFX8-NEXT:    s_mov_b32 s5, s4
+; GFX8-NEXT:    s_xor_b64 s[12:13], s[0:1], s[4:5]
+; GFX8-NEXT:    s_sub_u32 s14, 0, s6
+; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
+; GFX8-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s14, v17, 0
+; GFX8-NEXT:    v_cndmask_b32_e32 v12, v2, v13, vcc
+; GFX8-NEXT:    v_cvt_u32_f32_e32 v13, v16
+; GFX8-NEXT:    s_subb_u32 s15, 0, s7
+; GFX8-NEXT:    v_cndmask_b32_e32 v5, v9, v5, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v9, v10, v15, vcc
+; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s14, v13, v[1:2]
+; GFX8-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v8
+; GFX8-NEXT:    v_cndmask_b32_e64 v8, v3, v12, s[0:1]
+; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[2:3], s15, v17, v[1:2]
+; GFX8-NEXT:    v_mul_lo_u32 v2, v13, v0
+; GFX8-NEXT:    v_mul_hi_u32 v10, v17, v0
+; GFX8-NEXT:    v_mul_lo_u32 v3, v17, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v11, v11, v14, vcc
+; GFX8-NEXT:    v_mul_hi_u32 v0, v13, v0
+; GFX8-NEXT:    v_cndmask_b32_e64 v4, v4, v11, s[0:1]
+; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v3
+; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, v2, v10, vcc
+; GFX8-NEXT:    v_mul_lo_u32 v2, v13, v1
+; GFX8-NEXT:    v_mul_hi_u32 v10, v17, v1
+; GFX8-NEXT:    v_addc_u32_e64 v3, s[2:3], 0, 0, vcc
+; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
+; GFX8-NEXT:    v_mul_hi_u32 v1, v13, v1
+; GFX8-NEXT:    v_addc_u32_e64 v0, s[2:3], v0, v10, vcc
+; GFX8-NEXT:    v_addc_u32_e64 v2, s[2:3], 0, 0, s[2:3]
+; GFX8-NEXT:    v_addc_u32_e32 v0, vcc, v0, v3, vcc
+; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v1, v2
-; GFX8-NEXT:    v_add_u32_e32 v8, vcc, v12, v0
-; GFX8-NEXT:    v_mad_u64_u32 v[2:3], s[0:1], s5, v8, 0
-; GFX8-NEXT:    v_addc_u32_e32 v5, vcc, v5, v1, vcc
-; GFX8-NEXT:    v_xor_b32_e32 v1, s18, v4
+; GFX8-NEXT:    v_add_u32_e32 v10, vcc, v17, v0
+; GFX8-NEXT:    v_mad_u64_u32 v[2:3], s[2:3], s14, v10, 0
+; GFX8-NEXT:    v_addc_u32_e32 v11, vcc, v13, v1, vcc
 ; GFX8-NEXT:    v_mov_b32_e32 v0, v3
-; GFX8-NEXT:    v_mad_u64_u32 v[3:4], s[0:1], s5, v5, v[0:1]
-; GFX8-NEXT:    v_mov_b32_e32 v10, s19
-; GFX8-NEXT:    v_subrev_u32_e32 v0, vcc, s18, v1
-; GFX8-NEXT:    v_mad_u64_u32 v[3:4], s[0:1], s20, v8, v[3:4]
-; GFX8-NEXT:    v_subb_u32_e32 v1, vcc, v9, v10, vcc
-; GFX8-NEXT:    v_xor_b32_e32 v4, s4, v7
-; GFX8-NEXT:    v_mul_lo_u32 v7, v5, v2
-; GFX8-NEXT:    v_mul_lo_u32 v9, v8, v3
-; GFX8-NEXT:    v_mul_hi_u32 v11, v8, v2
-; GFX8-NEXT:    v_mul_hi_u32 v2, v5, v2
-; GFX8-NEXT:    v_xor_b32_e32 v6, s4, v6
-; GFX8-NEXT:    v_add_u32_e32 v7, vcc, v7, v9
-; GFX8-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v7, vcc, v7, v11
-; GFX8-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX8-NEXT:    v_mul_lo_u32 v11, v5, v3
-; GFX8-NEXT:    v_add_u32_e32 v7, vcc, v9, v7
-; GFX8-NEXT:    v_mul_hi_u32 v9, v8, v3
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v11, v2
-; GFX8-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v9
-; GFX8-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v9, vcc, v11, v9
-; GFX8-NEXT:    v_mul_hi_u32 v3, v5, v3
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v7
-; GFX8-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v7, vcc, v9, v7
-; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v7
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v8, v2
-; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, v5, v3, vcc
-; GFX8-NEXT:    v_mov_b32_e32 v10, s4
-; GFX8-NEXT:    v_mul_lo_u32 v7, s13, v2
-; GFX8-NEXT:    v_mul_lo_u32 v8, s12, v3
-; GFX8-NEXT:    v_subrev_u32_e32 v4, vcc, s4, v4
-; GFX8-NEXT:    v_subb_u32_e32 v5, vcc, v6, v10, vcc
-; GFX8-NEXT:    v_mul_hi_u32 v6, s12, v2
-; GFX8-NEXT:    v_add_u32_e32 v7, vcc, v7, v8
-; GFX8-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v7, v6
-; GFX8-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX8-NEXT:    v_mul_lo_u32 v7, s13, v3
+; GFX8-NEXT:    v_cndmask_b32_e64 v5, v7, v5, s[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e64 v6, v6, v9, s[0:1]
+; GFX8-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s14, v11, v[0:1]
+; GFX8-NEXT:    v_xor_b32_e32 v7, s20, v8
+; GFX8-NEXT:    v_xor_b32_e32 v8, s21, v4
+; GFX8-NEXT:    v_mad_u64_u32 v[3:4], s[0:1], s15, v10, v[0:1]
+; GFX8-NEXT:    v_mov_b32_e32 v9, s21
+; GFX8-NEXT:    v_subrev_u32_e32 v0, vcc, s20, v7
+; GFX8-NEXT:    v_xor_b32_e32 v4, s16, v5
+; GFX8-NEXT:    v_mul_lo_u32 v5, v11, v2
+; GFX8-NEXT:    v_mul_lo_u32 v7, v10, v3
+; GFX8-NEXT:    v_subb_u32_e32 v1, vcc, v8, v9, vcc
+; GFX8-NEXT:    v_mul_hi_u32 v8, v10, v2
+; GFX8-NEXT:    v_add_u32_e32 v5, vcc, v5, v7
+; GFX8-NEXT:    v_mul_hi_u32 v2, v11, v2
+; GFX8-NEXT:    v_addc_u32_e32 v5, vcc, v5, v8, vcc
+; GFX8-NEXT:    v_mul_lo_u32 v5, v11, v3
+; GFX8-NEXT:    v_mul_hi_u32 v8, v10, v3
+; GFX8-NEXT:    v_addc_u32_e64 v7, s[0:1], 0, 0, vcc
+; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v5, v2
+; GFX8-NEXT:    v_mul_hi_u32 v3, v11, v3
+; GFX8-NEXT:    v_addc_u32_e64 v2, s[0:1], v2, v8, vcc
+; GFX8-NEXT:    v_addc_u32_e64 v5, s[0:1], 0, 0, s[0:1]
+; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, v2, v7, vcc
+; GFX8-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v5
+; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v10, v2
+; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, v11, v3, vcc
+; GFX8-NEXT:    v_mul_lo_u32 v5, s13, v2
+; GFX8-NEXT:    v_mul_lo_u32 v7, s12, v3
+; GFX8-NEXT:    v_mul_hi_u32 v9, s12, v2
 ; GFX8-NEXT:    v_mul_hi_u32 v2, s13, v2
-; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v8, v6
-; GFX8-NEXT:    v_mul_hi_u32 v8, s12, v3
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v7, v2
-; GFX8-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v8
-; GFX8-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v7, vcc, v7, v8
-; GFX8-NEXT:    v_add_u32_e32 v8, vcc, v2, v6
-; GFX8-NEXT:    v_mul_hi_u32 v9, s13, v3
-; GFX8-NEXT:    v_mad_u64_u32 v[2:3], s[0:1], s2, v8, 0
-; GFX8-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v7, v6
-; GFX8-NEXT:    v_add_u32_e32 v9, vcc, v9, v6
-; GFX8-NEXT:    v_mad_u64_u32 v[6:7], s[0:1], s2, v9, v[3:4]
+; GFX8-NEXT:    v_mul_hi_u32 v10, s13, v3
+; GFX8-NEXT:    v_add_u32_e32 v5, vcc, v5, v7
+; GFX8-NEXT:    v_addc_u32_e32 v5, vcc, v5, v9, vcc
+; GFX8-NEXT:    v_mul_lo_u32 v5, s13, v3
+; GFX8-NEXT:    v_mul_hi_u32 v9, s12, v3
+; GFX8-NEXT:    v_addc_u32_e64 v7, s[0:1], 0, 0, vcc
+; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v5, v2
+; GFX8-NEXT:    v_addc_u32_e64 v2, s[0:1], v2, v9, vcc
+; GFX8-NEXT:    v_addc_u32_e32 v9, vcc, v2, v7, vcc
+; GFX8-NEXT:    v_addc_u32_e64 v5, s[0:1], 0, 0, s[0:1]
+; GFX8-NEXT:    v_mad_u64_u32 v[2:3], s[0:1], s6, v9, 0
+; GFX8-NEXT:    v_addc_u32_e32 v7, vcc, 0, v5, vcc
+; GFX8-NEXT:    v_xor_b32_e32 v6, s16, v6
+; GFX8-NEXT:    v_mov_b32_e32 v8, s16
+; GFX8-NEXT:    v_subrev_u32_e32 v4, vcc, s16, v4
+; GFX8-NEXT:    v_subb_u32_e32 v5, vcc, v6, v8, vcc
+; GFX8-NEXT:    v_add_u32_e32 v8, vcc, v10, v7
+; GFX8-NEXT:    v_mad_u64_u32 v[6:7], s[0:1], s6, v8, v[3:4]
 ; GFX8-NEXT:    v_mov_b32_e32 v10, s13
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, s12, v2
-; GFX8-NEXT:    v_mad_u64_u32 v[6:7], s[0:1], s3, v8, v[6:7]
-; GFX8-NEXT:    v_mov_b32_e32 v3, s3
+; GFX8-NEXT:    v_mad_u64_u32 v[6:7], s[0:1], s7, v9, v[6:7]
+; GFX8-NEXT:    v_mov_b32_e32 v3, s7
 ; GFX8-NEXT:    v_subb_u32_e64 v7, s[0:1], v10, v6, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v6, s[0:1], s13, v6
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s3, v7
+; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s7, v7
 ; GFX8-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[0:1]
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s2, v2
+; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s6, v2
 ; GFX8-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[0:1]
-; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s3, v7
+; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s7, v7
 ; GFX8-NEXT:    v_subb_u32_e32 v6, vcc, v6, v3, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v10, v10, v11, s[0:1]
-; GFX8-NEXT:    v_subrev_u32_e32 v11, vcc, s2, v2
+; GFX8-NEXT:    v_subrev_u32_e32 v11, vcc, s6, v2
 ; GFX8-NEXT:    v_subbrev_u32_e64 v12, s[0:1], 0, v6, vcc
-; GFX8-NEXT:    v_add_u32_e64 v13, s[0:1], 1, v8
-; GFX8-NEXT:    v_addc_u32_e64 v14, s[0:1], 0, v9, s[0:1]
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s3, v12
+; GFX8-NEXT:    v_add_u32_e64 v13, s[0:1], 1, v9
+; GFX8-NEXT:    v_addc_u32_e64 v14, s[0:1], 0, v8, s[0:1]
+; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s7, v12
 ; GFX8-NEXT:    v_cndmask_b32_e64 v15, 0, -1, s[0:1]
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s2, v11
+; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s6, v11
 ; GFX8-NEXT:    v_subb_u32_e32 v3, vcc, v6, v3, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v16, 0, -1, s[0:1]
-; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s3, v12
-; GFX8-NEXT:    v_subrev_u32_e32 v6, vcc, s2, v11
+; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s7, v12
+; GFX8-NEXT:    v_subrev_u32_e32 v6, vcc, s6, v11
 ; GFX8-NEXT:    v_cndmask_b32_e64 v15, v15, v16, s[0:1]
 ; GFX8-NEXT:    v_add_u32_e64 v16, s[0:1], 1, v13
 ; GFX8-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
@@ -1556,20 +1487,20 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX8-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v10
 ; GFX8-NEXT:    v_cndmask_b32_e32 v6, v11, v6, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v12, v3, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v8, v8, v13, s[0:1]
-; GFX8-NEXT:    v_cndmask_b32_e64 v9, v9, v14, s[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e64 v9, v9, v13, s[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e64 v8, v8, v14, s[0:1]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v6, v2, v6, s[0:1]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v7, v7, v3, s[0:1]
-; GFX8-NEXT:    s_xor_b64 s[0:1], s[6:7], s[16:17]
-; GFX8-NEXT:    v_xor_b32_e32 v2, s0, v8
-; GFX8-NEXT:    v_xor_b32_e32 v3, s1, v9
+; GFX8-NEXT:    s_xor_b64 s[0:1], s[4:5], s[18:19]
+; GFX8-NEXT:    v_xor_b32_e32 v2, s0, v9
+; GFX8-NEXT:    v_xor_b32_e32 v3, s1, v8
 ; GFX8-NEXT:    v_mov_b32_e32 v8, s1
 ; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, s0, v2
 ; GFX8-NEXT:    v_subb_u32_e32 v3, vcc, v3, v8, vcc
-; GFX8-NEXT:    v_xor_b32_e32 v6, s6, v6
-; GFX8-NEXT:    v_xor_b32_e32 v7, s6, v7
-; GFX8-NEXT:    v_mov_b32_e32 v8, s6
-; GFX8-NEXT:    v_subrev_u32_e32 v6, vcc, s6, v6
+; GFX8-NEXT:    v_xor_b32_e32 v6, s4, v6
+; GFX8-NEXT:    v_xor_b32_e32 v7, s4, v7
+; GFX8-NEXT:    v_mov_b32_e32 v8, s4
+; GFX8-NEXT:    v_subrev_u32_e32 v6, vcc, s4, v6
 ; GFX8-NEXT:    v_subb_u32_e32 v7, vcc, v7, v8, vcc
 ; GFX8-NEXT:    v_mov_b32_e32 v8, s8
 ; GFX8-NEXT:    v_mov_b32_e32 v9, s9
@@ -1583,25 +1514,25 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX9-LABEL: sdivrem_v2i64:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x0
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x20
+; GFX9-NEXT:    s_load_dwordx4 s[16:19], s[4:5], 0x20
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_ashr_i32 s4, s13, 31
-; GFX9-NEXT:    s_ashr_i32 s6, s1, 31
-; GFX9-NEXT:    s_add_u32 s16, s12, s4
-; GFX9-NEXT:    s_addc_u32 s17, s13, s4
-; GFX9-NEXT:    s_add_u32 s0, s0, s6
+; GFX9-NEXT:    s_ashr_i32 s6, s17, 31
+; GFX9-NEXT:    s_add_u32 s0, s12, s4
+; GFX9-NEXT:    s_addc_u32 s1, s13, s4
+; GFX9-NEXT:    s_add_u32 s2, s16, s6
 ; GFX9-NEXT:    s_mov_b32 s7, s6
-; GFX9-NEXT:    s_addc_u32 s1, s1, s6
-; GFX9-NEXT:    s_xor_b64 s[12:13], s[0:1], s[6:7]
+; GFX9-NEXT:    s_addc_u32 s3, s17, s6
+; GFX9-NEXT:    s_xor_b64 s[12:13], s[2:3], s[6:7]
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, s13
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, s12
 ; GFX9-NEXT:    s_mov_b32 s5, s4
-; GFX9-NEXT:    s_xor_b64 s[16:17], s[16:17], s[4:5]
+; GFX9-NEXT:    s_xor_b64 s[16:17], s[0:1], s[4:5]
 ; GFX9-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v0
 ; GFX9-NEXT:    v_add_f32_e32 v0, v0, v1
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v0, v0
-; GFX9-NEXT:    s_sub_u32 s18, 0, s12
-; GFX9-NEXT:    s_subb_u32 s19, 0, s13
+; GFX9-NEXT:    s_sub_u32 s22, 0, s12
+; GFX9-NEXT:    s_subb_u32 s23, 0, s13
 ; GFX9-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GFX9-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GFX9-NEXT:    v_trunc_f32_e32 v2, v1
@@ -1609,10 +1540,10 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX9-NEXT:    v_add_f32_e32 v0, v1, v0
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v0
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v4, v2
-; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s18, v3, 0
-; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s18, v4, v[1:2]
+; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s22, v3, 0
+; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s22, v4, v[1:2]
 ; GFX9-NEXT:    v_mul_hi_u32 v5, v3, v0
-; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s19, v3, v[1:2]
+; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s23, v3, v[1:2]
 ; GFX9-NEXT:    v_mul_lo_u32 v2, v4, v0
 ; GFX9-NEXT:    v_mul_hi_u32 v0, v4, v0
 ; GFX9-NEXT:    v_mul_lo_u32 v6, v3, v1
@@ -1620,239 +1551,214 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX9-NEXT:    v_mul_hi_u32 v8, v3, v1
 ; GFX9-NEXT:    v_mul_hi_u32 v1, v4, v1
 ; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v6
-; GFX9-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v7, v0
-; GFX9-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v5
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT:    v_add_u32_e32 v2, v6, v2
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v8
-; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT:    v_add_u32_e32 v5, v7, v5
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT:    v_add3_u32 v1, v5, v2, v1
+; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, v2, v5, vcc
+; GFX9-NEXT:    v_add_co_u32_e64 v0, s[0:1], v7, v0
+; GFX9-NEXT:    v_addc_co_u32_e64 v0, s[2:3], v0, v8, s[0:1]
+; GFX9-NEXT:    v_addc_co_u32_e64 v2, s[20:21], 0, 0, vcc
+; GFX9-NEXT:    v_addc_co_u32_e64 v5, s[2:3], 0, 0, s[2:3]
+; GFX9-NEXT:    v_addc_co_u32_e64 v0, vcc, v0, v2, s[0:1]
+; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v5, vcc
+; GFX9-NEXT:    v_add_u32_e32 v1, v1, v2
 ; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, v3, v0
 ; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v4, v1, vcc
-; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s18, v3, 0
-; GFX9-NEXT:    v_mov_b32_e32 v7, s13
-; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s18, v4, v[1:2]
+; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s22, v3, 0
+; GFX9-NEXT:    s_xor_b64 s[20:21], s[4:5], s[6:7]
+; GFX9-NEXT:    s_ashr_i32 s6, s15, 31
+; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s22, v4, v[1:2]
 ; GFX9-NEXT:    v_mul_hi_u32 v6, v3, v0
-; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s19, v3, v[1:2]
+; GFX9-NEXT:    s_mov_b32 s7, s6
+; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s23, v3, v[1:2]
 ; GFX9-NEXT:    v_mul_lo_u32 v2, v4, v0
 ; GFX9-NEXT:    v_mul_hi_u32 v0, v4, v0
 ; GFX9-NEXT:    v_mul_lo_u32 v5, v3, v1
-; GFX9-NEXT:    s_xor_b64 s[18:19], s[4:5], s[6:7]
-; GFX9-NEXT:    s_ashr_i32 s6, s15, 31
-; GFX9-NEXT:    s_mov_b32 s7, s6
 ; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v5
-; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v6
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT:    v_mul_lo_u32 v6, v4, v1
-; GFX9-NEXT:    v_add_u32_e32 v2, v5, v2
-; GFX9-NEXT:    v_mul_hi_u32 v5, v3, v1
+; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, v2, v6, vcc
+; GFX9-NEXT:    v_mul_lo_u32 v2, v4, v1
+; GFX9-NEXT:    v_mul_hi_u32 v6, v3, v1
+; GFX9-NEXT:    v_addc_co_u32_e64 v5, s[0:1], 0, 0, vcc
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
 ; GFX9-NEXT:    v_mul_hi_u32 v1, v4, v1
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v6, v0
-; GFX9-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v5
-; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT:    v_add_u32_e32 v5, v6, v5
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT:    v_add3_u32 v1, v5, v2, v1
+; GFX9-NEXT:    v_addc_co_u32_e64 v0, s[0:1], v0, v6, vcc
+; GFX9-NEXT:    v_addc_co_u32_e64 v2, s[0:1], 0, 0, s[0:1]
+; GFX9-NEXT:    v_addc_co_u32_e32 v0, vcc, v0, v5, vcc
+; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
+; GFX9-NEXT:    v_add_u32_e32 v1, v1, v2
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v3, v0
 ; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v4, v1, vcc
 ; GFX9-NEXT:    v_mul_lo_u32 v2, s17, v0
 ; GFX9-NEXT:    v_mul_lo_u32 v3, s16, v1
 ; GFX9-NEXT:    v_mul_hi_u32 v4, s16, v0
 ; GFX9-NEXT:    v_mul_hi_u32 v0, s17, v0
-; GFX9-NEXT:    v_mul_hi_u32 v6, s17, v1
-; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v3
-; GFX9-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v4
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT:    v_mul_lo_u32 v4, s17, v1
-; GFX9-NEXT:    v_add_u32_e32 v2, v3, v2
-; GFX9-NEXT:    v_mul_hi_u32 v3, s16, v1
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v4, v0
-; GFX9-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v3
-; GFX9-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v5, vcc, v0, v2
-; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s12, v5, 0
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX9-NEXT:    v_add_u32_e32 v3, v4, v3
-; GFX9-NEXT:    v_add3_u32 v4, v3, v0, v6
-; GFX9-NEXT:    v_mov_b32_e32 v0, v2
-; GFX9-NEXT:    v_mad_u64_u32 v[2:3], s[0:1], s12, v4, v[0:1]
 ; GFX9-NEXT:    v_mov_b32_e32 v6, s17
-; GFX9-NEXT:    v_sub_co_u32_e32 v8, vcc, s16, v1
-; GFX9-NEXT:    v_mad_u64_u32 v[2:3], s[0:1], s13, v5, v[2:3]
-; GFX9-NEXT:    s_ashr_i32 s16, s3, 31
-; GFX9-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9-NEXT:    v_subb_co_u32_e64 v6, s[0:1], v6, v2, vcc
-; GFX9-NEXT:    v_sub_u32_e32 v1, s17, v2
+; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v3
+; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, v2, v4, vcc
+; GFX9-NEXT:    v_mul_lo_u32 v2, s17, v1
+; GFX9-NEXT:    v_mul_hi_u32 v4, s16, v1
+; GFX9-NEXT:    v_addc_co_u32_e64 v3, s[0:1], 0, 0, vcc
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX9-NEXT:    v_addc_co_u32_e64 v0, s[0:1], v0, v4, vcc
+; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v0, v3, vcc
+; GFX9-NEXT:    v_mul_hi_u32 v3, s17, v1
+; GFX9-NEXT:    v_addc_co_u32_e64 v2, s[0:1], 0, 0, s[0:1]
+; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s12, v4, 0
+; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
+; GFX9-NEXT:    v_add_u32_e32 v5, v3, v2
+; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s12, v5, v[1:2]
+; GFX9-NEXT:    v_sub_co_u32_e32 v7, vcc, s16, v0
+; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s13, v4, v[1:2]
+; GFX9-NEXT:    v_mov_b32_e32 v3, s13
+; GFX9-NEXT:    s_ashr_i32 s16, s19, 31
+; GFX9-NEXT:    v_subb_co_u32_e64 v6, s[0:1], v6, v1, vcc
+; GFX9-NEXT:    v_sub_u32_e32 v1, s17, v1
 ; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s13, v6
 ; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s12, v8
-; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v7, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v3, 0, -1, s[0:1]
+; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s12, v7
+; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[0:1]
 ; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s13, v6
-; GFX9-NEXT:    v_subrev_co_u32_e32 v10, vcc, s12, v8
-; GFX9-NEXT:    v_cndmask_b32_e64 v9, v2, v3, s[0:1]
-; GFX9-NEXT:    v_subbrev_co_u32_e64 v11, s[0:1], 0, v1, vcc
-; GFX9-NEXT:    v_add_co_u32_e64 v3, s[0:1], 1, v5
-; GFX9-NEXT:    v_addc_co_u32_e64 v12, s[0:1], 0, v4, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s13, v11
+; GFX9-NEXT:    v_subrev_co_u32_e32 v9, vcc, s12, v7
+; GFX9-NEXT:    v_cndmask_b32_e64 v8, v2, v8, s[0:1]
+; GFX9-NEXT:    v_subbrev_co_u32_e64 v10, s[0:1], 0, v1, vcc
+; GFX9-NEXT:    v_add_co_u32_e64 v11, s[0:1], 1, v4
+; GFX9-NEXT:    v_addc_co_u32_e64 v12, s[0:1], 0, v5, s[0:1]
+; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s13, v10
 ; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s12, v10
+; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s12, v9
 ; GFX9-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s13, v11
+; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s13, v10
 ; GFX9-NEXT:    v_cndmask_b32_e64 v13, v2, v13, s[0:1]
-; GFX9-NEXT:    v_add_co_u32_e64 v14, s[0:1], 1, v3
+; GFX9-NEXT:    v_add_co_u32_e64 v14, s[0:1], 1, v11
 ; GFX9-NEXT:    v_addc_co_u32_e64 v15, s[0:1], 0, v12, s[0:1]
 ; GFX9-NEXT:    s_add_u32 s0, s14, s6
 ; GFX9-NEXT:    s_addc_u32 s1, s15, s6
-; GFX9-NEXT:    s_add_u32 s2, s2, s16
+; GFX9-NEXT:    s_add_u32 s2, s18, s16
 ; GFX9-NEXT:    s_mov_b32 s17, s16
-; GFX9-NEXT:    s_addc_u32 s3, s3, s16
-; GFX9-NEXT:    s_xor_b64 s[2:3], s[2:3], s[16:17]
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v2, s3
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v16, s2
-; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v7, vcc
+; GFX9-NEXT:    s_addc_u32 s3, s19, s16
+; GFX9-NEXT:    s_xor_b64 s[14:15], s[2:3], s[16:17]
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v2, s15
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v16, s14
+; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
 ; GFX9-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v2
 ; GFX9-NEXT:    v_add_f32_e32 v2, v2, v16
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v2, v2
-; GFX9-NEXT:    v_subrev_co_u32_e32 v7, vcc, s12, v10
-; GFX9-NEXT:    v_subbrev_co_u32_e32 v16, vcc, 0, v1, vcc
+; GFX9-NEXT:    v_subrev_co_u32_e32 v16, vcc, s12, v9
+; GFX9-NEXT:    v_subbrev_co_u32_e32 v17, vcc, 0, v1, vcc
 ; GFX9-NEXT:    v_mul_f32_e32 v1, 0x5f7ffffc, v2
 ; GFX9-NEXT:    v_mul_f32_e32 v2, 0x2f800000, v1
-; GFX9-NEXT:    v_trunc_f32_e32 v17, v2
-; GFX9-NEXT:    v_mul_f32_e32 v2, 0xcf800000, v17
+; GFX9-NEXT:    v_trunc_f32_e32 v3, v2
+; GFX9-NEXT:    v_mul_f32_e32 v2, 0xcf800000, v3
 ; GFX9-NEXT:    v_add_f32_e32 v1, v2, v1
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v18, v1
 ; GFX9-NEXT:    s_xor_b64 s[12:13], s[0:1], s[6:7]
-; GFX9-NEXT:    s_sub_u32 s5, 0, s2
+; GFX9-NEXT:    s_sub_u32 s5, 0, s14
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v13
 ; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s5, v18, 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v13, v3, v14, vcc
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v14, v17
-; GFX9-NEXT:    s_subb_u32 s20, 0, s3
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v13, v3
+; GFX9-NEXT:    s_subb_u32 s18, 0, s15
+; GFX9-NEXT:    v_cndmask_b32_e32 v11, v11, v14, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v12, v12, v15, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v7, v10, v7, vcc
-; GFX9-NEXT:    v_mad_u64_u32 v[2:3], s[0:1], s5, v14, v[2:3]
-; GFX9-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v9
-; GFX9-NEXT:    v_cndmask_b32_e64 v9, v4, v12, s[0:1]
-; GFX9-NEXT:    v_mad_u64_u32 v[2:3], s[14:15], s20, v18, v[2:3]
-; GFX9-NEXT:    v_mul_lo_u32 v3, v14, v1
-; GFX9-NEXT:    v_cndmask_b32_e32 v10, v11, v16, vcc
-; GFX9-NEXT:    v_mul_lo_u32 v4, v18, v2
+; GFX9-NEXT:    v_mad_u64_u32 v[2:3], s[0:1], s5, v13, v[2:3]
+; GFX9-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v8
+; GFX9-NEXT:    v_cndmask_b32_e64 v8, v4, v11, s[0:1]
+; GFX9-NEXT:    v_mad_u64_u32 v[2:3], s[2:3], s18, v18, v[2:3]
+; GFX9-NEXT:    v_mul_lo_u32 v3, v13, v1
 ; GFX9-NEXT:    v_mul_hi_u32 v11, v18, v1
-; GFX9-NEXT:    v_mul_hi_u32 v1, v14, v1
-; GFX9-NEXT:    v_cndmask_b32_e64 v5, v5, v13, s[0:1]
+; GFX9-NEXT:    v_mul_lo_u32 v4, v18, v2
+; GFX9-NEXT:    v_cndmask_b32_e32 v9, v9, v16, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v10, v10, v17, vcc
+; GFX9-NEXT:    v_mul_hi_u32 v1, v13, v1
 ; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, v3, v4
-; GFX9-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, v3, v11
-; GFX9-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX9-NEXT:    v_mul_lo_u32 v11, v14, v2
-; GFX9-NEXT:    v_add_u32_e32 v3, v4, v3
-; GFX9-NEXT:    v_mul_hi_u32 v4, v18, v2
-; GFX9-NEXT:    v_mul_hi_u32 v2, v14, v2
-; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, v11, v1
-; GFX9-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, v1, v4
-; GFX9-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, v1, v3
-; GFX9-NEXT:    v_add_u32_e32 v4, v11, v4
-; GFX9-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v11, vcc
+; GFX9-NEXT:    v_mul_lo_u32 v3, v13, v2
+; GFX9-NEXT:    v_mul_hi_u32 v11, v18, v2
+; GFX9-NEXT:    v_addc_co_u32_e64 v4, s[2:3], 0, 0, vcc
+; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, v3, v1
+; GFX9-NEXT:    v_addc_co_u32_e64 v1, s[2:3], v1, v11, vcc
+; GFX9-NEXT:    v_mul_hi_u32 v2, v13, v2
+; GFX9-NEXT:    v_addc_co_u32_e64 v3, s[2:3], 0, 0, s[2:3]
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v4, vcc
+; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v11, vcc, v18, v1
-; GFX9-NEXT:    v_add3_u32 v2, v4, v3, v2
-; GFX9-NEXT:    v_mad_u64_u32 v[3:4], s[14:15], s5, v11, 0
-; GFX9-NEXT:    v_addc_co_u32_e32 v12, vcc, v14, v2, vcc
+; GFX9-NEXT:    v_add_u32_e32 v2, v2, v3
+; GFX9-NEXT:    v_mad_u64_u32 v[3:4], s[2:3], s5, v11, 0
+; GFX9-NEXT:    v_cndmask_b32_e64 v5, v5, v12, s[0:1]
+; GFX9-NEXT:    v_addc_co_u32_e32 v12, vcc, v13, v2, vcc
 ; GFX9-NEXT:    v_mov_b32_e32 v1, v4
-; GFX9-NEXT:    v_cndmask_b32_e64 v7, v8, v7, s[0:1]
+; GFX9-NEXT:    v_cndmask_b32_e64 v7, v7, v9, s[0:1]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v6, v6, v10, s[0:1]
 ; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s5, v12, v[1:2]
-; GFX9-NEXT:    v_xor_b32_e32 v8, s18, v5
-; GFX9-NEXT:    v_xor_b32_e32 v9, s19, v9
-; GFX9-NEXT:    v_mad_u64_u32 v[4:5], s[0:1], s20, v11, v[1:2]
-; GFX9-NEXT:    v_mov_b32_e32 v10, s19
-; GFX9-NEXT:    v_subrev_co_u32_e32 v1, vcc, s18, v8
+; GFX9-NEXT:    v_xor_b32_e32 v9, s21, v5
+; GFX9-NEXT:    v_xor_b32_e32 v8, s20, v8
+; GFX9-NEXT:    v_mad_u64_u32 v[4:5], s[0:1], s18, v11, v[1:2]
+; GFX9-NEXT:    v_mov_b32_e32 v10, s21
+; GFX9-NEXT:    v_subrev_co_u32_e32 v1, vcc, s20, v8
 ; GFX9-NEXT:    v_xor_b32_e32 v5, s4, v7
 ; GFX9-NEXT:    v_mul_lo_u32 v7, v12, v3
 ; GFX9-NEXT:    v_mul_lo_u32 v8, v11, v4
 ; GFX9-NEXT:    v_subb_co_u32_e32 v2, vcc, v9, v10, vcc
 ; GFX9-NEXT:    v_mul_hi_u32 v9, v11, v3
 ; GFX9-NEXT:    v_add_co_u32_e32 v7, vcc, v7, v8
-; GFX9-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v7, vcc, v7, v9
-; GFX9-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX9-NEXT:    v_mul_lo_u32 v9, v12, v4
 ; GFX9-NEXT:    v_mul_hi_u32 v3, v12, v3
-; GFX9-NEXT:    v_add_u32_e32 v7, v8, v7
-; GFX9-NEXT:    v_mul_hi_u32 v8, v11, v4
+; GFX9-NEXT:    v_addc_co_u32_e32 v7, vcc, v7, v9, vcc
+; GFX9-NEXT:    v_mul_lo_u32 v7, v12, v4
+; GFX9-NEXT:    v_mul_hi_u32 v9, v11, v4
+; GFX9-NEXT:    v_addc_co_u32_e64 v8, s[0:1], 0, 0, vcc
+; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, v7, v3
 ; GFX9-NEXT:    v_mul_hi_u32 v4, v12, v4
-; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, v9, v3
-; GFX9-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, v3, v8
-; GFX9-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, v3, v7
-; GFX9-NEXT:    v_add_u32_e32 v8, v9, v8
-; GFX9-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX9-NEXT:    v_add3_u32 v4, v8, v7, v4
+; GFX9-NEXT:    v_addc_co_u32_e64 v3, s[0:1], v3, v9, vcc
+; GFX9-NEXT:    v_addc_co_u32_e64 v7, s[0:1], 0, 0, s[0:1]
+; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v8, vcc
+; GFX9-NEXT:    v_addc_co_u32_e32 v7, vcc, 0, v7, vcc
+; GFX9-NEXT:    v_add_u32_e32 v4, v4, v7
 ; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, v11, v3
 ; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v12, v4, vcc
 ; GFX9-NEXT:    v_mul_lo_u32 v7, s13, v3
 ; GFX9-NEXT:    v_mul_lo_u32 v8, s12, v4
 ; GFX9-NEXT:    v_mul_hi_u32 v10, s12, v3
 ; GFX9-NEXT:    v_mul_hi_u32 v3, s13, v3
-; GFX9-NEXT:    v_mul_hi_u32 v12, s13, v4
-; GFX9-NEXT:    v_add_co_u32_e32 v7, vcc, v7, v8
-; GFX9-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v7, vcc, v7, v10
-; GFX9-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX9-NEXT:    v_mul_lo_u32 v10, s13, v4
-; GFX9-NEXT:    v_add_u32_e32 v7, v8, v7
-; GFX9-NEXT:    v_mul_hi_u32 v8, s12, v4
 ; GFX9-NEXT:    v_xor_b32_e32 v6, s4, v6
-; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, v10, v3
-; GFX9-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, v3, v8
-; GFX9-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v11, vcc, v3, v7
-; GFX9-NEXT:    v_mad_u64_u32 v[3:4], s[0:1], s2, v11, 0
+; GFX9-NEXT:    v_add_co_u32_e32 v7, vcc, v7, v8
+; GFX9-NEXT:    v_addc_co_u32_e32 v7, vcc, v7, v10, vcc
+; GFX9-NEXT:    v_mul_lo_u32 v7, s13, v4
+; GFX9-NEXT:    v_mul_hi_u32 v10, s12, v4
+; GFX9-NEXT:    v_addc_co_u32_e64 v8, s[0:1], 0, 0, vcc
+; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, v7, v3
+; GFX9-NEXT:    v_addc_co_u32_e64 v3, s[0:1], v3, v10, vcc
+; GFX9-NEXT:    v_addc_co_u32_e32 v10, vcc, v3, v8, vcc
+; GFX9-NEXT:    v_mul_hi_u32 v8, s13, v4
+; GFX9-NEXT:    v_addc_co_u32_e64 v7, s[0:1], 0, 0, s[0:1]
+; GFX9-NEXT:    v_mad_u64_u32 v[3:4], s[0:1], s14, v10, 0
+; GFX9-NEXT:    v_addc_co_u32_e32 v7, vcc, 0, v7, vcc
 ; GFX9-NEXT:    v_mov_b32_e32 v9, s4
-; GFX9-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
 ; GFX9-NEXT:    v_subrev_co_u32_e32 v5, vcc, s4, v5
-; GFX9-NEXT:    v_add_u32_e32 v8, v10, v8
 ; GFX9-NEXT:    v_subb_co_u32_e32 v6, vcc, v6, v9, vcc
-; GFX9-NEXT:    v_add3_u32 v9, v8, v7, v12
-; GFX9-NEXT:    v_mad_u64_u32 v[7:8], s[0:1], s2, v9, v[4:5]
-; GFX9-NEXT:    v_mov_b32_e32 v10, s13
+; GFX9-NEXT:    v_add_u32_e32 v9, v8, v7
+; GFX9-NEXT:    v_mad_u64_u32 v[7:8], s[0:1], s14, v9, v[4:5]
+; GFX9-NEXT:    v_mov_b32_e32 v11, s13
 ; GFX9-NEXT:    v_sub_co_u32_e32 v3, vcc, s12, v3
-; GFX9-NEXT:    v_mad_u64_u32 v[7:8], s[0:1], s3, v11, v[7:8]
-; GFX9-NEXT:    v_mov_b32_e32 v4, s3
-; GFX9-NEXT:    v_subb_co_u32_e64 v8, s[0:1], v10, v7, vcc
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s3, v8
+; GFX9-NEXT:    v_mad_u64_u32 v[7:8], s[0:1], s15, v10, v[7:8]
+; GFX9-NEXT:    v_mov_b32_e32 v4, s15
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-NEXT:    v_subb_co_u32_e64 v8, s[0:1], v11, v7, vcc
+; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s15, v8
 ; GFX9-NEXT:    v_sub_u32_e32 v7, s13, v7
-; GFX9-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s2, v3
+; GFX9-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[0:1]
+; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s14, v3
 ; GFX9-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s3, v8
+; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s15, v8
 ; GFX9-NEXT:    v_subb_co_u32_e32 v7, vcc, v7, v4, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v10, v10, v12, s[0:1]
-; GFX9-NEXT:    v_subrev_co_u32_e32 v12, vcc, s2, v3
+; GFX9-NEXT:    v_cndmask_b32_e64 v11, v11, v12, s[0:1]
+; GFX9-NEXT:    v_subrev_co_u32_e32 v12, vcc, s14, v3
 ; GFX9-NEXT:    v_subbrev_co_u32_e64 v13, s[0:1], 0, v7, vcc
-; GFX9-NEXT:    v_add_co_u32_e64 v14, s[0:1], 1, v11
+; GFX9-NEXT:    v_add_co_u32_e64 v14, s[0:1], 1, v10
 ; GFX9-NEXT:    v_addc_co_u32_e64 v15, s[0:1], 0, v9, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s3, v13
+; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s15, v13
 ; GFX9-NEXT:    v_cndmask_b32_e64 v16, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s2, v12
+; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s14, v12
 ; GFX9-NEXT:    v_subb_co_u32_e32 v4, vcc, v7, v4, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v17, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s3, v13
-; GFX9-NEXT:    v_subrev_co_u32_e32 v7, vcc, s2, v12
+; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s15, v13
+; GFX9-NEXT:    v_subrev_co_u32_e32 v7, vcc, s14, v12
 ; GFX9-NEXT:    v_cndmask_b32_e64 v16, v16, v17, s[0:1]
 ; GFX9-NEXT:    v_add_co_u32_e64 v17, s[0:1], 1, v14
 ; GFX9-NEXT:    v_subbrev_co_u32_e32 v4, vcc, 0, v4, vcc
@@ -1860,10 +1766,10 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v16
 ; GFX9-NEXT:    v_cndmask_b32_e32 v14, v14, v17, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v15, v15, v18, vcc
-; GFX9-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v10
+; GFX9-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v11
 ; GFX9-NEXT:    v_cndmask_b32_e32 v7, v12, v7, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v13, v4, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v10, v11, v14, s[0:1]
+; GFX9-NEXT:    v_cndmask_b32_e64 v10, v10, v14, s[0:1]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v9, v9, v15, s[0:1]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v7, v3, v7, s[0:1]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v8, v8, v4, s[0:1]
@@ -1897,26 +1803,26 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX10-NEXT:    s_addc_u32 s1, s1, s16
 ; GFX10-NEXT:    s_mov_b32 s5, s4
 ; GFX10-NEXT:    s_xor_b64 s[6:7], s[0:1], s[16:17]
-; GFX10-NEXT:    s_xor_b64 s[0:1], s[12:13], s[4:5]
+; GFX10-NEXT:    s_xor_b64 s[12:13], s[12:13], s[4:5]
 ; GFX10-NEXT:    v_cvt_f32_u32_e32 v1, s7
-; GFX10-NEXT:    s_sub_u32 s21, 0, s6
-; GFX10-NEXT:    s_subb_u32 s20, 0, s7
-; GFX10-NEXT:    s_ashr_i32 s12, s15, 31
-; GFX10-NEXT:    s_xor_b64 s[18:19], s[4:5], s[16:17]
-; GFX10-NEXT:    s_ashr_i32 s16, s3, 31
-; GFX10-NEXT:    s_add_u32 s14, s14, s12
-; GFX10-NEXT:    s_addc_u32 s15, s15, s12
+; GFX10-NEXT:    s_sub_u32 s25, 0, s6
+; GFX10-NEXT:    s_subb_u32 s24, 0, s7
+; GFX10-NEXT:    s_xor_b64 s[20:21], s[4:5], s[16:17]
+; GFX10-NEXT:    s_ashr_i32 s16, s15, 31
+; GFX10-NEXT:    s_ashr_i32 s18, s3, 31
+; GFX10-NEXT:    s_add_u32 s0, s14, s16
+; GFX10-NEXT:    s_addc_u32 s1, s15, s16
 ; GFX10-NEXT:    v_cvt_f32_u32_e32 v0, s6
 ; GFX10-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v1
-; GFX10-NEXT:    s_add_u32 s2, s2, s16
+; GFX10-NEXT:    s_add_u32 s2, s2, s18
+; GFX10-NEXT:    s_mov_b32 s19, s18
+; GFX10-NEXT:    s_addc_u32 s3, s3, s18
 ; GFX10-NEXT:    s_mov_b32 s17, s16
-; GFX10-NEXT:    s_addc_u32 s3, s3, s16
-; GFX10-NEXT:    s_mov_b32 s13, s12
-; GFX10-NEXT:    s_xor_b64 s[2:3], s[2:3], s[16:17]
+; GFX10-NEXT:    s_xor_b64 s[14:15], s[2:3], s[18:19]
 ; GFX10-NEXT:    v_add_f32_e32 v0, v1, v0
-; GFX10-NEXT:    v_cvt_f32_u32_e32 v1, s3
-; GFX10-NEXT:    v_cvt_f32_u32_e32 v2, s2
-; GFX10-NEXT:    s_xor_b64 s[14:15], s[14:15], s[12:13]
+; GFX10-NEXT:    v_cvt_f32_u32_e32 v1, s15
+; GFX10-NEXT:    v_cvt_f32_u32_e32 v2, s14
+; GFX10-NEXT:    s_xor_b64 s[22:23], s[0:1], s[16:17]
 ; GFX10-NEXT:    v_rcp_iflag_f32_e32 v0, v0
 ; GFX10-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v1
 ; GFX10-NEXT:    v_add_f32_e32 v1, v1, v2
@@ -1927,255 +1833,230 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX10-NEXT:    v_mul_f32_e32 v3, 0x5f7ffffc, v1
 ; GFX10-NEXT:    v_mul_f32_e32 v1, 0xcf800000, v2
 ; GFX10-NEXT:    v_mul_f32_e32 v4, 0x2f800000, v3
-; GFX10-NEXT:    v_cvt_u32_f32_e32 v9, v2
+; GFX10-NEXT:    v_cvt_u32_f32_e32 v7, v2
 ; GFX10-NEXT:    v_add_f32_e32 v0, v1, v0
-; GFX10-NEXT:    v_trunc_f32_e32 v6, v4
-; GFX10-NEXT:    v_cvt_u32_f32_e32 v7, v0
-; GFX10-NEXT:    v_mul_f32_e32 v4, 0xcf800000, v6
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s5, s21, v7, 0
-; GFX10-NEXT:    v_add_f32_e32 v3, v4, v3
-; GFX10-NEXT:    s_sub_u32 s5, 0, s2
+; GFX10-NEXT:    v_trunc_f32_e32 v4, v4
+; GFX10-NEXT:    v_cvt_u32_f32_e32 v6, v0
+; GFX10-NEXT:    v_mul_f32_e32 v5, 0xcf800000, v4
+; GFX10-NEXT:    v_cvt_u32_f32_e32 v9, v4
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s2, s25, v6, 0
+; GFX10-NEXT:    v_add_f32_e32 v3, v5, v3
+; GFX10-NEXT:    s_sub_u32 s2, 0, s14
+; GFX10-NEXT:    s_subb_u32 s3, 0, s15
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v8, v3
-; GFX10-NEXT:    v_mul_hi_u32 v10, v9, v0
-; GFX10-NEXT:    v_mad_u64_u32 v[2:3], s22, s5, v8, 0
-; GFX10-NEXT:    v_mad_u64_u32 v[4:5], s22, s21, v9, v[1:2]
-; GFX10-NEXT:    v_cvt_u32_f32_e32 v5, v6
+; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s0, s25, v7, v[1:2]
+; GFX10-NEXT:    v_mul_lo_u32 v10, v7, v0
+; GFX10-NEXT:    v_mad_u64_u32 v[2:3], s0, s2, v8, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[4:5], s0, s24, v6, v[1:2]
 ; GFX10-NEXT:    v_mov_b32_e32 v1, v3
-; GFX10-NEXT:    v_mul_hi_u32 v6, v7, v0
-; GFX10-NEXT:    s_subb_u32 s22, 0, s3
+; GFX10-NEXT:    v_mul_hi_u32 v3, v6, v0
+; GFX10-NEXT:    v_mul_hi_u32 v5, v7, v0
+; GFX10-NEXT:    v_mul_lo_u32 v11, v9, v2
 ; GFX10-NEXT:    v_mul_hi_u32 v12, v8, v2
-; GFX10-NEXT:    v_mul_lo_u32 v11, v5, v2
-; GFX10-NEXT:    v_mad_u64_u32 v[3:4], s23, s20, v7, v[4:5]
-; GFX10-NEXT:    v_mul_lo_u32 v4, v9, v0
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s23, s5, v5, v[1:2]
-; GFX10-NEXT:    v_mul_hi_u32 v2, v5, v2
-; GFX10-NEXT:    v_mul_lo_u32 v13, v7, v3
-; GFX10-NEXT:    v_mul_lo_u32 v14, v9, v3
-; GFX10-NEXT:    v_mul_hi_u32 v15, v7, v3
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s23, s22, v8, v[0:1]
-; GFX10-NEXT:    v_mul_hi_u32 v1, v9, v3
-; GFX10-NEXT:    v_add_co_u32 v3, s23, v4, v13
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s23
-; GFX10-NEXT:    v_add_co_u32 v10, s23, v14, v10
-; GFX10-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s23
-; GFX10-NEXT:    v_mul_lo_u32 v14, v8, v0
-; GFX10-NEXT:    v_add_co_u32 v3, s23, v3, v6
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s23
-; GFX10-NEXT:    v_add_co_u32 v6, s23, v10, v15
-; GFX10-NEXT:    v_mul_lo_u32 v15, v5, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s23
-; GFX10-NEXT:    v_mul_hi_u32 v16, v8, v0
-; GFX10-NEXT:    v_mul_hi_u32 v17, v5, v0
-; GFX10-NEXT:    v_add_nc_u32_e32 v0, v4, v3
-; GFX10-NEXT:    v_add_co_u32 v4, s23, v11, v14
-; GFX10-NEXT:    v_add_nc_u32_e32 v3, v13, v10
-; GFX10-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s23
-; GFX10-NEXT:    v_add_co_u32 v2, s23, v15, v2
-; GFX10-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s23
-; GFX10-NEXT:    v_add_co_u32 v0, s23, v6, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s23
-; GFX10-NEXT:    v_add_co_u32 v4, s23, v4, v12
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s23
-; GFX10-NEXT:    v_add_co_u32 v2, s23, v2, v16
-; GFX10-NEXT:    v_add3_u32 v1, v3, v6, v1
-; GFX10-NEXT:    v_add_co_u32 v6, vcc_lo, v7, v0
-; GFX10-NEXT:    v_add_nc_u32_e32 v3, v10, v4
-; GFX10-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s23
-; GFX10-NEXT:    v_add_co_ci_u32_e32 v7, vcc_lo, v9, v1, vcc_lo
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s23, s21, v6, 0
-; GFX10-NEXT:    v_add_co_u32 v2, s23, v2, v3
-; GFX10-NEXT:    v_add_nc_u32_e32 v4, v11, v12
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s23
-; GFX10-NEXT:    v_mov_b32_e32 v10, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, s2, v9, v[1:2]
+; GFX10-NEXT:    v_mul_lo_u32 v13, v6, v4
+; GFX10-NEXT:    v_mul_lo_u32 v14, v7, v4
+; GFX10-NEXT:    v_mul_hi_u32 v15, v6, v4
+; GFX10-NEXT:    v_mul_hi_u32 v2, v9, v2
+; GFX10-NEXT:    v_mul_hi_u32 v4, v7, v4
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, s3, v8, v[0:1]
+; GFX10-NEXT:    v_add_co_u32 v1, vcc_lo, v10, v13
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-NEXT:    v_add_co_u32 v1, s0, v14, v5
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v3, s5, 0, 0, vcc_lo
+; GFX10-NEXT:    v_mul_lo_u32 v10, v8, v0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s1, v1, v15, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v5, s1, 0, 0, s1
+; GFX10-NEXT:    v_mul_lo_u32 v13, v9, v0
+; GFX10-NEXT:    v_mul_hi_u32 v14, v8, v0
+; GFX10-NEXT:    v_mul_hi_u32 v15, v9, v0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v0, vcc_lo, v1, v3, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v5, vcc_lo
+; GFX10-NEXT:    v_add_co_u32 v3, vcc_lo, v11, v10
+; GFX10-NEXT:    v_add_co_u32 v2, s0, v13, v2
+; GFX10-NEXT:    v_add_nc_u32_e32 v1, v4, v1
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, v3, v12, vcc_lo
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v2, s1, v2, v14, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v3, s5, 0, 0, vcc_lo
+; GFX10-NEXT:    v_add_co_u32 v6, vcc_lo, v6, v0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v4, s1, 0, 0, s1
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v7, vcc_lo, v7, v1, vcc_lo
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v2, vcc_lo, v2, v3, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, 0, v4, vcc_lo
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s1, s25, v6, 0
 ; GFX10-NEXT:    v_add_co_u32 v8, vcc_lo, v8, v2
-; GFX10-NEXT:    v_mul_hi_u32 v11, v7, v0
-; GFX10-NEXT:    v_add3_u32 v3, v4, v3, v17
-; GFX10-NEXT:    v_add_co_ci_u32_e32 v9, vcc_lo, v5, v3, vcc_lo
-; GFX10-NEXT:    v_mad_u64_u32 v[2:3], s23, s5, v8, 0
-; GFX10-NEXT:    v_mad_u64_u32 v[4:5], s21, s21, v7, v[1:2]
+; GFX10-NEXT:    v_add_nc_u32_e32 v3, v15, v3
+; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s0, s25, v7, v[1:2]
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v9, vcc_lo, v9, v3, vcc_lo
+; GFX10-NEXT:    v_mad_u64_u32 v[2:3], s0, s2, v8, 0
+; GFX10-NEXT:    v_mul_lo_u32 v10, v7, v0
+; GFX10-NEXT:    v_mad_u64_u32 v[4:5], s0, s24, v6, v[1:2]
 ; GFX10-NEXT:    v_mov_b32_e32 v1, v3
-; GFX10-NEXT:    v_mul_lo_u32 v12, v9, v2
-; GFX10-NEXT:    v_mul_hi_u32 v13, v8, v2
-; GFX10-NEXT:    v_mad_u64_u32 v[3:4], s20, s20, v6, v[4:5]
-; GFX10-NEXT:    v_mul_lo_u32 v4, v7, v0
-; GFX10-NEXT:    v_mul_hi_u32 v5, v6, v0
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s5, s5, v9, v[1:2]
+; GFX10-NEXT:    v_mul_hi_u32 v3, v6, v0
+; GFX10-NEXT:    v_mul_hi_u32 v5, v7, v0
+; GFX10-NEXT:    v_mul_lo_u32 v11, v9, v2
+; GFX10-NEXT:    v_mul_hi_u32 v12, v8, v2
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, s2, v9, v[1:2]
+; GFX10-NEXT:    v_mul_lo_u32 v13, v6, v4
+; GFX10-NEXT:    v_mul_lo_u32 v14, v7, v4
+; GFX10-NEXT:    v_mul_hi_u32 v15, v6, v4
+; GFX10-NEXT:    v_mul_hi_u32 v4, v7, v4
 ; GFX10-NEXT:    v_mul_hi_u32 v2, v9, v2
-; GFX10-NEXT:    v_mul_lo_u32 v14, v6, v3
-; GFX10-NEXT:    v_mul_lo_u32 v15, v7, v3
-; GFX10-NEXT:    v_mul_hi_u32 v16, v6, v3
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s5, s22, v8, v[0:1]
-; GFX10-NEXT:    v_mul_hi_u32 v1, v7, v3
-; GFX10-NEXT:    v_add_co_u32 v3, s5, v4, v14
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s5
-; GFX10-NEXT:    v_add_co_u32 v11, s5, v15, v11
-; GFX10-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s5
-; GFX10-NEXT:    v_add_co_u32 v3, s5, v3, v5
-; GFX10-NEXT:    v_mul_lo_u32 v15, v8, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s5
-; GFX10-NEXT:    v_add_co_u32 v5, s5, v11, v16
-; GFX10-NEXT:    v_mul_lo_u32 v16, v9, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s5
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, s3, v8, v[0:1]
+; GFX10-NEXT:    v_add_co_u32 v1, vcc_lo, v10, v13
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-NEXT:    v_add_co_u32 v1, s0, v14, v5
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v3, s2, 0, 0, vcc_lo
+; GFX10-NEXT:    v_mul_lo_u32 v10, v8, v0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s1, v1, v15, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v5, s1, 0, 0, s1
+; GFX10-NEXT:    v_mul_lo_u32 v13, v9, v0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, vcc_lo, v1, v3, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, 0, v5, vcc_lo
+; GFX10-NEXT:    v_add_co_u32 v5, vcc_lo, v11, v10
+; GFX10-NEXT:    v_mul_hi_u32 v14, v8, v0
 ; GFX10-NEXT:    v_add_nc_u32_e32 v3, v4, v3
-; GFX10-NEXT:    v_mul_hi_u32 v17, v8, v0
+; GFX10-NEXT:    v_add_co_u32 v2, s0, v13, v2
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v5, vcc_lo, v5, v12, vcc_lo
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v4, s2, 0, 0, vcc_lo
+; GFX10-NEXT:    v_add_co_u32 v1, vcc_lo, v6, v1
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, v7, v3, vcc_lo
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v2, s1, v2, v14, s0
+; GFX10-NEXT:    v_mul_lo_u32 v6, s13, v1
+; GFX10-NEXT:    v_mul_lo_u32 v10, s12, v3
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v5, s1, 0, 0, s1
+; GFX10-NEXT:    v_mul_hi_u32 v7, s12, v1
+; GFX10-NEXT:    v_mul_hi_u32 v1, s13, v1
+; GFX10-NEXT:    v_mul_lo_u32 v11, s13, v3
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v2, vcc_lo, v2, v4, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v4, vcc_lo, 0, v5, vcc_lo
+; GFX10-NEXT:    v_mul_hi_u32 v5, s12, v3
+; GFX10-NEXT:    v_add_co_u32 v6, vcc_lo, v6, v10
+; GFX10-NEXT:    v_add_co_u32 v1, s0, v11, v1
 ; GFX10-NEXT:    v_mul_hi_u32 v0, v9, v0
-; GFX10-NEXT:    v_add_nc_u32_e32 v4, v14, v11
-; GFX10-NEXT:    v_add_co_u32 v11, s5, v12, v15
-; GFX10-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s5
-; GFX10-NEXT:    v_add_co_u32 v2, s5, v16, v2
-; GFX10-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s5
-; GFX10-NEXT:    v_add_co_u32 v3, s5, v5, v3
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s5
-; GFX10-NEXT:    v_add_co_u32 v11, s5, v11, v13
-; GFX10-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s5
-; GFX10-NEXT:    v_add_co_u32 v2, s5, v2, v17
-; GFX10-NEXT:    v_add3_u32 v1, v4, v5, v1
-; GFX10-NEXT:    v_add_co_u32 v3, vcc_lo, v6, v3
-; GFX10-NEXT:    v_add_nc_u32_e32 v4, v12, v11
-; GFX10-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s5
-; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v7, v1, vcc_lo
-; GFX10-NEXT:    v_mul_lo_u32 v6, s1, v3
-; GFX10-NEXT:    v_add_co_u32 v2, s5, v2, v4
-; GFX10-NEXT:    v_add_nc_u32_e32 v5, v14, v13
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s5
-; GFX10-NEXT:    v_mul_lo_u32 v11, s0, v1
-; GFX10-NEXT:    v_mul_hi_u32 v7, s0, v3
-; GFX10-NEXT:    v_mul_hi_u32 v3, s1, v3
-; GFX10-NEXT:    v_mul_lo_u32 v12, s1, v1
-; GFX10-NEXT:    v_add3_u32 v0, v5, v4, v0
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v6, vcc_lo, v6, v7, vcc_lo
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s1, v1, v5, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v5, s2, 0, 0, vcc_lo
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v6, s1, 0, 0, s1
+; GFX10-NEXT:    v_add_nc_u32_e32 v4, v0, v4
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v5, vcc_lo, v1, v5, s0
+; GFX10-NEXT:    v_mul_hi_u32 v3, s13, v3
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v6, vcc_lo, 0, v6, vcc_lo
 ; GFX10-NEXT:    v_add_co_u32 v2, vcc_lo, v8, v2
-; GFX10-NEXT:    v_mul_hi_u32 v4, s0, v1
-; GFX10-NEXT:    v_mul_hi_u32 v5, s1, v1
-; GFX10-NEXT:    v_add_co_u32 v1, s5, v6, v11
-; GFX10-NEXT:    v_add_co_ci_u32_e32 v8, vcc_lo, v9, v0, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s5
-; GFX10-NEXT:    v_add_co_u32 v3, s5, v12, v3
-; GFX10-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s5
-; GFX10-NEXT:    v_add_co_u32 v1, s5, v1, v7
-; GFX10-NEXT:    v_mul_lo_u32 v0, s15, v2
-; GFX10-NEXT:    v_mul_lo_u32 v12, s14, v8
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s5
-; GFX10-NEXT:    v_add_co_u32 v3, s5, v3, v4
-; GFX10-NEXT:    v_mul_hi_u32 v9, s14, v2
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s5
-; GFX10-NEXT:    v_mul_hi_u32 v2, s15, v2
-; GFX10-NEXT:    v_mul_lo_u32 v7, s15, v8
-; GFX10-NEXT:    v_add_nc_u32_e32 v1, v6, v1
-; GFX10-NEXT:    v_add_co_u32 v6, s5, v0, v12
-; GFX10-NEXT:    v_mul_hi_u32 v13, s14, v8
-; GFX10-NEXT:    v_add_nc_u32_e32 v4, v11, v4
-; GFX10-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s5
-; GFX10-NEXT:    v_add_co_u32 v12, s5, v3, v1
-; GFX10-NEXT:    v_add_co_u32 v2, s20, v7, v2
-; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s5
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s5, s6, v12, 0
-; GFX10-NEXT:    v_add_co_u32 v6, s5, v6, v9
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s5
-; GFX10-NEXT:    v_add_co_u32 v9, s5, v2, v13
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s20
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s5
-; GFX10-NEXT:    v_add3_u32 v4, v4, v7, v5
-; GFX10-NEXT:    v_add_nc_u32_e32 v6, v11, v6
-; GFX10-NEXT:    v_mul_hi_u32 v5, s15, v8
-; GFX10-NEXT:    v_add_co_u32 v7, vcc_lo, v12, 1
-; GFX10-NEXT:    v_add_nc_u32_e32 v3, v3, v2
-; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s5, s6, v4, v[1:2]
-; GFX10-NEXT:    v_add_co_u32 v6, s5, v9, v6
-; GFX10-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s5
-; GFX10-NEXT:    v_add_co_ci_u32_e32 v8, vcc_lo, 0, v4, vcc_lo
-; GFX10-NEXT:    v_add_co_u32 v11, vcc_lo, v7, 1
-; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s5, s7, v12, v[1:2]
-; GFX10-NEXT:    v_add3_u32 v5, v3, v9, v5
-; GFX10-NEXT:    v_mad_u64_u32 v[2:3], s5, s2, v6, 0
-; GFX10-NEXT:    v_add_co_ci_u32_e32 v13, vcc_lo, 0, v8, vcc_lo
-; GFX10-NEXT:    v_sub_co_u32 v14, vcc_lo, s0, v0
-; GFX10-NEXT:    v_sub_nc_u32_e32 v9, s1, v1
-; GFX10-NEXT:    v_sub_co_ci_u32_e64 v15, s0, s1, v1, vcc_lo
-; GFX10-NEXT:    v_mov_b32_e32 v0, v3
-; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v9, vcc_lo, s7, v9, vcc_lo
-; GFX10-NEXT:    v_sub_co_u32 v3, vcc_lo, v14, s6
-; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v16, s0, 0, v9, vcc_lo
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s6, v14
-; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v9, vcc_lo, s7, v9, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v17, 0, -1, s0
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s6, v3
-; GFX10-NEXT:    v_cndmask_b32_e64 v18, 0, -1, s0
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s7, v16
-; GFX10-NEXT:    v_cndmask_b32_e64 v19, 0, -1, s0
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s7, v15
-; GFX10-NEXT:    v_cndmask_b32_e64 v20, 0, -1, s0
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, s2, v5, v[0:1]
-; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s7, v16
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v19, v18, s0
-; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s7, v15
-; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX10-NEXT:    v_cndmask_b32_e64 v17, v20, v17, s0
-; GFX10-NEXT:    v_sub_co_u32 v1, s0, v3, s6
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v4, vcc_lo, v9, v4, vcc_lo
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, s6, v5, 0
+; GFX10-NEXT:    v_mul_lo_u32 v7, s23, v2
+; GFX10-NEXT:    v_add_nc_u32_e32 v3, v3, v6
+; GFX10-NEXT:    v_mul_lo_u32 v6, s22, v4
+; GFX10-NEXT:    v_mul_hi_u32 v9, s22, v2
+; GFX10-NEXT:    v_mul_hi_u32 v10, s23, v2
+; GFX10-NEXT:    v_mul_lo_u32 v11, s23, v4
+; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s0, s6, v3, v[1:2]
+; GFX10-NEXT:    v_mul_hi_u32 v12, s22, v4
+; GFX10-NEXT:    v_mul_hi_u32 v4, s23, v4
+; GFX10-NEXT:    v_add_co_u32 v2, vcc_lo, v7, v6
+; GFX10-NEXT:    v_sub_co_u32 v6, s2, s12, v0
+; GFX10-NEXT:    v_mov_b32_e32 v8, 0
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, v2, v9, vcc_lo
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v0, s3, 0, 0, vcc_lo
+; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s0, s7, v5, v[1:2]
+; GFX10-NEXT:    v_add_co_u32 v2, s0, v11, v10
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v2, s1, v2, v12, s0
+; GFX10-NEXT:    v_sub_co_ci_u32_e64 v9, vcc_lo, s13, v1, s2
+; GFX10-NEXT:    v_sub_nc_u32_e32 v7, s13, v1
+; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s6, v6
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v17, s0, v2, v0, s0
+; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v7, vcc_lo, s7, v7, s2
+; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s7, v9
+; GFX10-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc_lo
+; GFX10-NEXT:    v_sub_co_u32 v11, vcc_lo, v6, s6
+; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v12, s2, 0, v7, vcc_lo
+; GFX10-NEXT:    v_add_co_u32 v13, s2, v5, 1
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v14, s2, 0, v3, s2
+; GFX10-NEXT:    v_cmp_eq_u32_e64 s2, s7, v9
+; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v7, vcc_lo, s7, v7, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v10, v10, v1, s2
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s1, 0, 0, s1
+; GFX10-NEXT:    v_cmp_le_u32_e64 s2, s7, v12
+; GFX10-NEXT:    v_cmp_le_u32_e64 s1, s6, v11
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v2, s0, 0, v1, s0
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, s14, v17, 0
+; GFX10-NEXT:    v_cndmask_b32_e64 v15, 0, -1, s2
+; GFX10-NEXT:    v_cndmask_b32_e64 v16, 0, -1, s1
+; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s7, v12
+; GFX10-NEXT:    v_add_nc_u32_e32 v4, v4, v2
+; GFX10-NEXT:    v_cndmask_b32_e64 v15, v15, v16, s0
+; GFX10-NEXT:    v_add_co_u32 v16, s0, v13, 1
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v18, s0, 0, v14, s0
+; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s0, s14, v4, v[1:2]
+; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v15
+; GFX10-NEXT:    v_sub_co_u32 v2, s0, v11, s6
+; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v7, s0, 0, v7, s0
+; GFX10-NEXT:    v_cmp_ne_u32_e64 s0, 0, v10
+; GFX10-NEXT:    v_cndmask_b32_e32 v10, v11, v2, vcc_lo
+; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s1, s15, v17, v[1:2]
+; GFX10-NEXT:    v_cndmask_b32_e32 v13, v13, v16, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v14, v14, v18, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v6, v6, v10, s0
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, v5, v13, s0
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v12, v7, vcc_lo
+; GFX10-NEXT:    v_sub_co_u32 v7, vcc_lo, s22, v0
+; GFX10-NEXT:    v_sub_co_ci_u32_e64 v10, s1, s23, v1, vcc_lo
+; GFX10-NEXT:    v_sub_nc_u32_e32 v1, s23, v1
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, v9, v5, s0
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v14, s0
+; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s15, v10
+; GFX10-NEXT:    v_xor_b32_e32 v0, s20, v2
+; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v9, vcc_lo, s15, v1, vcc_lo
+; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s14, v7
+; GFX10-NEXT:    v_xor_b32_e32 v2, s21, v3
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0, -1, s0
+; GFX10-NEXT:    v_xor_b32_e32 v5, s4, v5
+; GFX10-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc_lo
+; GFX10-NEXT:    v_sub_co_u32 v12, vcc_lo, v7, s14
+; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v13, s0, 0, v9, vcc_lo
+; GFX10-NEXT:    v_sub_co_u32 v0, s0, v0, s20
+; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v1, s0, s21, v2, s0
+; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s15, v10
+; GFX10-NEXT:    v_xor_b32_e32 v2, s4, v6
+; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v9, vcc_lo, s15, v9, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s0
+; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s15, v13
+; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s0
+; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s14, v12
+; GFX10-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s0
+; GFX10-NEXT:    v_add_co_u32 v14, s0, v17, 1
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v15, s0, 0, v4, s0
+; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s15, v13
+; GFX10-NEXT:    v_cndmask_b32_e64 v6, v6, v11, s0
+; GFX10-NEXT:    v_add_co_u32 v11, s0, v14, 1
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v16, s0, 0, v15, s0
+; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v6
+; GFX10-NEXT:    v_sub_co_u32 v6, s0, v12, s14
 ; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v9, s0, 0, v9, s0
-; GFX10-NEXT:    v_cndmask_b32_e32 v7, v7, v11, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s1, s3, v6, v[0:1]
-; GFX10-NEXT:    v_cmp_ne_u32_e64 s0, 0, v17
-; GFX10-NEXT:    v_cndmask_b32_e32 v8, v8, v13, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v12, v7, s0
-; GFX10-NEXT:    v_cndmask_b32_e32 v7, v16, v9, vcc_lo
-; GFX10-NEXT:    v_sub_co_u32 v2, vcc_lo, s14, v2
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, v4, v8, s0
-; GFX10-NEXT:    v_sub_co_ci_u32_e64 v8, s1, s15, v0, vcc_lo
-; GFX10-NEXT:    v_sub_nc_u32_e32 v0, s15, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v14, v3, s0
-; GFX10-NEXT:    v_cndmask_b32_e64 v7, v15, v7, s0
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s3, v8
-; GFX10-NEXT:    v_xor_b32_e32 v1, s18, v1
-; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v11, vcc_lo, s3, v0, vcc_lo
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s2, v2
-; GFX10-NEXT:    v_xor_b32_e32 v4, s19, v4
-; GFX10-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s0
-; GFX10-NEXT:    v_xor_b32_e32 v3, s4, v3
-; GFX10-NEXT:    v_xor_b32_e32 v7, s4, v7
-; GFX10-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc_lo
-; GFX10-NEXT:    v_sub_co_u32 v13, vcc_lo, v2, s2
-; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v14, s0, 0, v11, vcc_lo
-; GFX10-NEXT:    v_sub_co_u32 v0, s0, v1, s18
-; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v1, s0, s19, v4, s0
-; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s3, v8
-; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v11, vcc_lo, s3, v11, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, v9, v12, s0
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s3, v14
-; GFX10-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s0
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s2, v13
-; GFX10-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s0
-; GFX10-NEXT:    v_add_co_u32 v15, s0, v6, 1
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v16, s0, 0, v5, s0
-; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s3, v14
-; GFX10-NEXT:    v_cndmask_b32_e64 v9, v9, v12, s0
-; GFX10-NEXT:    v_add_co_u32 v12, s0, v15, 1
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v17, s0, 0, v16, s0
-; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v9
-; GFX10-NEXT:    v_sub_co_u32 v9, s0, v13, s2
-; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v11, s0, 0, v11, s0
-; GFX10-NEXT:    v_cndmask_b32_e32 v12, v15, v12, vcc_lo
-; GFX10-NEXT:    v_cmp_ne_u32_e64 s0, 0, v4
-; GFX10-NEXT:    v_cndmask_b32_e32 v15, v16, v17, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e32 v4, v13, v9, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e32 v9, v14, v11, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, v6, v12, s0
-; GFX10-NEXT:    v_cndmask_b32_e64 v11, v5, v15, s0
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v4, s0
-; GFX10-NEXT:    v_cndmask_b32_e64 v8, v8, v9, s0
-; GFX10-NEXT:    s_xor_b64 s[0:1], s[12:13], s[16:17]
-; GFX10-NEXT:    v_sub_co_u32 v4, vcc_lo, v3, s4
-; GFX10-NEXT:    v_xor_b32_e32 v3, s0, v6
-; GFX10-NEXT:    v_xor_b32_e32 v6, s1, v11
-; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v5, vcc_lo, s4, v7, vcc_lo
-; GFX10-NEXT:    v_xor_b32_e32 v7, s12, v2
-; GFX10-NEXT:    v_xor_b32_e32 v8, s12, v8
-; GFX10-NEXT:    v_sub_co_u32 v2, vcc_lo, v3, s0
-; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v3, vcc_lo, s1, v6, vcc_lo
-; GFX10-NEXT:    v_sub_co_u32 v6, vcc_lo, v7, s12
-; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v7, vcc_lo, s12, v8, vcc_lo
-; GFX10-NEXT:    global_store_dwordx4 v10, v[0:3], s[8:9]
-; GFX10-NEXT:    global_store_dwordx4 v10, v[4:7], s[10:11]
+; GFX10-NEXT:    v_cndmask_b32_e32 v11, v14, v11, vcc_lo
+; GFX10-NEXT:    v_cmp_ne_u32_e64 s0, 0, v3
+; GFX10-NEXT:    v_cndmask_b32_e32 v14, v15, v16, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v12, v6, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v6, v13, v9, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v9, v17, v11, s0
+; GFX10-NEXT:    v_cndmask_b32_e64 v11, v4, v14, s0
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, v7, v3, s0
+; GFX10-NEXT:    v_cndmask_b32_e64 v6, v10, v6, s0
+; GFX10-NEXT:    s_xor_b64 s[0:1], s[16:17], s[18:19]
+; GFX10-NEXT:    v_sub_co_u32 v4, vcc_lo, v2, s4
+; GFX10-NEXT:    v_xor_b32_e32 v2, s0, v9
+; GFX10-NEXT:    v_xor_b32_e32 v7, s1, v11
+; GFX10-NEXT:    v_xor_b32_e32 v9, s16, v3
+; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v5, vcc_lo, s4, v5, vcc_lo
+; GFX10-NEXT:    v_xor_b32_e32 v10, s16, v6
+; GFX10-NEXT:    v_sub_co_u32 v2, vcc_lo, v2, s0
+; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v3, vcc_lo, s1, v7, vcc_lo
+; GFX10-NEXT:    v_sub_co_u32 v6, vcc_lo, v9, s16
+; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v7, vcc_lo, s16, v10, vcc_lo
+; GFX10-NEXT:    global_store_dwordx4 v8, v[0:3], s[8:9]
+; GFX10-NEXT:    global_store_dwordx4 v8, v[4:7], s[10:11]
 ; GFX10-NEXT:    s_endpgm
   %div = sdiv <2 x i64> %x, %y
   store <2 x i64> %div, ptr addrspace(1) %out0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
index 83ebc84e1f84a..8aa2238a90d1d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
@@ -24,135 +24,120 @@ define i64 @v_srem_i64(i64 %num, i64 %den) {
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
 ; CHECK-NEXT:  .LBB0_3:
-; CHECK-NEXT:    v_ashrrev_i32_e32 v1, 31, v3
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v2, v1
-; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v3, v1, vcc
-; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
-; CHECK-NEXT:    v_xor_b32_e32 v1, v2, v1
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, v0
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, v1
-; CHECK-NEXT:    v_sub_i32_e32 v9, vcc, 0, v0
-; CHECK-NEXT:    v_subb_u32_e32 v10, vcc, 0, v1, vcc
-; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
-; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
-; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
-; CHECK-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
-; CHECK-NEXT:    v_trunc_f32_e32 v6, v3
-; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v6
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v8, v2
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v11, v6
-; CHECK-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v9, v8, 0
-; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v9, v11, v[3:4]
-; CHECK-NEXT:    v_mul_lo_u32 v3, v11, v2
-; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v10, v8, v[6:7]
-; CHECK-NEXT:    v_mul_hi_u32 v7, v8, v2
-; CHECK-NEXT:    v_mul_hi_u32 v2, v11, v2
-; CHECK-NEXT:    v_mul_lo_u32 v12, v8, v6
-; CHECK-NEXT:    v_mul_lo_u32 v13, v11, v6
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v12
-; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
-; CHECK-NEXT:    v_mul_hi_u32 v7, v8, v6
-; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v12, v3
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v13, v2
-; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v12, v7
-; CHECK-NEXT:    v_mul_hi_u32 v6, v11, v6
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v7, v3
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v2
-; CHECK-NEXT:    v_addc_u32_e32 v11, vcc, v11, v3, vcc
-; CHECK-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v9, v8, 0
-; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v9, v11, v[3:4]
-; CHECK-NEXT:    v_ashrrev_i32_e32 v9, 31, v5
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v4, v9
-; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v10, v8, v[6:7]
-; CHECK-NEXT:    v_addc_u32_e32 v4, vcc, v5, v9, vcc
-; CHECK-NEXT:    v_xor_b32_e32 v5, v3, v9
-; CHECK-NEXT:    v_mul_lo_u32 v3, v11, v2
-; CHECK-NEXT:    v_mul_lo_u32 v7, v8, v6
-; CHECK-NEXT:    v_xor_b32_e32 v10, v4, v9
-; CHECK-NEXT:    v_mul_hi_u32 v4, v8, v2
-; CHECK-NEXT:    v_mul_hi_u32 v2, v11, v2
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
-; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; CHECK-NEXT:    v_mul_lo_u32 v4, v11, v6
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v7, v3
-; CHECK-NEXT:    v_mul_hi_u32 v7, v8, v6
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
-; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; CHECK-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v2, v0
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v3, v0, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v3, v1, v0
+; CHECK-NEXT:    v_xor_b32_e32 v6, v2, v0
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, v3
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v1, v6
+; CHECK-NEXT:    v_sub_i32_e32 v8, vcc, 0, v3
+; CHECK-NEXT:    v_subb_u32_e32 v9, vcc, 0, v6, vcc
+; CHECK-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v0
+; CHECK-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
+; CHECK-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
+; CHECK-NEXT:    v_trunc_f32_e32 v2, v1
+; CHECK-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v2
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v7, v0
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v10, v2
+; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v8, v7, 0
+; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v8, v10, v[1:2]
+; CHECK-NEXT:    v_mul_hi_u32 v11, v7, v0
+; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v9, v7, v[1:2]
+; CHECK-NEXT:    v_mul_lo_u32 v2, v10, v0
+; CHECK-NEXT:    v_mul_hi_u32 v0, v10, v0
+; CHECK-NEXT:    v_mul_lo_u32 v12, v7, v1
+; CHECK-NEXT:    v_mul_lo_u32 v13, v10, v1
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v12
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v2, v11, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v11, v7, v1
+; CHECK-NEXT:    v_addc_u32_e64 v2, s[4:5], 0, 0, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v13, v0
+; CHECK-NEXT:    v_mul_hi_u32 v1, v10, v1
+; CHECK-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v11, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v0, vcc, v0, v2, vcc
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, 0, v11, vcc
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v0
+; CHECK-NEXT:    v_addc_u32_e32 v10, vcc, v10, v1, vcc
+; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v8, v7, 0
+; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v8, v10, v[1:2]
+; CHECK-NEXT:    v_ashrrev_i32_e32 v8, 31, v5
+; CHECK-NEXT:    v_mul_hi_u32 v11, v7, v0
+; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v9, v7, v[1:2]
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v4, v8
+; CHECK-NEXT:    v_addc_u32_e32 v4, vcc, v5, v8, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v5, v2, v8
+; CHECK-NEXT:    v_mul_lo_u32 v2, v10, v0
+; CHECK-NEXT:    v_mul_lo_u32 v9, v7, v1
+; CHECK-NEXT:    v_mul_hi_u32 v0, v10, v0
+; CHECK-NEXT:    v_xor_b32_e32 v4, v4, v8
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v9
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v2, v11, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v2, v10, v1
+; CHECK-NEXT:    v_mul_hi_u32 v11, v7, v1
+; CHECK-NEXT:    v_addc_u32_e64 v9, s[4:5], 0, 0, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
+; CHECK-NEXT:    v_mul_hi_u32 v1, v10, v1
+; CHECK-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v11, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v0, vcc, v0, v9, vcc
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
+; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v10, v1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v2, v4, v0
+; CHECK-NEXT:    v_mul_lo_u32 v7, v5, v1
+; CHECK-NEXT:    v_mul_hi_u32 v9, v5, v0
+; CHECK-NEXT:    v_mul_hi_u32 v0, v4, v0
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
-; CHECK-NEXT:    v_mul_hi_u32 v6, v11, v6
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v8, v2
-; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v11, v3, vcc
-; CHECK-NEXT:    v_mul_lo_u32 v4, v10, v2
-; CHECK-NEXT:    v_mul_lo_u32 v6, v5, v3
-; CHECK-NEXT:    v_mul_hi_u32 v7, v5, v2
-; CHECK-NEXT:    v_mul_hi_u32 v2, v10, v2
-; CHECK-NEXT:    v_mul_hi_u32 v8, v10, v3
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
-; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CHECK-NEXT:    v_mul_lo_u32 v7, v10, v3
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
-; CHECK-NEXT:    v_mul_hi_u32 v6, v5, v3
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
-; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v2, v4
-; CHECK-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v0, v7, 0
-; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
-; CHECK-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v0, v4, v[3:4]
-; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, v5, v2
-; CHECK-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v1, v7, v[3:4]
-; CHECK-NEXT:    v_subb_u32_e64 v4, s[4:5], v10, v3, vcc
-; CHECK-NEXT:    v_sub_i32_e64 v3, s[4:5], v10, v3
-; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v1
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v2, v9, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v2, v4, v1
+; CHECK-NEXT:    v_mul_hi_u32 v9, v5, v1
+; CHECK-NEXT:    v_addc_u32_e64 v7, s[4:5], 0, 0, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
+; CHECK-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v9, vcc
+; CHECK-NEXT:    v_addc_u32_e32 v7, vcc, v0, v7, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v9, v4, v1
+; CHECK-NEXT:    v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5]
+; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v3, v7, 0
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v9, v2
+; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v3, v2, v[1:2]
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v5, v0
+; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v6, v7, v[1:2]
+; CHECK-NEXT:    v_subb_u32_e64 v2, s[4:5], v4, v1, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v4, v1
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v3
 ; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[4:5]
-; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v0
-; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], v4, v1
-; CHECK-NEXT:    v_subb_u32_e32 v3, vcc, v3, v1, vcc
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[4:5]
-; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, v2, v0
-; CHECK-NEXT:    v_subbrev_u32_e64 v7, s[4:5], 0, v3, vcc
-; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v1
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
-; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v0
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], v2, v6
+; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v6, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, v4, v5, s[4:5]
+; CHECK-NEXT:    v_sub_i32_e32 v5, vcc, v0, v3
+; CHECK-NEXT:    v_subbrev_u32_e64 v7, s[4:5], 0, v1, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v3
+; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v6, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
-; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], v7, v1
-; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v3, v1, vcc
-; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v6, v0
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, v8, v10, s[4:5]
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], v7, v6
+; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v5, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, v9, v10, s[4:5]
 ; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v9
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
-; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; CHECK-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
-; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v9
-; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v9
-; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v9
-; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v9, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v8
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v8
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
+; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v8, vcc
 ; CHECK-NEXT:    ; implicit-def: $vgpr2
 ; CHECK-NEXT:    ; implicit-def: $vgpr4
 ; CHECK-NEXT:    s_andn2_saveexec_b64 s[4:5], s[6:7]
@@ -229,18 +214,13 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:    v_mul_hi_u32 v8, v3, v1
 ; CHECK-NEXT:    v_mul_hi_u32 v1, v4, v1
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
-; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
-; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v2, v5, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v2, s[0:1], 0, 0, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v8
-; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
+; CHECK-NEXT:    v_addc_u32_e64 v0, s[0:1], v0, v8, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v5, s[0:1], 0, 0, s[0:1]
+; CHECK-NEXT:    v_addc_u32_e32 v0, vcc, v0, v2, vcc
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, 0, v5, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
 ; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v0
 ; CHECK-NEXT:    v_addc_u32_e32 v4, vcc, v4, v1, vcc
@@ -252,21 +232,16 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:    v_mul_hi_u32 v0, v4, v0
 ; CHECK-NEXT:    v_mul_lo_u32 v5, v3, v1
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
-; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT:    v_mul_lo_u32 v6, v4, v1
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
-; CHECK-NEXT:    v_mul_hi_u32 v5, v3, v1
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v6, v0
-; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v5
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v2, v6, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v2, v4, v1
+; CHECK-NEXT:    v_mul_hi_u32 v6, v3, v1
+; CHECK-NEXT:    v_addc_u32_e64 v5, s[0:1], 0, 0, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
 ; CHECK-NEXT:    v_mul_hi_u32 v1, v4, v1
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
+; CHECK-NEXT:    v_addc_u32_e64 v0, s[0:1], v0, v6, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1]
+; CHECK-NEXT:    v_addc_u32_e32 v0, vcc, v0, v5, vcc
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
 ; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v4, v1, vcc
@@ -274,41 +249,36 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:    v_mul_lo_u32 v3, s10, v1
 ; CHECK-NEXT:    v_mul_hi_u32 v4, s10, v0
 ; CHECK-NEXT:    v_mul_hi_u32 v0, s11, v0
-; CHECK-NEXT:    v_mul_hi_u32 v5, s11, v1
+; CHECK-NEXT:    v_mov_b32_e32 v5, s11
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
-; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT:    v_mul_lo_u32 v4, s11, v1
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; CHECK-NEXT:    v_mul_hi_u32 v3, s10, v1
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
-; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v3
-; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v0, v2
-; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s8, v4, 0
-; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v2, v4, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v2, s11, v1
+; CHECK-NEXT:    v_mul_hi_u32 v4, s10, v1
+; CHECK-NEXT:    v_addc_u32_e64 v3, s[0:1], 0, 0, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
+; CHECK-NEXT:    v_addc_u32_e64 v0, s[0:1], v0, v4, vcc
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v0, v3, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v4, s11, v1
+; CHECK-NEXT:    v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1]
+; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s8, v3, 0
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
 ; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s8, v2, v[1:2]
-; CHECK-NEXT:    v_mov_b32_e32 v5, s11
 ; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, s10, v0
-; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s9, v4, v[1:2]
-; CHECK-NEXT:    v_mov_b32_e32 v3, s9
+; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s9, v3, v[1:2]
+; CHECK-NEXT:    v_mov_b32_e32 v4, s9
 ; CHECK-NEXT:    v_subb_u32_e64 v2, s[0:1], v5, v1, vcc
 ; CHECK-NEXT:    v_sub_i32_e64 v1, s[0:1], s11, v1
-; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v3, vcc
 ; CHECK-NEXT:    v_cmp_le_u32_e64 s[0:1], s9, v2
-; CHECK-NEXT:    v_subrev_i32_e32 v3, vcc, s8, v0
-; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[0:1]
+; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, -1, s[0:1]
 ; CHECK-NEXT:    v_cmp_le_u32_e64 s[0:1], s8, v0
-; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[0:1]
 ; CHECK-NEXT:    v_cmp_eq_u32_e64 s[0:1], s9, v2
+; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v4, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, v3, v5, s[0:1]
+; CHECK-NEXT:    v_subrev_i32_e32 v3, vcc, s8, v0
+; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
 ; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s9, v1
-; CHECK-NEXT:    v_cndmask_b32_e64 v2, v4, v5, s[0:1]
 ; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
 ; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s8, v3
 ; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
@@ -394,21 +364,16 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_mul_hi_u32 v9, v15, v9
 ; GISEL-NEXT:    v_mul_lo_u32 v11, v12, v10
 ; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v16
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v16, v15, v10
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
-; GISEL-NEXT:    v_mul_hi_u32 v11, v12, v10
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v16, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
+; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, v4, v16, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v4, v15, v10
+; GISEL-NEXT:    v_mul_hi_u32 v16, v12, v10
+; GISEL-NEXT:    v_addc_u32_e64 v11, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
 ; GISEL-NEXT:    v_mul_hi_u32 v10, v15, v10
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; GISEL-NEXT:    v_addc_u32_e64 v4, s[4:5], v4, v16, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, v4, v11, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, 0, v9, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
 ; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v4
 ; GISEL-NEXT:    v_addc_u32_e32 v15, vcc, v15, v9, vcc
@@ -424,23 +389,18 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_mul_lo_u32 v13, v12, v10
 ; GISEL-NEXT:    v_xor_b32_e32 v14, v1, v4
 ; GISEL-NEXT:    v_mul_hi_u32 v1, v12, v9
-; GISEL-NEXT:    v_mul_hi_u32 v9, v15, v9
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v1, v15, v10
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v13, v0
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, v0, v1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v0, v15, v10
+; GISEL-NEXT:    v_mul_hi_u32 v1, v15, v9
 ; GISEL-NEXT:    v_mul_hi_u32 v13, v12, v10
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v13
+; GISEL-NEXT:    v_addc_u32_e64 v9, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
 ; GISEL-NEXT:    v_mul_hi_u32 v10, v15, v10
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v9, v1
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v13, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, v0, v9, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v10, v1
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v12, v0
 ; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v15, v1, vcc
@@ -448,159 +408,139 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_mul_lo_u32 v10, v11, v1
 ; GISEL-NEXT:    v_mul_hi_u32 v12, v11, v0
 ; GISEL-NEXT:    v_mul_hi_u32 v0, v14, v0
-; GISEL-NEXT:    v_mul_hi_u32 v13, v14, v1
 ; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v12, v14, v1
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
-; GISEL-NEXT:    v_mul_hi_u32 v10, v11, v1
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v12, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v0, v9
+; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v9, v12, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v9, v14, v1
+; GISEL-NEXT:    v_mul_hi_u32 v12, v11, v1
+; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v12, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v12, vcc, v0, v10, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v10, v14, v1
+; GISEL-NEXT:    v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5]
 ; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v12, 0
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, 0, v9, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
 ; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v5, v9, v[1:2]
+; GISEL-NEXT:    v_sub_i32_e32 v11, vcc, v11, v0
 ; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v8, v12, v[9:10]
-; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, v11, v0
-; GISEL-NEXT:    v_subb_u32_e64 v11, s[4:5], v14, v9, vcc
+; GISEL-NEXT:    v_subb_u32_e64 v12, s[4:5], v14, v9, vcc
 ; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v14, v9
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v11, v8
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v12, v8
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v5
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v11, v5
 ; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v11, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, v1, v9, s[4:5]
-; GISEL-NEXT:    v_subb_u32_e32 v9, vcc, v0, v8, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v12, v8
+; GISEL-NEXT:    v_subb_u32_e32 v14, vcc, v0, v8, vcc
 ; GISEL-NEXT:    v_ashrrev_i32_e32 v0, 31, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, v1, v9, s[4:5]
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v6, v0
 ; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, v7, v0, vcc
 ; GISEL-NEXT:    v_xor_b32_e32 v6, v1, v0
 ; GISEL-NEXT:    v_xor_b32_e32 v7, v7, v0
 ; GISEL-NEXT:    v_cvt_f32_u32_e32 v0, v6
 ; GISEL-NEXT:    v_cvt_f32_u32_e32 v1, v7
-; GISEL-NEXT:    v_sub_i32_e32 v13, vcc, v10, v5
-; GISEL-NEXT:    v_subbrev_u32_e64 v14, s[4:5], 0, v9, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v15, vcc, v11, v5
+; GISEL-NEXT:    v_subbrev_u32_e64 v16, s[4:5], 0, v14, vcc
 ; GISEL-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
 ; GISEL-NEXT:    v_rcp_iflag_f32_e32 v0, v0
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v14, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v13, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v14, v8
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v16, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v15, v5
 ; GISEL-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, v15, v1, s[4:5]
 ; GISEL-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
-; GISEL-NEXT:    v_trunc_f32_e32 v16, v1
-; GISEL-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v16
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v17, v0
-; GISEL-NEXT:    v_sub_i32_e64 v18, s[4:5], 0, v6
-; GISEL-NEXT:    v_subb_u32_e64 v19, s[4:5], 0, v7, s[4:5]
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v18, v17, 0
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v16, v16
-; GISEL-NEXT:    v_subb_u32_e32 v20, vcc, v9, v8, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v18, v16, v[1:2]
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v13, v5
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v19, v17, v[8:9]
-; GISEL-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v20, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v15
-; GISEL-NEXT:    v_cndmask_b32_e32 v9, v13, v1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v1, v16, v0
-; GISEL-NEXT:    v_mul_lo_u32 v13, v17, v8
-; GISEL-NEXT:    v_cndmask_b32_e32 v5, v14, v5, vcc
-; GISEL-NEXT:    v_mul_hi_u32 v14, v17, v0
-; GISEL-NEXT:    v_mul_hi_u32 v0, v16, v0
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v14
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v14, v16, v8
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
-; GISEL-NEXT:    v_mul_hi_u32 v13, v17, v8
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v14, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
-; GISEL-NEXT:    v_mul_hi_u32 v8, v16, v8
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v17, v0
-; GISEL-NEXT:    v_addc_u32_e32 v14, vcc, v16, v1, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v18, v13, 0
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
-; GISEL-NEXT:    v_cndmask_b32_e32 v10, v10, v9, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v18, v14, v[1:2]
-; GISEL-NEXT:    v_xor_b32_e32 v1, v10, v4
-; GISEL-NEXT:    v_ashrrev_i32_e32 v10, 31, v3
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v19, v13, v[8:9]
+; GISEL-NEXT:    v_trunc_f32_e32 v17, v1
+; GISEL-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v17
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v18, v0
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e64 v19, s[4:5], 0, v6
+; GISEL-NEXT:    v_subb_u32_e64 v20, s[4:5], 0, v7, s[4:5]
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v19, v18, 0
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v17, v17
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v16, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v21, v9, v10, s[4:5]
+; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v19, v17, v[1:2]
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v14, v8, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v20, v18, v[9:10]
+; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v15, v5
+; GISEL-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v1, v17, v0
+; GISEL-NEXT:    v_mul_lo_u32 v10, v18, v8
+; GISEL-NEXT:    v_mul_hi_u32 v14, v18, v0
+; GISEL-NEXT:    v_mul_hi_u32 v0, v17, v0
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v21
+; GISEL-NEXT:    v_add_i32_e64 v1, s[4:5], v1, v10
+; GISEL-NEXT:    v_addc_u32_e64 v1, s[4:5], v1, v14, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v1, v17, v8
+; GISEL-NEXT:    v_mul_hi_u32 v14, v18, v8
+; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v0, s[4:5], v1, v0
+; GISEL-NEXT:    v_mul_hi_u32 v8, v17, v8
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[6:7], v0, v14, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v1, s[6:7], 0, 0, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v10, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v1, s[4:5], 0, v1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v1, s[4:5], v8, v1
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v18, v0
+; GISEL-NEXT:    v_addc_u32_e64 v14, s[4:5], v17, v1, s[4:5]
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v19, v10, 0
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v15, v5, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v15, v16, v9, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v19, v14, v[1:2]
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v13
 ; GISEL-NEXT:    v_cndmask_b32_e32 v5, v11, v5, vcc
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v10
-; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v10, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v11, v2, v10
+; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v20, v10, v[8:9]
+; GISEL-NEXT:    v_ashrrev_i32_e32 v11, 31, v3
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v12, v15, vcc
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v11
+; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v11, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v12, v2, v11
 ; GISEL-NEXT:    v_mul_lo_u32 v2, v14, v0
-; GISEL-NEXT:    v_mul_lo_u32 v9, v13, v8
-; GISEL-NEXT:    v_xor_b32_e32 v12, v3, v10
-; GISEL-NEXT:    v_mul_hi_u32 v3, v13, v0
+; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v8
+; GISEL-NEXT:    v_xor_b32_e32 v13, v3, v11
+; GISEL-NEXT:    v_mul_hi_u32 v3, v10, v0
 ; GISEL-NEXT:    v_mul_hi_u32 v0, v14, v0
 ; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v3, v14, v8
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v9, v2
-; GISEL-NEXT:    v_mul_hi_u32 v9, v13, v8
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v9
+; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, v2, v3, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v2, v14, v8
+; GISEL-NEXT:    v_mul_hi_u32 v9, v10, v8
+; GISEL-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
 ; GISEL-NEXT:    v_mul_hi_u32 v8, v14, v8
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v9, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, v0, v3, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v8, v2
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v13, v0
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v10, v0
 ; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, v14, v2, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v3, v12, v0
-; GISEL-NEXT:    v_mul_lo_u32 v8, v11, v2
-; GISEL-NEXT:    v_mul_hi_u32 v9, v11, v0
-; GISEL-NEXT:    v_mul_hi_u32 v0, v12, v0
-; GISEL-NEXT:    v_xor_b32_e32 v5, v5, v4
+; GISEL-NEXT:    v_mul_lo_u32 v3, v13, v0
+; GISEL-NEXT:    v_mul_lo_u32 v8, v12, v2
+; GISEL-NEXT:    v_mul_hi_u32 v9, v12, v0
+; GISEL-NEXT:    v_mul_hi_u32 v0, v13, v0
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v4
 ; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v9, v12, v2
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v8, v3
-; GISEL-NEXT:    v_mul_hi_u32 v8, v11, v2
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v0, v3
+; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v9, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v3, v13, v2
 ; GISEL-NEXT:    v_mul_hi_u32 v9, v12, v2
-; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v6, v13, 0
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v0
+; GISEL-NEXT:    v_addc_u32_e64 v8, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v9, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v10, vcc, v0, v8, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v8, v13, v2
+; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v6, v10, 0
+; GISEL-NEXT:    v_xor_b32_e32 v5, v5, v4
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v0
 ; GISEL-NEXT:    v_mov_b32_e32 v0, v3
 ; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v6, v8, v[0:1]
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v1, v4
-; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v5, v4, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v7, v13, v[8:9]
-; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v11, v2
-; GISEL-NEXT:    v_subb_u32_e64 v4, s[4:5], v12, v3, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v12, v3
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v5, v4
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v4, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v7, v10, v[8:9]
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v12, v2
+; GISEL-NEXT:    v_subb_u32_e64 v4, s[4:5], v13, v3, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v13, v3
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v7
 ; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[4:5]
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v6
@@ -611,24 +551,24 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, v2, v6
 ; GISEL-NEXT:    v_subbrev_u32_e64 v9, s[4:5], 0, v3, vcc
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v6
 ; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v7, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[4:5]
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v9, v7
 ; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v8, v6
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, v11, v12, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, v10, v12, s[4:5]
 ; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v11
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
 ; GISEL-NEXT:    v_cndmask_b32_e32 v6, v8, v6, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v3, v9, v3, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
 ; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v10
-; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v10
-; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
-; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v10, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v11
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v11
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v11
+; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v11, vcc
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_srem_v2i64:
@@ -646,131 +586,116 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
 ; CGP-NEXT:    s_cbranch_execz .LBB2_2
 ; CGP-NEXT:  ; %bb.1:
-; CGP-NEXT:    v_ashrrev_i32_e32 v1, 31, v5
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v4, v1
-; CGP-NEXT:    v_addc_u32_e32 v2, vcc, v5, v1, vcc
-; CGP-NEXT:    v_xor_b32_e32 v0, v0, v1
-; CGP-NEXT:    v_xor_b32_e32 v1, v2, v1
-; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v0
-; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v1
-; CGP-NEXT:    v_sub_i32_e32 v12, vcc, 0, v0
-; CGP-NEXT:    v_subb_u32_e32 v13, vcc, 0, v1, vcc
-; CGP-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v2, v2
-; CGP-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
-; CGP-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
-; CGP-NEXT:    v_trunc_f32_e32 v4, v3
-; CGP-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v4
-; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v2
-; CGP-NEXT:    v_cvt_u32_f32_e32 v14, v4
-; CGP-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v12, v5, 0
-; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v12, v14, v[3:4]
-; CGP-NEXT:    v_mul_hi_u32 v15, v5, v2
-; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v13, v5, v[3:4]
-; CGP-NEXT:    v_mul_lo_u32 v4, v14, v2
-; CGP-NEXT:    v_mul_hi_u32 v2, v14, v2
-; CGP-NEXT:    v_mul_lo_u32 v16, v5, v3
-; CGP-NEXT:    v_mul_lo_u32 v17, v14, v3
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v16
-; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v15
-; CGP-NEXT:    v_mul_hi_u32 v15, v5, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v16, v4
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v17, v2
-; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v15
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v15, vcc, v16, v15
-; CGP-NEXT:    v_mul_hi_u32 v3, v14, v3
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v15, v4
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v2
-; CGP-NEXT:    v_addc_u32_e32 v14, vcc, v14, v3, vcc
-; CGP-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v12, v5, 0
-; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v12, v14, v[3:4]
+; CGP-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v4, v0
+; CGP-NEXT:    v_addc_u32_e32 v2, vcc, v5, v0, vcc
+; CGP-NEXT:    v_xor_b32_e32 v3, v1, v0
+; CGP-NEXT:    v_xor_b32_e32 v4, v2, v0
+; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v3
+; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v4
+; CGP-NEXT:    v_sub_i32_e32 v12, vcc, 0, v3
+; CGP-NEXT:    v_subb_u32_e32 v13, vcc, 0, v4, vcc
+; CGP-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v0
+; CGP-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
+; CGP-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
+; CGP-NEXT:    v_trunc_f32_e32 v2, v1
+; CGP-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v2
+; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v0
+; CGP-NEXT:    v_cvt_u32_f32_e32 v14, v2
+; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v12, v5, 0
+; CGP-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v12, v14, v[1:2]
+; CGP-NEXT:    v_mul_hi_u32 v15, v5, v0
+; CGP-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v13, v5, v[1:2]
+; CGP-NEXT:    v_mul_lo_u32 v2, v14, v0
+; CGP-NEXT:    v_mul_hi_u32 v0, v14, v0
+; CGP-NEXT:    v_mul_lo_u32 v16, v5, v1
+; CGP-NEXT:    v_mul_lo_u32 v17, v14, v1
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v16
+; CGP-NEXT:    v_addc_u32_e32 v2, vcc, v2, v15, vcc
+; CGP-NEXT:    v_mul_hi_u32 v15, v5, v1
+; CGP-NEXT:    v_addc_u32_e64 v2, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v17, v0
+; CGP-NEXT:    v_mul_hi_u32 v1, v14, v1
+; CGP-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v15, vcc
+; CGP-NEXT:    v_addc_u32_e64 v15, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v0, vcc, v0, v2, vcc
+; CGP-NEXT:    v_addc_u32_e32 v2, vcc, 0, v15, vcc
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v0
+; CGP-NEXT:    v_addc_u32_e32 v14, vcc, v14, v1, vcc
+; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v12, v5, 0
+; CGP-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v12, v14, v[1:2]
 ; CGP-NEXT:    v_ashrrev_i32_e32 v12, 31, v11
-; CGP-NEXT:    v_mul_hi_u32 v15, v5, v2
-; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v13, v5, v[3:4]
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v10, v12
+; CGP-NEXT:    v_mul_hi_u32 v15, v5, v0
+; CGP-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v13, v5, v[1:2]
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v10, v12
 ; CGP-NEXT:    v_addc_u32_e32 v10, vcc, v11, v12, vcc
-; CGP-NEXT:    v_xor_b32_e32 v11, v4, v12
-; CGP-NEXT:    v_mul_lo_u32 v4, v14, v2
-; CGP-NEXT:    v_mul_lo_u32 v13, v5, v3
-; CGP-NEXT:    v_mul_hi_u32 v2, v14, v2
+; CGP-NEXT:    v_xor_b32_e32 v11, v2, v12
+; CGP-NEXT:    v_mul_lo_u32 v2, v14, v0
+; CGP-NEXT:    v_mul_lo_u32 v13, v5, v1
+; CGP-NEXT:    v_mul_hi_u32 v0, v14, v0
 ; CGP-NEXT:    v_xor_b32_e32 v10, v10, v12
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v15
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v15, v14, v3
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
-; CGP-NEXT:    v_mul_hi_u32 v13, v5, v3
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v15, v2
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v15, v13
-; CGP-NEXT:    v_mul_hi_u32 v3, v14, v3
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
-; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v14, v3, vcc
-; CGP-NEXT:    v_mul_lo_u32 v4, v10, v2
-; CGP-NEXT:    v_mul_lo_u32 v5, v11, v3
-; CGP-NEXT:    v_mul_hi_u32 v13, v11, v2
-; CGP-NEXT:    v_mul_hi_u32 v2, v10, v2
-; CGP-NEXT:    v_mul_hi_u32 v14, v10, v3
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v13, v10, v3
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; CGP-NEXT:    v_mul_hi_u32 v5, v11, v3
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v13, v2
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT:    v_addc_u32_e32 v2, vcc, v2, v15, vcc
+; CGP-NEXT:    v_mul_lo_u32 v2, v14, v1
+; CGP-NEXT:    v_mul_hi_u32 v15, v5, v1
+; CGP-NEXT:    v_addc_u32_e64 v13, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
+; CGP-NEXT:    v_mul_hi_u32 v1, v14, v1
+; CGP-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v15, vcc
+; CGP-NEXT:    v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v0, vcc, v0, v13, vcc
+; CGP-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v5, v0
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v14, v1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v2, v10, v0
+; CGP-NEXT:    v_mul_lo_u32 v5, v11, v1
+; CGP-NEXT:    v_mul_hi_u32 v13, v11, v0
+; CGP-NEXT:    v_mul_hi_u32 v0, v10, v0
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v13, v5
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v2, v4
-; CGP-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v0, v13, 0
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v14, v4
-; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v0, v4, v[3:4]
-; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v11, v2
-; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v1, v13, v[3:4]
-; CGP-NEXT:    v_subb_u32_e64 v4, s[4:5], v10, v3, vcc
-; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v10, v3
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v1
+; CGP-NEXT:    v_addc_u32_e32 v2, vcc, v2, v13, vcc
+; CGP-NEXT:    v_mul_lo_u32 v2, v10, v1
+; CGP-NEXT:    v_mul_hi_u32 v13, v11, v1
+; CGP-NEXT:    v_addc_u32_e64 v5, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
+; CGP-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v13, vcc
+; CGP-NEXT:    v_addc_u32_e32 v5, vcc, v0, v5, vcc
+; CGP-NEXT:    v_mul_hi_u32 v13, v10, v1
+; CGP-NEXT:    v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v3, v5, 0
+; CGP-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v13, v2
+; CGP-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v3, v2, v[1:2]
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v11, v0
+; CGP-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v4, v5, v[1:2]
+; CGP-NEXT:    v_subb_u32_e64 v2, s[4:5], v10, v1, vcc
+; CGP-NEXT:    v_sub_i32_e64 v1, s[4:5], v10, v1
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v4
 ; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v0
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v3
 ; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v4, v1
-; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v1, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v2, v4
+; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v1, v4, vcc
 ; CGP-NEXT:    v_cndmask_b32_e64 v5, v5, v10, s[4:5]
-; CGP-NEXT:    v_sub_i32_e32 v10, vcc, v2, v0
-; CGP-NEXT:    v_subbrev_u32_e64 v11, s[4:5], 0, v3, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v11, v1
+; CGP-NEXT:    v_sub_i32_e32 v10, vcc, v0, v3
+; CGP-NEXT:    v_subbrev_u32_e64 v11, s[4:5], 0, v1, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v11, v4
 ; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v0
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v3
+; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v1, v4, vcc
 ; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v11, v1
-; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v3, v1, vcc
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v10, v0
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v11, v4
+; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v10, v3
 ; CGP-NEXT:    v_cndmask_b32_e64 v13, v13, v14, s[4:5]
 ; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
 ; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v13
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v10, v0, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v10, v3, vcc
 ; CGP-NEXT:    v_cndmask_b32_e32 v1, v11, v1, vcc
 ; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
 ; CGP-NEXT:    v_xor_b32_e32 v0, v0, v12
 ; CGP-NEXT:    v_xor_b32_e32 v1, v1, v12
 ; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v12
@@ -815,131 +740,116 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; CGP-NEXT:    s_setpc_b64 s[30:31]
 ; CGP-NEXT:  .LBB2_7:
-; CGP-NEXT:    v_ashrrev_i32_e32 v3, 31, v7
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v6, v3
-; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v7, v3, vcc
-; CGP-NEXT:    v_xor_b32_e32 v2, v2, v3
-; CGP-NEXT:    v_xor_b32_e32 v3, v4, v3
-; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
-; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v3
-; CGP-NEXT:    v_sub_i32_e32 v10, vcc, 0, v2
-; CGP-NEXT:    v_subb_u32_e32 v11, vcc, 0, v3, vcc
-; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
-; CGP-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
-; CGP-NEXT:    v_trunc_f32_e32 v6, v5
-; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v6
-; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v4
-; CGP-NEXT:    v_cvt_u32_f32_e32 v12, v6
-; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v10, v7, 0
-; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v10, v12, v[5:6]
-; CGP-NEXT:    v_mul_hi_u32 v13, v7, v4
-; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v11, v7, v[5:6]
-; CGP-NEXT:    v_mul_lo_u32 v6, v12, v4
-; CGP-NEXT:    v_mul_hi_u32 v4, v12, v4
-; CGP-NEXT:    v_mul_lo_u32 v14, v7, v5
-; CGP-NEXT:    v_mul_lo_u32 v15, v12, v5
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v13
-; CGP-NEXT:    v_mul_hi_u32 v13, v7, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v14, v6
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v15, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
-; CGP-NEXT:    v_mul_hi_u32 v5, v12, v5
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v13, v6
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v4
-; CGP-NEXT:    v_addc_u32_e32 v12, vcc, v12, v5, vcc
-; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v10, v7, 0
-; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v10, v12, v[5:6]
+; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v7
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v6, v2
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v7, v2, vcc
+; CGP-NEXT:    v_xor_b32_e32 v5, v3, v2
+; CGP-NEXT:    v_xor_b32_e32 v6, v4, v2
+; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v5
+; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v6
+; CGP-NEXT:    v_sub_i32_e32 v10, vcc, 0, v5
+; CGP-NEXT:    v_subb_u32_e32 v11, vcc, 0, v6, vcc
+; CGP-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v2, v2
+; CGP-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
+; CGP-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
+; CGP-NEXT:    v_trunc_f32_e32 v4, v3
+; CGP-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v4
+; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v2
+; CGP-NEXT:    v_cvt_u32_f32_e32 v12, v4
+; CGP-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v10, v7, 0
+; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v10, v12, v[3:4]
+; CGP-NEXT:    v_mul_hi_u32 v13, v7, v2
+; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v11, v7, v[3:4]
+; CGP-NEXT:    v_mul_lo_u32 v4, v12, v2
+; CGP-NEXT:    v_mul_hi_u32 v2, v12, v2
+; CGP-NEXT:    v_mul_lo_u32 v14, v7, v3
+; CGP-NEXT:    v_mul_lo_u32 v15, v12, v3
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v14
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v4, v13, vcc
+; CGP-NEXT:    v_mul_hi_u32 v13, v7, v3
+; CGP-NEXT:    v_addc_u32_e64 v4, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v15, v2
+; CGP-NEXT:    v_mul_hi_u32 v3, v12, v3
+; CGP-NEXT:    v_addc_u32_e64 v2, s[4:5], v2, v13, vcc
+; CGP-NEXT:    v_addc_u32_e64 v13, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v2, vcc, v2, v4, vcc
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, 0, v13, vcc
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v2
+; CGP-NEXT:    v_addc_u32_e32 v12, vcc, v12, v3, vcc
+; CGP-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v10, v7, 0
+; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v10, v12, v[3:4]
 ; CGP-NEXT:    v_ashrrev_i32_e32 v10, 31, v9
-; CGP-NEXT:    v_mul_hi_u32 v13, v7, v4
-; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v11, v7, v[5:6]
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v8, v10
+; CGP-NEXT:    v_mul_hi_u32 v13, v7, v2
+; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v11, v7, v[3:4]
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v8, v10
 ; CGP-NEXT:    v_addc_u32_e32 v8, vcc, v9, v10, vcc
-; CGP-NEXT:    v_xor_b32_e32 v9, v6, v10
-; CGP-NEXT:    v_mul_lo_u32 v6, v12, v4
-; CGP-NEXT:    v_mul_lo_u32 v11, v7, v5
-; CGP-NEXT:    v_mul_hi_u32 v4, v12, v4
+; CGP-NEXT:    v_xor_b32_e32 v9, v4, v10
+; CGP-NEXT:    v_mul_lo_u32 v4, v12, v2
+; CGP-NEXT:    v_mul_lo_u32 v11, v7, v3
+; CGP-NEXT:    v_mul_hi_u32 v2, v12, v2
 ; CGP-NEXT:    v_xor_b32_e32 v8, v8, v10
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v13, v12, v5
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v11, v6
-; CGP-NEXT:    v_mul_hi_u32 v11, v7, v5
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
 ; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v13, v11
-; CGP-NEXT:    v_mul_hi_u32 v5, v12, v5
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v11, v6
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
-; CGP-NEXT:    v_addc_u32_e32 v5, vcc, v12, v5, vcc
-; CGP-NEXT:    v_mul_lo_u32 v6, v8, v4
-; CGP-NEXT:    v_mul_lo_u32 v7, v9, v5
-; CGP-NEXT:    v_mul_hi_u32 v11, v9, v4
-; CGP-NEXT:    v_mul_hi_u32 v4, v8, v4
-; CGP-NEXT:    v_mul_hi_u32 v12, v8, v5
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
-; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v11, v8, v5
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; CGP-NEXT:    v_mul_hi_u32 v7, v9, v5
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v4, v13, vcc
+; CGP-NEXT:    v_mul_lo_u32 v4, v12, v3
+; CGP-NEXT:    v_mul_hi_u32 v13, v7, v3
+; CGP-NEXT:    v_addc_u32_e64 v11, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
+; CGP-NEXT:    v_mul_hi_u32 v3, v12, v3
+; CGP-NEXT:    v_addc_u32_e64 v2, s[4:5], v2, v13, vcc
+; CGP-NEXT:    v_addc_u32_e64 v4, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v2, vcc, v2, v11, vcc
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, 0, v4, vcc
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
+; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v12, v3, vcc
+; CGP-NEXT:    v_mul_lo_u32 v4, v8, v2
+; CGP-NEXT:    v_mul_lo_u32 v7, v9, v3
+; CGP-NEXT:    v_mul_hi_u32 v11, v9, v2
+; CGP-NEXT:    v_mul_hi_u32 v2, v8, v2
 ; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
-; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v11, v7
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v4, v6
-; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v2, v11, 0
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v12, v6
-; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v2, v6, v[5:6]
-; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v9, v4
-; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v3, v11, v[5:6]
-; CGP-NEXT:    v_subb_u32_e64 v6, s[4:5], v8, v5, vcc
-; CGP-NEXT:    v_sub_i32_e64 v5, s[4:5], v8, v5
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v3
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v4, v11, vcc
+; CGP-NEXT:    v_mul_lo_u32 v4, v8, v3
+; CGP-NEXT:    v_mul_hi_u32 v11, v9, v3
+; CGP-NEXT:    v_addc_u32_e64 v7, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
+; CGP-NEXT:    v_addc_u32_e64 v2, s[4:5], v2, v11, vcc
+; CGP-NEXT:    v_addc_u32_e32 v7, vcc, v2, v7, vcc
+; CGP-NEXT:    v_mul_hi_u32 v11, v8, v3
+; CGP-NEXT:    v_addc_u32_e64 v4, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v5, v7, 0
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, 0, v4, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
+; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v5, v4, v[3:4]
+; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v9, v2
+; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v6, v7, v[3:4]
+; CGP-NEXT:    v_subb_u32_e64 v4, s[4:5], v8, v3, vcc
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v8, v3
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v6
 ; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v2
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v5
 ; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v6, v3
-; CGP-NEXT:    v_subb_u32_e32 v5, vcc, v5, v3, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v4, v6
+; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v6, vcc
 ; CGP-NEXT:    v_cndmask_b32_e64 v7, v7, v8, s[4:5]
-; CGP-NEXT:    v_sub_i32_e32 v8, vcc, v4, v2
-; CGP-NEXT:    v_subbrev_u32_e64 v9, s[4:5], 0, v5, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v3
+; CGP-NEXT:    v_sub_i32_e32 v8, vcc, v2, v5
+; CGP-NEXT:    v_subbrev_u32_e64 v9, s[4:5], 0, v3, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v6
 ; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v2
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v5
+; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v6, vcc
 ; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v9, v3
-; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v5, v3, vcc
-; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v8, v2
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v9, v6
+; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v8, v5
 ; CGP-NEXT:    v_cndmask_b32_e64 v11, v11, v12, s[4:5]
 ; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
 ; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v11
-; CGP-NEXT:    v_cndmask_b32_e32 v2, v8, v2, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v5, v8, v5, vcc
 ; CGP-NEXT:    v_cndmask_b32_e32 v3, v9, v3, vcc
 ; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
-; CGP-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
-; CGP-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
 ; CGP-NEXT:    v_xor_b32_e32 v2, v2, v10
 ; CGP-NEXT:    v_xor_b32_e32 v3, v3, v10
 ; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
@@ -999,18 +909,13 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) {
 ; CHECK-NEXT:    v_mul_hi_u32 v11, v5, v3
 ; CHECK-NEXT:    v_mul_hi_u32 v3, v7, v3
 ; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
-; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v10, v2
-; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
-; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v11
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
-; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
+; CHECK-NEXT:    v_addc_u32_e32 v4, vcc, v4, v8, vcc
+; CHECK-NEXT:    v_add_i32_e64 v2, s[4:5], v10, v2
+; CHECK-NEXT:    v_addc_u32_e64 v2, s[6:7], v2, v11, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e64 v4, s[8:9], 0, 0, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v8, s[6:7], 0, 0, s[6:7]
+; CHECK-NEXT:    v_addc_u32_e64 v2, vcc, v2, v4, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v4, vcc, 0, v8, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
 ; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v2
 ; CHECK-NEXT:    v_addc_u32_e32 v7, vcc, v7, v3, vcc
@@ -1025,23 +930,18 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) {
 ; CHECK-NEXT:    v_mul_lo_u32 v8, v5, v3
 ; CHECK-NEXT:    v_xor_b32_e32 v9, v1, v6
 ; CHECK-NEXT:    v_mul_hi_u32 v1, v5, v2
-; CHECK-NEXT:    v_mul_hi_u32 v2, v7, v2
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v8
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; CHECK-NEXT:    v_mul_lo_u32 v1, v7, v3
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
+; CHECK-NEXT:    v_addc_u32_e32 v0, vcc, v0, v1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v0, v7, v3
+; CHECK-NEXT:    v_mul_hi_u32 v1, v7, v2
 ; CHECK-NEXT:    v_mul_hi_u32 v8, v5, v3
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
-; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v8
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v8
+; CHECK-NEXT:    v_addc_u32_e64 v2, s[4:5], 0, 0, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
 ; CHECK-NEXT:    v_mul_hi_u32 v3, v7, v3
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
-; CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
+; CHECK-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v8, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v0, vcc, v0, v2, vcc
+; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v3, v1
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v5, v0
 ; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v7, v1, vcc
@@ -1051,23 +951,18 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) {
 ; CHECK-NEXT:    v_mul_hi_u32 v0, v9, v0
 ; CHECK-NEXT:    v_mov_b32_e32 v5, 0x1000
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT:    v_mul_lo_u32 v7, v9, v1
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; CHECK-NEXT:    v_mul_hi_u32 v3, v4, v1
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v3
-; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v7, v3
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; CHECK-NEXT:    v_mul_hi_u32 v7, v9, v1
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v2, v7, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v2, v9, v1
+; CHECK-NEXT:    v_mul_hi_u32 v7, v4, v1
+; CHECK-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
+; CHECK-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v7, vcc
+; CHECK-NEXT:    v_addc_u32_e32 v0, vcc, v0, v3, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v3, v9, v1
+; CHECK-NEXT:    v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5]
 ; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v0, 0
-; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
 ; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v5, v2, v[1:2]
 ; CHECK-NEXT:    v_sub_i32_e64 v0, s[4:5], v4, v0
 ; CHECK-NEXT:    v_subb_u32_e64 v2, vcc, v9, v1, s[4:5]
@@ -1110,6 +1005,8 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    s_subb_u32 s7, 0, 0
 ; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
 ; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT:    s_sub_u32 s8, 0, 0x1000
+; GISEL-NEXT:    s_subb_u32 s9, 0, 0
 ; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
 ; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
 ; GISEL-NEXT:    v_trunc_f32_e32 v7, v5
@@ -1127,18 +1024,13 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_mul_hi_u32 v12, v6, v8
 ; GISEL-NEXT:    v_mul_hi_u32 v8, v7, v8
 ; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v10, v5
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v9, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v5, s[4:5], 0, 0, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
+; GISEL-NEXT:    v_addc_u32_e64 v4, s[4:5], v4, v12, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, v4, v5, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v9, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
 ; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v6, v4
 ; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v11, 0
@@ -1154,23 +1046,18 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_mul_lo_u32 v12, v11, v9
 ; GISEL-NEXT:    v_xor_b32_e32 v13, v1, v4
 ; GISEL-NEXT:    v_mul_hi_u32 v1, v11, v8
-; GISEL-NEXT:    v_mul_hi_u32 v8, v5, v8
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v1, v5, v9
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v12, v0
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, v0, v1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v0, v5, v9
+; GISEL-NEXT:    v_mul_hi_u32 v1, v5, v8
 ; GISEL-NEXT:    v_mul_hi_u32 v12, v11, v9
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; GISEL-NEXT:    v_addc_u32_e64 v8, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
 ; GISEL-NEXT:    v_mul_hi_u32 v9, v5, v9
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v12, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, v0, v8, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v9, v1
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
 ; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v5, v1, vcc
@@ -1180,104 +1067,86 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_mul_hi_u32 v0, v13, v0
 ; GISEL-NEXT:    v_mov_b32_e32 v5, 0x1000
 ; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v11, v13, v1
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; GISEL-NEXT:    v_mul_hi_u32 v9, v10, v1
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v0, v8
-; GISEL-NEXT:    v_mul_hi_u32 v12, v13, v1
+; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, v8, v11, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v8, v13, v1
+; GISEL-NEXT:    v_mul_hi_u32 v11, v10, v1
+; GISEL-NEXT:    v_addc_u32_e64 v9, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v11, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v11, vcc, v0, v9, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v9, v13, v1
+; GISEL-NEXT:    v_addc_u32_e64 v8, s[4:5], 0, 0, s[4:5]
 ; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v11, 0
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, 0, v8, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
 ; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v5, v8, v[1:2]
 ; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, v10, v0
 ; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], 0, v11, v[8:9]
-; GISEL-NEXT:    s_sub_u32 s6, 0, 0x1000
-; GISEL-NEXT:    s_subb_u32 s7, 0, 0
 ; GISEL-NEXT:    v_subb_u32_e64 v11, s[4:5], v13, v8, vcc
 ; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v13, v8
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v5
-; GISEL-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[4:5]
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
-; GISEL-NEXT:    v_sub_i32_e32 v13, vcc, v10, v5
 ; GISEL-NEXT:    v_cndmask_b32_e64 v12, -1, v1, s[4:5]
-; GISEL-NEXT:    v_subbrev_u32_e32 v14, vcc, 0, v0, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v6, 0
+; GISEL-NEXT:    v_subbrev_u32_e32 v8, vcc, 0, v0, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s8, v6, 0
+; GISEL-NEXT:    v_sub_i32_e32 v13, vcc, v10, v5
+; GISEL-NEXT:    v_subbrev_u32_e32 v14, vcc, 0, v8, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s8, v7, v[1:2]
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v13, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, -1, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s9, v6, v[8:9]
 ; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v14
-; GISEL-NEXT:    v_cndmask_b32_e32 v15, -1, v8, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[1:2]
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v13, v5
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
-; GISEL-NEXT:    v_subbrev_u32_e32 v16, vcc, 0, v14, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v15
-; GISEL-NEXT:    v_cndmask_b32_e32 v9, v13, v1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v1, v7, v0
-; GISEL-NEXT:    v_mul_lo_u32 v13, v6, v8
-; GISEL-NEXT:    v_mul_hi_u32 v15, v6, v0
-; GISEL-NEXT:    v_cndmask_b32_e32 v14, v14, v16, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, -1, v15, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, v13, v5
+; GISEL-NEXT:    v_subbrev_u32_e32 v15, vcc, 0, v14, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v16, v7, v0
+; GISEL-NEXT:    v_mul_lo_u32 v17, v6, v8
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
+; GISEL-NEXT:    v_mul_hi_u32 v1, v6, v0
+; GISEL-NEXT:    v_cndmask_b32_e32 v9, v13, v9, vcc
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v16, v17
+; GISEL-NEXT:    v_addc_u32_e64 v1, s[4:5], v13, v1, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v1, v7, v8
 ; GISEL-NEXT:    v_mul_hi_u32 v0, v7, v0
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v15, v7, v8
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
-; GISEL-NEXT:    v_mul_hi_u32 v13, v6, v8
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v15, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v15, v13
+; GISEL-NEXT:    v_mul_hi_u32 v16, v6, v8
+; GISEL-NEXT:    v_addc_u32_e64 v13, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v0, s[4:5], v1, v0
 ; GISEL-NEXT:    v_mul_hi_u32 v8, v7, v8
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v6, v0
-; GISEL-NEXT:    v_addc_u32_e32 v13, vcc, v7, v1, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[6:7], v0, v16, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v1, s[6:7], 0, 0, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v13, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v1, s[4:5], 0, v1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v1, s[4:5], v8, v1
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v6, v0
+; GISEL-NEXT:    v_addc_u32_e64 v13, s[4:5], v7, v1, s[4:5]
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s8, v8, 0
+; GISEL-NEXT:    v_cndmask_b32_e32 v14, v14, v15, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
+; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], s8, v13, v[1:2]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v9, v10, v9, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2]
-; GISEL-NEXT:    v_xor_b32_e32 v1, v9, v4
-; GISEL-NEXT:    v_ashrrev_i32_e32 v9, 31, v3
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e32 v10, v11, v14, vcc
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v9
-; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v9, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v11, v2, v9
+; GISEL-NEXT:    v_ashrrev_i32_e32 v10, 31, v3
+; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], s9, v8, v[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v11, v14, vcc
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v10
+; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v10, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v11, v2, v10
 ; GISEL-NEXT:    v_mul_lo_u32 v2, v13, v0
 ; GISEL-NEXT:    v_mul_lo_u32 v7, v8, v6
-; GISEL-NEXT:    v_xor_b32_e32 v12, v3, v9
+; GISEL-NEXT:    v_xor_b32_e32 v12, v3, v10
 ; GISEL-NEXT:    v_mul_hi_u32 v3, v8, v0
 ; GISEL-NEXT:    v_mul_hi_u32 v0, v13, v0
 ; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v3, v13, v6
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
+; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, v2, v3, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v2, v13, v6
 ; GISEL-NEXT:    v_mul_hi_u32 v7, v8, v6
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
+; GISEL-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
 ; GISEL-NEXT:    v_mul_hi_u32 v6, v13, v6
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v7, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, v0, v3, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
 ; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, v13, v2, vcc
@@ -1285,30 +1154,26 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_mul_lo_u32 v6, v11, v2
 ; GISEL-NEXT:    v_mul_hi_u32 v7, v11, v0
 ; GISEL-NEXT:    v_mul_hi_u32 v0, v12, v0
-; GISEL-NEXT:    v_xor_b32_e32 v8, v10, v4
+; GISEL-NEXT:    v_xor_b32_e32 v8, v9, v4
 ; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v7, v12, v2
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
-; GISEL-NEXT:    v_mul_hi_u32 v6, v11, v2
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v0, v3
-; GISEL-NEXT:    v_mul_hi_u32 v7, v12, v2
-; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v5, v10, 0
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v6, v0
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v7, v0
+; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v7, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v3, v12, v2
+; GISEL-NEXT:    v_mul_hi_u32 v7, v11, v2
+; GISEL-NEXT:    v_addc_u32_e64 v6, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v7, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v0, v6, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v6, v12, v2
+; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v5, v9, 0
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v4
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v0
 ; GISEL-NEXT:    v_mov_b32_e32 v0, v3
 ; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v5, v6, v[0:1]
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v1, v4
-; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v8, v4, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], 0, v10, v[6:7]
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v8, v4
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v4, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], 0, v9, v[6:7]
 ; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v11, v2
 ; GISEL-NEXT:    v_subb_u32_e64 v4, s[4:5], v12, v3, vcc
 ; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v12, v3
@@ -1323,18 +1188,18 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v7, v5
 ; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v4
-; GISEL-NEXT:    v_subbrev_u32_e32 v10, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v3, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v6, -1, v6, s[4:5]
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
 ; GISEL-NEXT:    v_cndmask_b32_e32 v5, v7, v5, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v10, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v9, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
 ; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v9
-; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v9
-; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v9
-; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v9, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v10
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v10
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
+; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v10, vcc
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_srem_v2i64_pow2k_denom:
@@ -1347,218 +1212,188 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
 ; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
 ; CGP-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
-; CGP-NEXT:    v_trunc_f32_e32 v7, v5
-; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v7
-; CGP-NEXT:    v_cvt_u32_f32_e32 v8, v4
-; CGP-NEXT:    v_cvt_u32_f32_e32 v9, v7
-; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v6, v8, 0
-; CGP-NEXT:    v_mov_b32_e32 v7, v5
-; CGP-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v6, v9, v[7:8]
-; CGP-NEXT:    v_mul_hi_u32 v12, v9, v4
-; CGP-NEXT:    v_mad_u64_u32 v[13:14], s[4:5], -1, v8, v[10:11]
-; CGP-NEXT:    v_mul_lo_u32 v10, v9, v4
-; CGP-NEXT:    v_mul_hi_u32 v11, v8, v4
-; CGP-NEXT:    v_mul_lo_u32 v4, v8, v13
-; CGP-NEXT:    v_mul_lo_u32 v7, v9, v13
-; CGP-NEXT:    v_mul_hi_u32 v14, v8, v13
-; CGP-NEXT:    v_mul_hi_u32 v13, v9, v13
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v10, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT:    v_trunc_f32_e32 v5, v5
+; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
+; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v4
+; CGP-NEXT:    v_cvt_u32_f32_e32 v8, v5
+; CGP-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v6, v7, 0
+; CGP-NEXT:    v_mov_b32_e32 v4, v10
+; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v6, v8, v[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v14, v8, v9
+; CGP-NEXT:    v_mul_hi_u32 v15, v7, v9
+; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], -1, v7, v[4:5]
+; CGP-NEXT:    v_mul_hi_u32 v9, v8, v9
+; CGP-NEXT:    v_mul_lo_u32 v5, v7, v4
+; CGP-NEXT:    v_mul_lo_u32 v11, v8, v4
+; CGP-NEXT:    v_mul_hi_u32 v12, v7, v4
+; CGP-NEXT:    v_mul_hi_u32 v4, v8, v4
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v14, v5
+; CGP-NEXT:    v_addc_u32_e32 v5, vcc, v5, v15, vcc
+; CGP-NEXT:    v_addc_u32_e64 v5, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v9
+; CGP-NEXT:    v_addc_u32_e64 v11, s[4:5], v11, v12, vcc
+; CGP-NEXT:    v_addc_u32_e64 v12, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v5, vcc, v11, v5, vcc
+; CGP-NEXT:    v_addc_u32_e32 v11, vcc, 0, v12, vcc
 ; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v15, v4
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v14, v7
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v13, v7
-; CGP-NEXT:    v_add_i32_e32 v16, vcc, v8, v4
-; CGP-NEXT:    v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0
-; CGP-NEXT:    v_addc_u32_e32 v17, vcc, v9, v7, vcc
-; CGP-NEXT:    v_mov_b32_e32 v4, v14
-; CGP-NEXT:    v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5]
-; CGP-NEXT:    v_ashrrev_i32_e32 v7, 31, v1
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
-; CGP-NEXT:    v_mad_u64_u32 v[14:15], s[4:5], -1, v16, v[14:15]
-; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v7, vcc
-; CGP-NEXT:    v_xor_b32_e32 v15, v0, v7
-; CGP-NEXT:    v_mul_lo_u32 v0, v17, v13
-; CGP-NEXT:    v_mul_lo_u32 v4, v16, v14
-; CGP-NEXT:    v_xor_b32_e32 v18, v1, v7
-; CGP-NEXT:    v_mul_hi_u32 v1, v16, v13
-; CGP-NEXT:    v_mul_hi_u32 v13, v17, v13
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; CGP-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v6, v5, 0
+; CGP-NEXT:    v_addc_u32_e32 v16, vcc, v8, v4, vcc
+; CGP-NEXT:    v_mov_b32_e32 v4, v12
+; CGP-NEXT:    v_mad_u64_u32 v[12:13], s[4:5], v6, v16, v[4:5]
+; CGP-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
 ; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; CGP-NEXT:    v_mad_u64_u32 v[12:13], s[4:5], -1, v5, v[12:13]
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v4, vcc
+; CGP-NEXT:    v_xor_b32_e32 v13, v0, v4
+; CGP-NEXT:    v_mul_lo_u32 v0, v16, v11
+; CGP-NEXT:    v_mul_lo_u32 v17, v5, v12
+; CGP-NEXT:    v_xor_b32_e32 v18, v1, v4
+; CGP-NEXT:    v_mul_hi_u32 v1, v5, v11
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v17
+; CGP-NEXT:    v_addc_u32_e32 v0, vcc, v0, v1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v0, v16, v12
+; CGP-NEXT:    v_mul_hi_u32 v1, v16, v11
+; CGP-NEXT:    v_mul_hi_u32 v17, v5, v12
+; CGP-NEXT:    v_addc_u32_e64 v11, s[4:5], 0, 0, vcc
 ; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; CGP-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v1, v17, v14
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
-; CGP-NEXT:    v_mul_hi_u32 v4, v16, v14
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
-; CGP-NEXT:    v_mul_hi_u32 v13, v17, v14
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
-; CGP-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v4, v1
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v16, v0
-; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v17, v1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v13, v18, v0
-; CGP-NEXT:    v_mul_lo_u32 v14, v15, v1
-; CGP-NEXT:    v_mul_hi_u32 v16, v15, v0
+; CGP-NEXT:    v_mul_hi_u32 v12, v16, v12
+; CGP-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v17, vcc
+; CGP-NEXT:    v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v0, vcc, v0, v11, vcc
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v12, v1
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v5, v0
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v16, v1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v11, v18, v0
+; CGP-NEXT:    v_mul_lo_u32 v12, v13, v1
+; CGP-NEXT:    v_mul_hi_u32 v16, v13, v0
 ; CGP-NEXT:    v_mul_hi_u32 v0, v18, v0
-; CGP-NEXT:    v_mov_b32_e32 v4, 0x1000
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v16
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v16, v18, v1
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
-; CGP-NEXT:    v_mul_hi_u32 v14, v15, v1
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v16, v0
-; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v16, v14
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
-; CGP-NEXT:    v_mul_hi_u32 v16, v18, v1
-; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v4, v0, 0
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v16, v13
-; CGP-NEXT:    v_mad_u64_u32 v[13:14], s[4:5], v4, v13, v[1:2]
-; CGP-NEXT:    v_sub_i32_e32 v14, vcc, v15, v0
-; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v18, v13
-; CGP-NEXT:    v_subb_u32_e64 v15, s[4:5], v18, v13, vcc
+; CGP-NEXT:    v_mov_b32_e32 v5, 0x1000
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; CGP-NEXT:    v_addc_u32_e32 v11, vcc, v11, v16, vcc
+; CGP-NEXT:    v_mul_lo_u32 v11, v18, v1
+; CGP-NEXT:    v_mul_hi_u32 v16, v13, v1
+; CGP-NEXT:    v_addc_u32_e64 v12, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
+; CGP-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v16, vcc
+; CGP-NEXT:    v_addc_u32_e32 v0, vcc, v0, v12, vcc
+; CGP-NEXT:    v_mul_hi_u32 v12, v18, v1
+; CGP-NEXT:    v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v0, 0
+; CGP-NEXT:    v_addc_u32_e32 v11, vcc, 0, v11, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; CGP-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v5, v11, v[1:2]
+; CGP-NEXT:    v_sub_i32_e32 v12, vcc, v13, v0
+; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v18, v11
+; CGP-NEXT:    v_subb_u32_e64 v13, s[4:5], v18, v11, vcc
 ; CGP-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v14, v4
-; CGP-NEXT:    v_sub_i32_e32 v16, vcc, v14, v4
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v12, v5
+; CGP-NEXT:    v_sub_i32_e32 v16, vcc, v12, v5
 ; CGP-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v15
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v13
 ; CGP-NEXT:    v_subbrev_u32_e32 v17, vcc, 0, v0, vcc
-; CGP-NEXT:    v_mov_b32_e32 v0, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v13, -1, v1, s[4:5]
-; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v9, v[0:1]
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v16, v4
+; CGP-NEXT:    v_mov_b32_e32 v0, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v11, -1, v1, s[4:5]
+; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v8, v[0:1]
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v16, v5
 ; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, -1, vcc
-; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], -1, v8, v[0:1]
+; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], -1, v7, v[0:1]
 ; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v17
-; CGP-NEXT:    v_cndmask_b32_e32 v5, -1, v18, vcc
-; CGP-NEXT:    v_mul_lo_u32 v19, v8, v0
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v16, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v10, -1, v18, vcc
+; CGP-NEXT:    v_mul_lo_u32 v19, v7, v0
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v16, v5
 ; CGP-NEXT:    v_subbrev_u32_e32 v18, vcc, 0, v17, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
-; CGP-NEXT:    v_cndmask_b32_e32 v5, v16, v1, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
+; CGP-NEXT:    v_cndmask_b32_e32 v10, v16, v1, vcc
 ; CGP-NEXT:    v_cndmask_b32_e32 v16, v17, v18, vcc
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v10, v19
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v11, v9, v0
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v10, v1
-; CGP-NEXT:    v_mul_hi_u32 v10, v8, v0
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
-; CGP-NEXT:    v_mul_hi_u32 v0, v9, v0
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v10, v1
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v10
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v1
-; CGP-NEXT:    v_addc_u32_e32 v9, vcc, v9, v0, vcc
-; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v8, 0
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v13
-; CGP-NEXT:    v_cndmask_b32_e32 v5, v14, v5, vcc
-; CGP-NEXT:    v_xor_b32_e32 v11, v5, v7
-; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v6, v9, v[1:2]
-; CGP-NEXT:    v_cndmask_b32_e32 v10, v15, v16, vcc
-; CGP-NEXT:    v_xor_b32_e32 v1, v10, v7
-; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], -1, v8, v[5:6]
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v14, v19
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v15, vcc
+; CGP-NEXT:    v_mul_lo_u32 v1, v8, v0
+; CGP-NEXT:    v_mul_hi_u32 v15, v7, v0
+; CGP-NEXT:    v_addc_u32_e64 v14, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v9
+; CGP-NEXT:    v_mul_hi_u32 v0, v8, v0
+; CGP-NEXT:    v_addc_u32_e64 v1, s[4:5], v1, v15, vcc
+; CGP-NEXT:    v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v14, vcc
+; CGP-NEXT:    v_addc_u32_e32 v9, vcc, 0, v9, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v7, v1
+; CGP-NEXT:    v_addc_u32_e32 v8, vcc, v8, v0, vcc
+; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v9, 0
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v11
+; CGP-NEXT:    v_cndmask_b32_e32 v10, v12, v10, vcc
+; CGP-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v6, v8, v[1:2]
+; CGP-NEXT:    v_xor_b32_e32 v1, v10, v4
 ; CGP-NEXT:    v_ashrrev_i32_e32 v10, 31, v3
+; CGP-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], -1, v9, v[6:7]
+; CGP-NEXT:    v_cndmask_b32_e32 v11, v13, v16, vcc
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v10
 ; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v10, vcc
-; CGP-NEXT:    v_xor_b32_e32 v12, v2, v10
-; CGP-NEXT:    v_mul_lo_u32 v2, v9, v0
-; CGP-NEXT:    v_mul_lo_u32 v6, v8, v5
+; CGP-NEXT:    v_xor_b32_e32 v7, v2, v10
+; CGP-NEXT:    v_mul_lo_u32 v2, v8, v0
+; CGP-NEXT:    v_mul_lo_u32 v12, v9, v6
 ; CGP-NEXT:    v_xor_b32_e32 v13, v3, v10
-; CGP-NEXT:    v_mul_hi_u32 v3, v8, v0
-; CGP-NEXT:    v_mul_hi_u32 v0, v9, v0
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v3, v9, v5
+; CGP-NEXT:    v_mul_hi_u32 v3, v9, v0
+; CGP-NEXT:    v_mul_hi_u32 v0, v8, v0
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v12
+; CGP-NEXT:    v_addc_u32_e32 v2, vcc, v2, v3, vcc
+; CGP-NEXT:    v_mul_lo_u32 v2, v8, v6
+; CGP-NEXT:    v_mul_hi_u32 v12, v9, v6
+; CGP-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
+; CGP-NEXT:    v_mul_hi_u32 v6, v8, v6
+; CGP-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v12, vcc
+; CGP-NEXT:    v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v0, vcc, v0, v3, vcc
+; CGP-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
-; CGP-NEXT:    v_mul_hi_u32 v6, v8, v5
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
-; CGP-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
+; CGP-NEXT:    v_addc_u32_e32 v2, vcc, v8, v2, vcc
+; CGP-NEXT:    v_mul_lo_u32 v3, v13, v0
+; CGP-NEXT:    v_mul_lo_u32 v6, v7, v2
+; CGP-NEXT:    v_mul_hi_u32 v9, v7, v0
+; CGP-NEXT:    v_mul_hi_u32 v0, v13, v0
+; CGP-NEXT:    v_xor_b32_e32 v8, v11, v4
 ; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
-; CGP-NEXT:    v_mul_hi_u32 v5, v9, v5
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v8, v0
-; CGP-NEXT:    v_addc_u32_e32 v2, vcc, v9, v2, vcc
-; CGP-NEXT:    v_mul_lo_u32 v5, v13, v3
-; CGP-NEXT:    v_mul_lo_u32 v6, v12, v2
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v11, v7
-; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v1, v7, vcc
-; CGP-NEXT:    v_mul_hi_u32 v7, v12, v3
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
-; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v7, v13, v2
-; CGP-NEXT:    v_mul_hi_u32 v3, v13, v3
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; CGP-NEXT:    v_mul_hi_u32 v6, v12, v2
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v7, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
-; CGP-NEXT:    v_mul_hi_u32 v7, v13, v2
-; CGP-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0
-; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
-; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v4, v5, v[3:4]
-; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v12, v2
-; CGP-NEXT:    v_subb_u32_e64 v3, s[4:5], v13, v5, vcc
-; CGP-NEXT:    v_sub_i32_e64 v5, s[4:5], v13, v5
-; CGP-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
-; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v2, v4
-; CGP-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v7, v4
+; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v9, vcc
+; CGP-NEXT:    v_mul_lo_u32 v3, v13, v2
+; CGP-NEXT:    v_mul_hi_u32 v9, v7, v2
+; CGP-NEXT:    v_addc_u32_e64 v6, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
+; CGP-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v9, vcc
+; CGP-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v0, vcc, v0, v6, vcc
+; CGP-NEXT:    v_mul_hi_u32 v9, v13, v2
+; CGP-NEXT:    v_addc_u32_e32 v6, vcc, 0, v3, vcc
+; CGP-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v5, v0, 0
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v1, v4
+; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v8, v4, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v9, v6
+; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v5, v4, v[3:4]
+; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v7, v2
+; CGP-NEXT:    v_subb_u32_e64 v4, s[4:5], v13, v3, vcc
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v13, v3
+; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v2, v5
+; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v7, v5
 ; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
-; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v4
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v5
 ; CGP-NEXT:    v_cndmask_b32_e32 v8, -1, v8, vcc
-; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v7, v4
+; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v7, v5
 ; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
-; CGP-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v5, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v4
+; CGP-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v3, vcc
 ; CGP-NEXT:    v_cndmask_b32_e64 v6, -1, v6, s[4:5]
 ; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
-; CGP-NEXT:    v_cndmask_b32_e32 v4, v7, v4, vcc
-; CGP-NEXT:    v_cndmask_b32_e32 v5, v5, v9, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v5, v7, v5, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v9, vcc
 ; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
-; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
-; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
 ; CGP-NEXT:    v_xor_b32_e32 v2, v2, v10
 ; CGP-NEXT:    v_xor_b32_e32 v3, v3, v10
 ; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
@@ -1594,18 +1429,13 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) {
 ; CHECK-NEXT:    v_mul_hi_u32 v11, v5, v3
 ; CHECK-NEXT:    v_mul_hi_u32 v3, v7, v3
 ; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
-; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v10, v2
-; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
-; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v11
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
-; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
+; CHECK-NEXT:    v_addc_u32_e32 v4, vcc, v4, v8, vcc
+; CHECK-NEXT:    v_add_i32_e64 v2, s[4:5], v10, v2
+; CHECK-NEXT:    v_addc_u32_e64 v2, s[6:7], v2, v11, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e64 v4, s[8:9], 0, 0, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v8, s[6:7], 0, 0, s[6:7]
+; CHECK-NEXT:    v_addc_u32_e64 v2, vcc, v2, v4, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v4, vcc, 0, v8, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
 ; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v2
 ; CHECK-NEXT:    v_addc_u32_e32 v7, vcc, v7, v3, vcc
@@ -1620,23 +1450,18 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) {
 ; CHECK-NEXT:    v_mul_lo_u32 v8, v5, v3
 ; CHECK-NEXT:    v_xor_b32_e32 v9, v1, v6
 ; CHECK-NEXT:    v_mul_hi_u32 v1, v5, v2
-; CHECK-NEXT:    v_mul_hi_u32 v2, v7, v2
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v8
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; CHECK-NEXT:    v_mul_lo_u32 v1, v7, v3
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
+; CHECK-NEXT:    v_addc_u32_e32 v0, vcc, v0, v1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v0, v7, v3
+; CHECK-NEXT:    v_mul_hi_u32 v1, v7, v2
 ; CHECK-NEXT:    v_mul_hi_u32 v8, v5, v3
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
-; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v8
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v8
+; CHECK-NEXT:    v_addc_u32_e64 v2, s[4:5], 0, 0, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
 ; CHECK-NEXT:    v_mul_hi_u32 v3, v7, v3
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
-; CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
+; CHECK-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v8, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v0, vcc, v0, v2, vcc
+; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v3, v1
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v5, v0
 ; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v7, v1, vcc
@@ -1646,23 +1471,18 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) {
 ; CHECK-NEXT:    v_mul_hi_u32 v0, v9, v0
 ; CHECK-NEXT:    v_mov_b32_e32 v5, 0x12d8fb
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT:    v_mul_lo_u32 v7, v9, v1
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; CHECK-NEXT:    v_mul_hi_u32 v3, v4, v1
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v3
-; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v7, v3
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; CHECK-NEXT:    v_mul_hi_u32 v7, v9, v1
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v2, v7, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v2, v9, v1
+; CHECK-NEXT:    v_mul_hi_u32 v7, v4, v1
+; CHECK-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
+; CHECK-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v7, vcc
+; CHECK-NEXT:    v_addc_u32_e32 v0, vcc, v0, v3, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v3, v9, v1
+; CHECK-NEXT:    v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5]
 ; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v0, 0
-; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
 ; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v5, v2, v[1:2]
 ; CHECK-NEXT:    v_sub_i32_e64 v0, s[4:5], v4, v0
 ; CHECK-NEXT:    v_subb_u32_e64 v2, vcc, v9, v1, s[4:5]
@@ -1705,6 +1525,8 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    s_subb_u32 s7, 0, 0
 ; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
 ; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT:    s_sub_u32 s8, 0, 0x12d8fb
+; GISEL-NEXT:    s_subb_u32 s9, 0, 0
 ; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
 ; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
 ; GISEL-NEXT:    v_trunc_f32_e32 v7, v5
@@ -1722,18 +1544,13 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_mul_hi_u32 v12, v6, v8
 ; GISEL-NEXT:    v_mul_hi_u32 v8, v7, v8
 ; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v10, v5
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v9, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v5, s[4:5], 0, 0, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
+; GISEL-NEXT:    v_addc_u32_e64 v4, s[4:5], v4, v12, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, v4, v5, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v9, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
 ; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v6, v4
 ; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v11, 0
@@ -1749,23 +1566,18 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_mul_lo_u32 v12, v11, v9
 ; GISEL-NEXT:    v_xor_b32_e32 v13, v1, v4
 ; GISEL-NEXT:    v_mul_hi_u32 v1, v11, v8
-; GISEL-NEXT:    v_mul_hi_u32 v8, v5, v8
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v1, v5, v9
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v12, v0
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, v0, v1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v0, v5, v9
+; GISEL-NEXT:    v_mul_hi_u32 v1, v5, v8
 ; GISEL-NEXT:    v_mul_hi_u32 v12, v11, v9
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; GISEL-NEXT:    v_addc_u32_e64 v8, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
 ; GISEL-NEXT:    v_mul_hi_u32 v9, v5, v9
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v12, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, v0, v8, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v9, v1
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
 ; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v5, v1, vcc
@@ -1775,104 +1587,86 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_mul_hi_u32 v0, v13, v0
 ; GISEL-NEXT:    v_mov_b32_e32 v5, 0x12d8fb
 ; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v11, v13, v1
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; GISEL-NEXT:    v_mul_hi_u32 v9, v10, v1
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v0, v8
-; GISEL-NEXT:    v_mul_hi_u32 v12, v13, v1
+; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, v8, v11, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v8, v13, v1
+; GISEL-NEXT:    v_mul_hi_u32 v11, v10, v1
+; GISEL-NEXT:    v_addc_u32_e64 v9, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v11, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v11, vcc, v0, v9, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v9, v13, v1
+; GISEL-NEXT:    v_addc_u32_e64 v8, s[4:5], 0, 0, s[4:5]
 ; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v11, 0
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, 0, v8, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
 ; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v5, v8, v[1:2]
 ; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, v10, v0
 ; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], 0, v11, v[8:9]
-; GISEL-NEXT:    s_sub_u32 s6, 0, 0x12d8fb
-; GISEL-NEXT:    s_subb_u32 s7, 0, 0
 ; GISEL-NEXT:    v_subb_u32_e64 v11, s[4:5], v13, v8, vcc
 ; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v13, v8
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v5
-; GISEL-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[4:5]
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
-; GISEL-NEXT:    v_sub_i32_e32 v13, vcc, v10, v5
 ; GISEL-NEXT:    v_cndmask_b32_e64 v12, -1, v1, s[4:5]
-; GISEL-NEXT:    v_subbrev_u32_e32 v14, vcc, 0, v0, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v6, 0
+; GISEL-NEXT:    v_subbrev_u32_e32 v8, vcc, 0, v0, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s8, v6, 0
+; GISEL-NEXT:    v_sub_i32_e32 v13, vcc, v10, v5
+; GISEL-NEXT:    v_subbrev_u32_e32 v14, vcc, 0, v8, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s8, v7, v[1:2]
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v13, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, -1, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s9, v6, v[8:9]
 ; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v14
-; GISEL-NEXT:    v_cndmask_b32_e32 v15, -1, v8, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[1:2]
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v13, v5
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
-; GISEL-NEXT:    v_subbrev_u32_e32 v16, vcc, 0, v14, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v15
-; GISEL-NEXT:    v_cndmask_b32_e32 v9, v13, v1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v1, v7, v0
-; GISEL-NEXT:    v_mul_lo_u32 v13, v6, v8
-; GISEL-NEXT:    v_mul_hi_u32 v15, v6, v0
-; GISEL-NEXT:    v_cndmask_b32_e32 v14, v14, v16, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, -1, v15, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, v13, v5
+; GISEL-NEXT:    v_subbrev_u32_e32 v15, vcc, 0, v14, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v16, v7, v0
+; GISEL-NEXT:    v_mul_lo_u32 v17, v6, v8
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
+; GISEL-NEXT:    v_mul_hi_u32 v1, v6, v0
+; GISEL-NEXT:    v_cndmask_b32_e32 v9, v13, v9, vcc
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v16, v17
+; GISEL-NEXT:    v_addc_u32_e64 v1, s[4:5], v13, v1, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v1, v7, v8
 ; GISEL-NEXT:    v_mul_hi_u32 v0, v7, v0
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v15, v7, v8
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
-; GISEL-NEXT:    v_mul_hi_u32 v13, v6, v8
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v15, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v15, v13
+; GISEL-NEXT:    v_mul_hi_u32 v16, v6, v8
+; GISEL-NEXT:    v_addc_u32_e64 v13, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v0, s[4:5], v1, v0
 ; GISEL-NEXT:    v_mul_hi_u32 v8, v7, v8
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v6, v0
-; GISEL-NEXT:    v_addc_u32_e32 v13, vcc, v7, v1, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[6:7], v0, v16, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v1, s[6:7], 0, 0, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v13, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v1, s[4:5], 0, v1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v1, s[4:5], v8, v1
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v6, v0
+; GISEL-NEXT:    v_addc_u32_e64 v13, s[4:5], v7, v1, s[4:5]
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s8, v8, 0
+; GISEL-NEXT:    v_cndmask_b32_e32 v14, v14, v15, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
+; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], s8, v13, v[1:2]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v9, v10, v9, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2]
-; GISEL-NEXT:    v_xor_b32_e32 v1, v9, v4
-; GISEL-NEXT:    v_ashrrev_i32_e32 v9, 31, v3
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e32 v10, v11, v14, vcc
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v9
-; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v9, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v11, v2, v9
+; GISEL-NEXT:    v_ashrrev_i32_e32 v10, 31, v3
+; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], s9, v8, v[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v11, v14, vcc
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v10
+; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v10, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v11, v2, v10
 ; GISEL-NEXT:    v_mul_lo_u32 v2, v13, v0
 ; GISEL-NEXT:    v_mul_lo_u32 v7, v8, v6
-; GISEL-NEXT:    v_xor_b32_e32 v12, v3, v9
+; GISEL-NEXT:    v_xor_b32_e32 v12, v3, v10
 ; GISEL-NEXT:    v_mul_hi_u32 v3, v8, v0
 ; GISEL-NEXT:    v_mul_hi_u32 v0, v13, v0
 ; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v3, v13, v6
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
+; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, v2, v3, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v2, v13, v6
 ; GISEL-NEXT:    v_mul_hi_u32 v7, v8, v6
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
+; GISEL-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
 ; GISEL-NEXT:    v_mul_hi_u32 v6, v13, v6
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v7, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, v0, v3, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
 ; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, v13, v2, vcc
@@ -1880,30 +1674,26 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_mul_lo_u32 v6, v11, v2
 ; GISEL-NEXT:    v_mul_hi_u32 v7, v11, v0
 ; GISEL-NEXT:    v_mul_hi_u32 v0, v12, v0
-; GISEL-NEXT:    v_xor_b32_e32 v8, v10, v4
+; GISEL-NEXT:    v_xor_b32_e32 v8, v9, v4
 ; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v7, v12, v2
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
-; GISEL-NEXT:    v_mul_hi_u32 v6, v11, v2
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v0, v3
-; GISEL-NEXT:    v_mul_hi_u32 v7, v12, v2
-; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v5, v10, 0
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v6, v0
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v7, v0
+; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v7, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v3, v12, v2
+; GISEL-NEXT:    v_mul_hi_u32 v7, v11, v2
+; GISEL-NEXT:    v_addc_u32_e64 v6, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v7, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v0, v6, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v6, v12, v2
+; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v5, v9, 0
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v4
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v0
 ; GISEL-NEXT:    v_mov_b32_e32 v0, v3
 ; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v5, v6, v[0:1]
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v1, v4
-; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v8, v4, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], 0, v10, v[6:7]
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v8, v4
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v4, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], 0, v9, v[6:7]
 ; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v11, v2
 ; GISEL-NEXT:    v_subb_u32_e64 v4, s[4:5], v12, v3, vcc
 ; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v12, v3
@@ -1918,18 +1708,18 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v7, v5
 ; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v4
-; GISEL-NEXT:    v_subbrev_u32_e32 v10, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v3, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v6, -1, v6, s[4:5]
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
 ; GISEL-NEXT:    v_cndmask_b32_e32 v5, v7, v5, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v10, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v9, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
 ; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v9
-; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v9
-; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v9
-; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v9, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v10
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v10
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
+; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v10, vcc
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_srem_v2i64_oddk_denom:
@@ -1942,218 +1732,188 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
 ; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
 ; CGP-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
-; CGP-NEXT:    v_trunc_f32_e32 v7, v5
-; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v7
-; CGP-NEXT:    v_cvt_u32_f32_e32 v8, v4
-; CGP-NEXT:    v_cvt_u32_f32_e32 v9, v7
-; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v6, v8, 0
-; CGP-NEXT:    v_mov_b32_e32 v7, v5
-; CGP-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v6, v9, v[7:8]
-; CGP-NEXT:    v_mul_hi_u32 v12, v9, v4
-; CGP-NEXT:    v_mad_u64_u32 v[13:14], s[4:5], -1, v8, v[10:11]
-; CGP-NEXT:    v_mul_lo_u32 v10, v9, v4
-; CGP-NEXT:    v_mul_hi_u32 v11, v8, v4
-; CGP-NEXT:    v_mul_lo_u32 v4, v8, v13
-; CGP-NEXT:    v_mul_lo_u32 v7, v9, v13
-; CGP-NEXT:    v_mul_hi_u32 v14, v8, v13
-; CGP-NEXT:    v_mul_hi_u32 v13, v9, v13
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v10, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT:    v_trunc_f32_e32 v5, v5
+; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
+; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v4
+; CGP-NEXT:    v_cvt_u32_f32_e32 v8, v5
+; CGP-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v6, v7, 0
+; CGP-NEXT:    v_mov_b32_e32 v4, v10
+; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v6, v8, v[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v14, v8, v9
+; CGP-NEXT:    v_mul_hi_u32 v15, v7, v9
+; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], -1, v7, v[4:5]
+; CGP-NEXT:    v_mul_hi_u32 v9, v8, v9
+; CGP-NEXT:    v_mul_lo_u32 v5, v7, v4
+; CGP-NEXT:    v_mul_lo_u32 v11, v8, v4
+; CGP-NEXT:    v_mul_hi_u32 v12, v7, v4
+; CGP-NEXT:    v_mul_hi_u32 v4, v8, v4
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v14, v5
+; CGP-NEXT:    v_addc_u32_e32 v5, vcc, v5, v15, vcc
+; CGP-NEXT:    v_addc_u32_e64 v5, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v9
+; CGP-NEXT:    v_addc_u32_e64 v11, s[4:5], v11, v12, vcc
+; CGP-NEXT:    v_addc_u32_e64 v12, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v5, vcc, v11, v5, vcc
+; CGP-NEXT:    v_addc_u32_e32 v11, vcc, 0, v12, vcc
 ; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v15, v4
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v14, v7
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v13, v7
-; CGP-NEXT:    v_add_i32_e32 v16, vcc, v8, v4
-; CGP-NEXT:    v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0
-; CGP-NEXT:    v_addc_u32_e32 v17, vcc, v9, v7, vcc
-; CGP-NEXT:    v_mov_b32_e32 v4, v14
-; CGP-NEXT:    v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5]
-; CGP-NEXT:    v_ashrrev_i32_e32 v7, 31, v1
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
-; CGP-NEXT:    v_mad_u64_u32 v[14:15], s[4:5], -1, v16, v[14:15]
-; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v7, vcc
-; CGP-NEXT:    v_xor_b32_e32 v15, v0, v7
-; CGP-NEXT:    v_mul_lo_u32 v0, v17, v13
-; CGP-NEXT:    v_mul_lo_u32 v4, v16, v14
-; CGP-NEXT:    v_xor_b32_e32 v18, v1, v7
-; CGP-NEXT:    v_mul_hi_u32 v1, v16, v13
-; CGP-NEXT:    v_mul_hi_u32 v13, v17, v13
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; CGP-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v6, v5, 0
+; CGP-NEXT:    v_addc_u32_e32 v16, vcc, v8, v4, vcc
+; CGP-NEXT:    v_mov_b32_e32 v4, v12
+; CGP-NEXT:    v_mad_u64_u32 v[12:13], s[4:5], v6, v16, v[4:5]
+; CGP-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
 ; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; CGP-NEXT:    v_mad_u64_u32 v[12:13], s[4:5], -1, v5, v[12:13]
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v4, vcc
+; CGP-NEXT:    v_xor_b32_e32 v13, v0, v4
+; CGP-NEXT:    v_mul_lo_u32 v0, v16, v11
+; CGP-NEXT:    v_mul_lo_u32 v17, v5, v12
+; CGP-NEXT:    v_xor_b32_e32 v18, v1, v4
+; CGP-NEXT:    v_mul_hi_u32 v1, v5, v11
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v17
+; CGP-NEXT:    v_addc_u32_e32 v0, vcc, v0, v1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v0, v16, v12
+; CGP-NEXT:    v_mul_hi_u32 v1, v16, v11
+; CGP-NEXT:    v_mul_hi_u32 v17, v5, v12
+; CGP-NEXT:    v_addc_u32_e64 v11, s[4:5], 0, 0, vcc
 ; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; CGP-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v1, v17, v14
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
-; CGP-NEXT:    v_mul_hi_u32 v4, v16, v14
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
-; CGP-NEXT:    v_mul_hi_u32 v13, v17, v14
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
-; CGP-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v4, v1
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v16, v0
-; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v17, v1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v13, v18, v0
-; CGP-NEXT:    v_mul_lo_u32 v14, v15, v1
-; CGP-NEXT:    v_mul_hi_u32 v16, v15, v0
+; CGP-NEXT:    v_mul_hi_u32 v12, v16, v12
+; CGP-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v17, vcc
+; CGP-NEXT:    v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v0, vcc, v0, v11, vcc
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v12, v1
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v5, v0
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v16, v1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v11, v18, v0
+; CGP-NEXT:    v_mul_lo_u32 v12, v13, v1
+; CGP-NEXT:    v_mul_hi_u32 v16, v13, v0
 ; CGP-NEXT:    v_mul_hi_u32 v0, v18, v0
-; CGP-NEXT:    v_mov_b32_e32 v4, 0x12d8fb
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v16
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v16, v18, v1
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
-; CGP-NEXT:    v_mul_hi_u32 v14, v15, v1
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v16, v0
-; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v16, v14
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
-; CGP-NEXT:    v_mul_hi_u32 v16, v18, v1
-; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v4, v0, 0
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v16, v13
-; CGP-NEXT:    v_mad_u64_u32 v[13:14], s[4:5], v4, v13, v[1:2]
-; CGP-NEXT:    v_sub_i32_e32 v14, vcc, v15, v0
-; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v18, v13
-; CGP-NEXT:    v_subb_u32_e64 v15, s[4:5], v18, v13, vcc
+; CGP-NEXT:    v_mov_b32_e32 v5, 0x12d8fb
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; CGP-NEXT:    v_addc_u32_e32 v11, vcc, v11, v16, vcc
+; CGP-NEXT:    v_mul_lo_u32 v11, v18, v1
+; CGP-NEXT:    v_mul_hi_u32 v16, v13, v1
+; CGP-NEXT:    v_addc_u32_e64 v12, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
+; CGP-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v16, vcc
+; CGP-NEXT:    v_addc_u32_e32 v0, vcc, v0, v12, vcc
+; CGP-NEXT:    v_mul_hi_u32 v12, v18, v1
+; CGP-NEXT:    v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v0, 0
+; CGP-NEXT:    v_addc_u32_e32 v11, vcc, 0, v11, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; CGP-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v5, v11, v[1:2]
+; CGP-NEXT:    v_sub_i32_e32 v12, vcc, v13, v0
+; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v18, v11
+; CGP-NEXT:    v_subb_u32_e64 v13, s[4:5], v18, v11, vcc
 ; CGP-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v14, v4
-; CGP-NEXT:    v_sub_i32_e32 v16, vcc, v14, v4
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v12, v5
+; CGP-NEXT:    v_sub_i32_e32 v16, vcc, v12, v5
 ; CGP-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v15
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v13
 ; CGP-NEXT:    v_subbrev_u32_e32 v17, vcc, 0, v0, vcc
-; CGP-NEXT:    v_mov_b32_e32 v0, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v13, -1, v1, s[4:5]
-; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v9, v[0:1]
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v16, v4
+; CGP-NEXT:    v_mov_b32_e32 v0, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v11, -1, v1, s[4:5]
+; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v8, v[0:1]
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v16, v5
 ; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, -1, vcc
-; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], -1, v8, v[0:1]
+; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], -1, v7, v[0:1]
 ; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v17
-; CGP-NEXT:    v_cndmask_b32_e32 v5, -1, v18, vcc
-; CGP-NEXT:    v_mul_lo_u32 v19, v8, v0
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v16, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v10, -1, v18, vcc
+; CGP-NEXT:    v_mul_lo_u32 v19, v7, v0
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v16, v5
 ; CGP-NEXT:    v_subbrev_u32_e32 v18, vcc, 0, v17, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
-; CGP-NEXT:    v_cndmask_b32_e32 v5, v16, v1, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
+; CGP-NEXT:    v_cndmask_b32_e32 v10, v16, v1, vcc
 ; CGP-NEXT:    v_cndmask_b32_e32 v16, v17, v18, vcc
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v10, v19
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v11, v9, v0
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v10, v1
-; CGP-NEXT:    v_mul_hi_u32 v10, v8, v0
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
-; CGP-NEXT:    v_mul_hi_u32 v0, v9, v0
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v10, v1
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v10
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v1
-; CGP-NEXT:    v_addc_u32_e32 v9, vcc, v9, v0, vcc
-; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v8, 0
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v13
-; CGP-NEXT:    v_cndmask_b32_e32 v5, v14, v5, vcc
-; CGP-NEXT:    v_xor_b32_e32 v11, v5, v7
-; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v6, v9, v[1:2]
-; CGP-NEXT:    v_cndmask_b32_e32 v10, v15, v16, vcc
-; CGP-NEXT:    v_xor_b32_e32 v1, v10, v7
-; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], -1, v8, v[5:6]
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v14, v19
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v15, vcc
+; CGP-NEXT:    v_mul_lo_u32 v1, v8, v0
+; CGP-NEXT:    v_mul_hi_u32 v15, v7, v0
+; CGP-NEXT:    v_addc_u32_e64 v14, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v9
+; CGP-NEXT:    v_mul_hi_u32 v0, v8, v0
+; CGP-NEXT:    v_addc_u32_e64 v1, s[4:5], v1, v15, vcc
+; CGP-NEXT:    v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v14, vcc
+; CGP-NEXT:    v_addc_u32_e32 v9, vcc, 0, v9, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v7, v1
+; CGP-NEXT:    v_addc_u32_e32 v8, vcc, v8, v0, vcc
+; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v9, 0
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v11
+; CGP-NEXT:    v_cndmask_b32_e32 v10, v12, v10, vcc
+; CGP-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v6, v8, v[1:2]
+; CGP-NEXT:    v_xor_b32_e32 v1, v10, v4
 ; CGP-NEXT:    v_ashrrev_i32_e32 v10, 31, v3
+; CGP-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], -1, v9, v[6:7]
+; CGP-NEXT:    v_cndmask_b32_e32 v11, v13, v16, vcc
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v10
 ; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v10, vcc
-; CGP-NEXT:    v_xor_b32_e32 v12, v2, v10
-; CGP-NEXT:    v_mul_lo_u32 v2, v9, v0
-; CGP-NEXT:    v_mul_lo_u32 v6, v8, v5
+; CGP-NEXT:    v_xor_b32_e32 v7, v2, v10
+; CGP-NEXT:    v_mul_lo_u32 v2, v8, v0
+; CGP-NEXT:    v_mul_lo_u32 v12, v9, v6
 ; CGP-NEXT:    v_xor_b32_e32 v13, v3, v10
-; CGP-NEXT:    v_mul_hi_u32 v3, v8, v0
-; CGP-NEXT:    v_mul_hi_u32 v0, v9, v0
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v3, v9, v5
+; CGP-NEXT:    v_mul_hi_u32 v3, v9, v0
+; CGP-NEXT:    v_mul_hi_u32 v0, v8, v0
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v12
+; CGP-NEXT:    v_addc_u32_e32 v2, vcc, v2, v3, vcc
+; CGP-NEXT:    v_mul_lo_u32 v2, v8, v6
+; CGP-NEXT:    v_mul_hi_u32 v12, v9, v6
+; CGP-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
+; CGP-NEXT:    v_mul_hi_u32 v6, v8, v6
+; CGP-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v12, vcc
+; CGP-NEXT:    v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v0, vcc, v0, v3, vcc
+; CGP-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
-; CGP-NEXT:    v_mul_hi_u32 v6, v8, v5
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
-; CGP-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
+; CGP-NEXT:    v_addc_u32_e32 v2, vcc, v8, v2, vcc
+; CGP-NEXT:    v_mul_lo_u32 v3, v13, v0
+; CGP-NEXT:    v_mul_lo_u32 v6, v7, v2
+; CGP-NEXT:    v_mul_hi_u32 v9, v7, v0
+; CGP-NEXT:    v_mul_hi_u32 v0, v13, v0
+; CGP-NEXT:    v_xor_b32_e32 v8, v11, v4
 ; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
-; CGP-NEXT:    v_mul_hi_u32 v5, v9, v5
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v8, v0
-; CGP-NEXT:    v_addc_u32_e32 v2, vcc, v9, v2, vcc
-; CGP-NEXT:    v_mul_lo_u32 v5, v13, v3
-; CGP-NEXT:    v_mul_lo_u32 v6, v12, v2
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v11, v7
-; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v1, v7, vcc
-; CGP-NEXT:    v_mul_hi_u32 v7, v12, v3
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
-; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v7, v13, v2
-; CGP-NEXT:    v_mul_hi_u32 v3, v13, v3
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; CGP-NEXT:    v_mul_hi_u32 v6, v12, v2
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v7, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
-; CGP-NEXT:    v_mul_hi_u32 v7, v13, v2
-; CGP-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0
-; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
-; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v4, v5, v[3:4]
-; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v12, v2
-; CGP-NEXT:    v_subb_u32_e64 v3, s[4:5], v13, v5, vcc
-; CGP-NEXT:    v_sub_i32_e64 v5, s[4:5], v13, v5
-; CGP-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
-; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v2, v4
-; CGP-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v7, v4
+; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v9, vcc
+; CGP-NEXT:    v_mul_lo_u32 v3, v13, v2
+; CGP-NEXT:    v_mul_hi_u32 v9, v7, v2
+; CGP-NEXT:    v_addc_u32_e64 v6, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
+; CGP-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v9, vcc
+; CGP-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v0, vcc, v0, v6, vcc
+; CGP-NEXT:    v_mul_hi_u32 v9, v13, v2
+; CGP-NEXT:    v_addc_u32_e32 v6, vcc, 0, v3, vcc
+; CGP-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v5, v0, 0
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v1, v4
+; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v8, v4, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v9, v6
+; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v5, v4, v[3:4]
+; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v7, v2
+; CGP-NEXT:    v_subb_u32_e64 v4, s[4:5], v13, v3, vcc
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v13, v3
+; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v2, v5
+; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v7, v5
 ; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
-; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v4
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v5
 ; CGP-NEXT:    v_cndmask_b32_e32 v8, -1, v8, vcc
-; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v7, v4
+; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v7, v5
 ; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
-; CGP-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v5, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v4
+; CGP-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v3, vcc
 ; CGP-NEXT:    v_cndmask_b32_e64 v6, -1, v6, s[4:5]
 ; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
-; CGP-NEXT:    v_cndmask_b32_e32 v4, v7, v4, vcc
-; CGP-NEXT:    v_cndmask_b32_e32 v5, v5, v9, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v5, v7, v5, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v9, vcc
 ; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
-; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
-; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
 ; CGP-NEXT:    v_xor_b32_e32 v2, v2, v10
 ; CGP-NEXT:    v_xor_b32_e32 v3, v3, v10
 ; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
@@ -2186,137 +1946,120 @@ define i64 @v_srem_i64_pow2_shl_denom(i64 %x, i64 %y) {
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
 ; CHECK-NEXT:  .LBB7_3:
-; CHECK-NEXT:    v_ashrrev_i32_e32 v1, 31, v6
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v5, v1
-; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v6, v1, vcc
-; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
-; CHECK-NEXT:    v_xor_b32_e32 v1, v2, v1
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, v0
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v5, v1
-; CHECK-NEXT:    v_sub_i32_e32 v9, vcc, 0, v0
-; CHECK-NEXT:    v_subb_u32_e32 v10, vcc, 0, v1, vcc
-; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v5
-; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
-; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
-; CHECK-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v2
-; CHECK-NEXT:    v_trunc_f32_e32 v7, v5
-; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v7
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v8, v2
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v11, v7
-; CHECK-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v9, v8, 0
-; CHECK-NEXT:    v_mov_b32_e32 v2, v6
-; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v9, v11, v[2:3]
-; CHECK-NEXT:    v_mul_lo_u32 v2, v11, v5
-; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v10, v8, v[6:7]
-; CHECK-NEXT:    v_mul_hi_u32 v7, v8, v5
-; CHECK-NEXT:    v_mul_hi_u32 v5, v11, v5
-; CHECK-NEXT:    v_mul_lo_u32 v12, v8, v6
-; CHECK-NEXT:    v_mul_lo_u32 v13, v11, v6
+; CHECK-NEXT:    v_ashrrev_i32_e32 v0, 31, v6
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v5, v0
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v6, v0, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v5, v1, v0
+; CHECK-NEXT:    v_xor_b32_e32 v6, v2, v0
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, v5
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v1, v6
+; CHECK-NEXT:    v_sub_i32_e32 v8, vcc, 0, v5
+; CHECK-NEXT:    v_subb_u32_e32 v9, vcc, 0, v6, vcc
+; CHECK-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v0
+; CHECK-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
+; CHECK-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
+; CHECK-NEXT:    v_trunc_f32_e32 v2, v1
+; CHECK-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v2
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v7, v0
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v10, v2
+; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v8, v7, 0
+; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v8, v10, v[1:2]
+; CHECK-NEXT:    v_mul_hi_u32 v11, v7, v0
+; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v9, v7, v[1:2]
+; CHECK-NEXT:    v_mul_lo_u32 v2, v10, v0
+; CHECK-NEXT:    v_mul_hi_u32 v0, v10, v0
+; CHECK-NEXT:    v_mul_lo_u32 v12, v7, v1
+; CHECK-NEXT:    v_mul_lo_u32 v13, v10, v1
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v12
-; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v2, v11, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v11, v7, v1
+; CHECK-NEXT:    v_addc_u32_e64 v2, s[4:5], 0, 0, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v13, v0
+; CHECK-NEXT:    v_mul_hi_u32 v1, v10, v1
+; CHECK-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v11, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v0, vcc, v0, v2, vcc
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, 0, v11, vcc
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v0
+; CHECK-NEXT:    v_addc_u32_e32 v10, vcc, v10, v1, vcc
+; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v8, v7, 0
+; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v8, v10, v[1:2]
+; CHECK-NEXT:    v_ashrrev_i32_e32 v8, 31, v4
+; CHECK-NEXT:    v_mul_hi_u32 v11, v7, v0
+; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v9, v7, v[1:2]
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v8
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v4, v8, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v4, v2, v8
+; CHECK-NEXT:    v_mul_lo_u32 v2, v10, v0
+; CHECK-NEXT:    v_mul_lo_u32 v9, v7, v1
+; CHECK-NEXT:    v_mul_hi_u32 v0, v10, v0
+; CHECK-NEXT:    v_xor_b32_e32 v3, v3, v8
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v9
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v2, v11, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v2, v10, v1
+; CHECK-NEXT:    v_mul_hi_u32 v11, v7, v1
+; CHECK-NEXT:    v_addc_u32_e64 v9, s[4:5], 0, 0, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
+; CHECK-NEXT:    v_mul_hi_u32 v1, v10, v1
+; CHECK-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v11, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v0, vcc, v0, v9, vcc
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
+; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v10, v1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v2, v3, v0
+; CHECK-NEXT:    v_mul_lo_u32 v7, v4, v1
+; CHECK-NEXT:    v_mul_hi_u32 v9, v4, v0
+; CHECK-NEXT:    v_mul_hi_u32 v0, v3, v0
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
-; CHECK-NEXT:    v_mul_hi_u32 v7, v8, v6
-; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v12, v2
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v13, v5
-; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v12, v7
-; CHECK-NEXT:    v_mul_hi_u32 v6, v11, v6
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v2
-; CHECK-NEXT:    v_addc_u32_e32 v11, vcc, v11, v5, vcc
-; CHECK-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v9, v8, 0
-; CHECK-NEXT:    v_mov_b32_e32 v2, v6
-; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v9, v11, v[2:3]
-; CHECK-NEXT:    v_ashrrev_i32_e32 v9, 31, v4
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v9
-; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v10, v8, v[6:7]
-; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v4, v9, vcc
-; CHECK-NEXT:    v_xor_b32_e32 v7, v2, v9
-; CHECK-NEXT:    v_mul_lo_u32 v2, v11, v5
-; CHECK-NEXT:    v_mul_lo_u32 v4, v8, v6
-; CHECK-NEXT:    v_xor_b32_e32 v10, v3, v9
-; CHECK-NEXT:    v_mul_hi_u32 v3, v8, v5
-; CHECK-NEXT:    v_mul_hi_u32 v5, v11, v5
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
-; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT:    v_mul_lo_u32 v3, v11, v6
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
-; CHECK-NEXT:    v_mul_hi_u32 v4, v8, v6
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
-; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; CHECK-NEXT:    v_mul_hi_u32 v5, v11, v6
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v8, v2
-; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v11, v3, vcc
-; CHECK-NEXT:    v_mul_lo_u32 v4, v10, v2
-; CHECK-NEXT:    v_mul_lo_u32 v5, v7, v3
-; CHECK-NEXT:    v_mul_hi_u32 v6, v7, v2
-; CHECK-NEXT:    v_mul_hi_u32 v2, v10, v2
-; CHECK-NEXT:    v_mul_hi_u32 v8, v10, v3
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
-; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CHECK-NEXT:    v_mul_lo_u32 v6, v10, v3
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; CHECK-NEXT:    v_mul_hi_u32 v5, v7, v3
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
-; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v2, v4
-; CHECK-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v0, v6, 0
-; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
-; CHECK-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v0, v4, v[3:4]
-; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, v7, v2
-; CHECK-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v1, v6, v[3:4]
-; CHECK-NEXT:    v_subb_u32_e64 v4, s[4:5], v10, v3, vcc
-; CHECK-NEXT:    v_sub_i32_e64 v3, s[4:5], v10, v3
-; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v1
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[4:5]
-; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v0
-; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], v4, v1
-; CHECK-NEXT:    v_subb_u32_e32 v3, vcc, v3, v1, vcc
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[4:5]
-; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, v2, v0
-; CHECK-NEXT:    v_subbrev_u32_e64 v7, s[4:5], 0, v3, vcc
-; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v1
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
-; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v0
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v2, v9, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v2, v3, v1
+; CHECK-NEXT:    v_mul_hi_u32 v9, v4, v1
+; CHECK-NEXT:    v_addc_u32_e64 v7, s[4:5], 0, 0, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
+; CHECK-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v9, vcc
+; CHECK-NEXT:    v_addc_u32_e32 v7, vcc, v0, v7, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v9, v3, v1
+; CHECK-NEXT:    v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5]
+; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v7, 0
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v9, v2
+; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v5, v2, v[1:2]
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v4, v0
+; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v6, v7, v[1:2]
+; CHECK-NEXT:    v_subb_u32_e64 v2, s[4:5], v3, v1, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v3, v1
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], v2, v6
+; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v6, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[4:5]
+; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, v0, v5
+; CHECK-NEXT:    v_subbrev_u32_e64 v7, s[4:5], 0, v1, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v5
+; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v6, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
-; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], v7, v1
-; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v3, v1, vcc
-; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v6, v0
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, v8, v10, s[4:5]
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], v7, v6
+; CHECK-NEXT:    v_sub_i32_e32 v5, vcc, v4, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, v9, v10, s[4:5]
 ; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v9
+; CHECK-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
-; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; CHECK-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
-; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v9
-; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v9
-; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v9
-; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v9, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v8
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v8
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
+; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v8, vcc
 ; CHECK-NEXT:    ; implicit-def: $vgpr5_vgpr6
 ; CHECK-NEXT:    ; implicit-def: $vgpr3
 ; CHECK-NEXT:    s_andn2_saveexec_b64 s[4:5], s[6:7]
@@ -2381,21 +2124,16 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; GISEL-NEXT:    v_mul_hi_u32 v10, v16, v10
 ; GISEL-NEXT:    v_mul_lo_u32 v12, v13, v11
 ; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v17
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v17, v16, v11
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v12, v4
-; GISEL-NEXT:    v_mul_hi_u32 v12, v13, v11
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v17, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v17, v12
+; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, v4, v17, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v4, v16, v11
+; GISEL-NEXT:    v_mul_hi_u32 v17, v13, v11
+; GISEL-NEXT:    v_addc_u32_e64 v12, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
 ; GISEL-NEXT:    v_mul_hi_u32 v11, v16, v11
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v10, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
+; GISEL-NEXT:    v_addc_u32_e64 v4, s[4:5], v4, v17, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, v4, v12, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v10, vcc, 0, v10, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
 ; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v4
 ; GISEL-NEXT:    v_addc_u32_e32 v16, vcc, v16, v10, vcc
@@ -2411,23 +2149,18 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; GISEL-NEXT:    v_mul_lo_u32 v14, v13, v11
 ; GISEL-NEXT:    v_xor_b32_e32 v15, v1, v4
 ; GISEL-NEXT:    v_mul_hi_u32 v1, v13, v10
-; GISEL-NEXT:    v_mul_hi_u32 v10, v16, v10
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v14
-; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v1, v16, v11
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v14, v0
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, v0, v1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v0, v16, v11
+; GISEL-NEXT:    v_mul_hi_u32 v1, v16, v10
 ; GISEL-NEXT:    v_mul_hi_u32 v14, v13, v11
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v14
-; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v14
+; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
 ; GISEL-NEXT:    v_mul_hi_u32 v11, v16, v11
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v10, v1
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v14, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, v0, v10, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v11, v1
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v13, v0
 ; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v16, v1, vcc
@@ -2435,24 +2168,19 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; GISEL-NEXT:    v_mul_lo_u32 v11, v12, v1
 ; GISEL-NEXT:    v_mul_hi_u32 v13, v12, v0
 ; GISEL-NEXT:    v_mul_hi_u32 v0, v15, v0
-; GISEL-NEXT:    v_mul_hi_u32 v14, v15, v1
 ; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v13, v15, v1
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; GISEL-NEXT:    v_mul_hi_u32 v11, v12, v1
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v13, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v13, v11
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v0, v10
+; GISEL-NEXT:    v_addc_u32_e32 v10, vcc, v10, v13, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v10, v15, v1
+; GISEL-NEXT:    v_mul_hi_u32 v13, v12, v1
+; GISEL-NEXT:    v_addc_u32_e64 v11, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v10, v0
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v13, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v13, vcc, v0, v11, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v11, v15, v1
+; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], 0, 0, s[4:5]
 ; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v13, 0
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v10, vcc, 0, v10, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v14, v10
 ; GISEL-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v5, v10, v[1:2]
 ; GISEL-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v7, v13, v[10:11]
 ; GISEL-NEXT:    v_sub_i32_e32 v11, vcc, v12, v0
@@ -2463,7 +2191,7 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v11, v5
 ; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v12, v7
-; GISEL-NEXT:    v_subb_u32_e32 v10, vcc, v0, v7, vcc
+; GISEL-NEXT:    v_subb_u32_e32 v14, vcc, v0, v7, vcc
 ; GISEL-NEXT:    v_ashrrev_i32_e32 v0, 31, v9
 ; GISEL-NEXT:    v_cndmask_b32_e64 v13, v1, v6, s[4:5]
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v8, v0
@@ -2472,64 +2200,58 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; GISEL-NEXT:    v_xor_b32_e32 v8, v8, v0
 ; GISEL-NEXT:    v_cvt_f32_u32_e32 v0, v6
 ; GISEL-NEXT:    v_cvt_f32_u32_e32 v1, v8
-; GISEL-NEXT:    v_sub_i32_e32 v14, vcc, v11, v5
-; GISEL-NEXT:    v_subbrev_u32_e64 v15, s[4:5], 0, v10, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v15, vcc, v11, v5
+; GISEL-NEXT:    v_subbrev_u32_e64 v16, s[4:5], 0, v14, vcc
 ; GISEL-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
 ; GISEL-NEXT:    v_rcp_iflag_f32_e32 v0, v0
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v15, v7
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v16, v7
 ; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v14, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v15, v7
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v15, v5
 ; GISEL-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, v9, v1, s[4:5]
 ; GISEL-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
-; GISEL-NEXT:    v_trunc_f32_e32 v9, v1
-; GISEL-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v9
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v17, v0
-; GISEL-NEXT:    v_sub_i32_e64 v18, s[4:5], 0, v6
-; GISEL-NEXT:    v_subb_u32_e64 v19, s[4:5], 0, v8, s[4:5]
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v18, v17, 0
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v20, v9
-; GISEL-NEXT:    v_subb_u32_e32 v7, vcc, v10, v7, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v18, v20, v[1:2]
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v14, v5
-; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v19, v17, v[9:10]
-; GISEL-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v7, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v16
-; GISEL-NEXT:    v_cndmask_b32_e32 v7, v14, v1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v1, v20, v0
-; GISEL-NEXT:    v_mul_lo_u32 v10, v17, v9
-; GISEL-NEXT:    v_mul_hi_u32 v14, v17, v0
+; GISEL-NEXT:    v_trunc_f32_e32 v17, v1
+; GISEL-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v17
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v18, v0
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e64 v19, s[4:5], 0, v6
+; GISEL-NEXT:    v_subb_u32_e64 v20, s[4:5], 0, v8, s[4:5]
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v19, v18, 0
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v17, v17
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v16, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v21, v9, v10, s[4:5]
+; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v19, v17, v[1:2]
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v14, v7, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v20, v18, v[9:10]
+; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v15, v5
+; GISEL-NEXT:    v_subbrev_u32_e32 v7, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v1, v17, v0
+; GISEL-NEXT:    v_mul_lo_u32 v10, v18, v9
+; GISEL-NEXT:    v_mul_hi_u32 v14, v18, v0
+; GISEL-NEXT:    v_mul_hi_u32 v0, v17, v0
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v21
+; GISEL-NEXT:    v_add_i32_e64 v1, s[4:5], v1, v10
+; GISEL-NEXT:    v_addc_u32_e64 v1, s[4:5], v1, v14, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v1, v17, v9
+; GISEL-NEXT:    v_mul_hi_u32 v14, v18, v9
+; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v0, s[4:5], v1, v0
+; GISEL-NEXT:    v_mul_hi_u32 v9, v17, v9
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[6:7], v0, v14, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v1, s[6:7], 0, 0, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v10, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v1, s[4:5], 0, v1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v1, s[4:5], v9, v1
+; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v18, v0
 ; GISEL-NEXT:    v_cndmask_b32_e32 v5, v15, v5, vcc
-; GISEL-NEXT:    v_mul_hi_u32 v0, v20, v0
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v14
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v14, v20, v9
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v10, v1
-; GISEL-NEXT:    v_mul_hi_u32 v10, v17, v9
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v14, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v14, v10
-; GISEL-NEXT:    v_mul_hi_u32 v9, v20, v9
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v10, v1
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v9, v1
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v17, v0
-; GISEL-NEXT:    v_addc_u32_e32 v15, vcc, v20, v1, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v18, v14, 0
+; GISEL-NEXT:    v_addc_u32_e64 v15, s[4:5], v17, v1, s[4:5]
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v19, v14, 0
+; GISEL-NEXT:    v_cndmask_b32_e32 v7, v16, v7, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v13
-; GISEL-NEXT:    v_cndmask_b32_e32 v7, v11, v7, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v18, v15, v[1:2]
-; GISEL-NEXT:    v_xor_b32_e32 v1, v7, v4
+; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v19, v15, v[1:2]
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v12, v7, vcc
 ; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
-; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v19, v14, v[9:10]
-; GISEL-NEXT:    v_cndmask_b32_e32 v5, v12, v5, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v20, v14, v[9:10]
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v11, v5, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
 ; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v7, vcc
 ; GISEL-NEXT:    v_xor_b32_e32 v11, v2, v7
@@ -2539,21 +2261,16 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; GISEL-NEXT:    v_mul_hi_u32 v3, v14, v0
 ; GISEL-NEXT:    v_mul_hi_u32 v0, v15, v0
 ; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v3, v15, v9
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v10, v2
+; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, v2, v3, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v2, v15, v9
 ; GISEL-NEXT:    v_mul_hi_u32 v10, v14, v9
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v10
+; GISEL-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
 ; GISEL-NEXT:    v_mul_hi_u32 v9, v15, v9
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v10, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v2, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, v0, v3, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v9, v2
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v14, v0
 ; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, v15, v2, vcc
@@ -2561,29 +2278,25 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; GISEL-NEXT:    v_mul_lo_u32 v9, v11, v2
 ; GISEL-NEXT:    v_mul_hi_u32 v10, v11, v0
 ; GISEL-NEXT:    v_mul_hi_u32 v0, v12, v0
-; GISEL-NEXT:    v_xor_b32_e32 v5, v5, v4
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v4
 ; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v10, v12, v2
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v9, v3
-; GISEL-NEXT:    v_mul_hi_u32 v9, v11, v2
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v10, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v0, v3
-; GISEL-NEXT:    v_mul_hi_u32 v10, v12, v2
+; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v10, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v3, v12, v2
+; GISEL-NEXT:    v_mul_hi_u32 v10, v11, v2
+; GISEL-NEXT:    v_addc_u32_e64 v9, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v10, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v13, vcc, v0, v9, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v9, v12, v2
 ; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v6, v13, 0
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v10, v0
+; GISEL-NEXT:    v_xor_b32_e32 v5, v5, v4
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v0
 ; GISEL-NEXT:    v_mov_b32_e32 v0, v3
 ; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v6, v9, v[0:1]
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v1, v4
-; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v5, v4, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v5, v4
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v4, vcc
 ; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v8, v13, v[9:10]
 ; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v11, v2
 ; GISEL-NEXT:    v_subb_u32_e64 v4, s[4:5], v12, v3, vcc
@@ -2636,134 +2349,116 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
 ; CGP-NEXT:    s_cbranch_execz .LBB8_2
 ; CGP-NEXT:  ; %bb.1:
-; CGP-NEXT:    v_ashrrev_i32_e32 v1, 31, v12
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v11, v1
-; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v12, v1, vcc
-; CGP-NEXT:    v_xor_b32_e32 v0, v0, v1
-; CGP-NEXT:    v_xor_b32_e32 v1, v4, v1
-; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v0
-; CGP-NEXT:    v_cvt_f32_u32_e32 v10, v1
-; CGP-NEXT:    v_sub_i32_e32 v14, vcc, 0, v0
-; CGP-NEXT:    v_subb_u32_e32 v15, vcc, 0, v1, vcc
-; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v10
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
-; CGP-NEXT:    v_mul_f32_e32 v10, 0x2f800000, v4
-; CGP-NEXT:    v_trunc_f32_e32 v12, v10
-; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v12
-; CGP-NEXT:    v_cvt_u32_f32_e32 v13, v4
-; CGP-NEXT:    v_cvt_u32_f32_e32 v16, v12
-; CGP-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v14, v13, 0
-; CGP-NEXT:    v_mov_b32_e32 v4, v11
-; CGP-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v14, v16, v[4:5]
-; CGP-NEXT:    v_mul_lo_u32 v4, v16, v10
-; CGP-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v15, v13, v[11:12]
-; CGP-NEXT:    v_mul_hi_u32 v12, v13, v10
+; CGP-NEXT:    v_ashrrev_i32_e32 v0, 31, v12
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v11, v0
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v12, v0, vcc
+; CGP-NEXT:    v_xor_b32_e32 v12, v1, v0
+; CGP-NEXT:    v_xor_b32_e32 v4, v4, v0
+; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v12
+; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v4
+; CGP-NEXT:    v_sub_i32_e32 v14, vcc, 0, v12
+; CGP-NEXT:    v_subb_u32_e32 v15, vcc, 0, v4, vcc
+; CGP-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v0
+; CGP-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
+; CGP-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
+; CGP-NEXT:    v_trunc_f32_e32 v10, v1
+; CGP-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v10
+; CGP-NEXT:    v_cvt_u32_f32_e32 v13, v0
+; CGP-NEXT:    v_cvt_u32_f32_e32 v16, v10
+; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v14, v13, 0
+; CGP-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v14, v16, v[1:2]
+; CGP-NEXT:    v_mul_lo_u32 v1, v16, v0
+; CGP-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v15, v13, v[10:11]
+; CGP-NEXT:    v_mul_hi_u32 v11, v13, v0
+; CGP-NEXT:    v_mul_hi_u32 v0, v16, v0
+; CGP-NEXT:    v_mul_lo_u32 v17, v13, v10
+; CGP-NEXT:    v_mul_lo_u32 v18, v16, v10
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v17
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v11, vcc
+; CGP-NEXT:    v_mul_hi_u32 v11, v13, v10
+; CGP-NEXT:    v_addc_u32_e64 v1, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v18, v0
 ; CGP-NEXT:    v_mul_hi_u32 v10, v16, v10
-; CGP-NEXT:    v_mul_lo_u32 v17, v13, v11
-; CGP-NEXT:    v_mul_lo_u32 v18, v16, v11
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v17
-; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
-; CGP-NEXT:    v_mul_hi_u32 v12, v13, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v17, v4
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v18, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v17, v12
-; CGP-NEXT:    v_mul_hi_u32 v11, v16, v11
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v10, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v4
-; CGP-NEXT:    v_addc_u32_e32 v16, vcc, v16, v10, vcc
-; CGP-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v14, v13, 0
-; CGP-NEXT:    v_mov_b32_e32 v4, v11
-; CGP-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v14, v16, v[4:5]
+; CGP-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v11, vcc
+; CGP-NEXT:    v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v0, vcc, v0, v1, vcc
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, 0, v11, vcc
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v10, v1
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v0
+; CGP-NEXT:    v_addc_u32_e32 v16, vcc, v16, v1, vcc
+; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v14, v13, 0
+; CGP-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v14, v16, v[1:2]
 ; CGP-NEXT:    v_ashrrev_i32_e32 v14, 31, v9
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v8, v14
-; CGP-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v15, v13, v[11:12]
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v8, v14
+; CGP-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v15, v13, v[10:11]
 ; CGP-NEXT:    v_addc_u32_e32 v8, vcc, v9, v14, vcc
-; CGP-NEXT:    v_xor_b32_e32 v12, v4, v14
-; CGP-NEXT:    v_mul_lo_u32 v4, v16, v10
-; CGP-NEXT:    v_mul_lo_u32 v9, v13, v11
+; CGP-NEXT:    v_xor_b32_e32 v11, v1, v14
+; CGP-NEXT:    v_mul_lo_u32 v1, v16, v0
+; CGP-NEXT:    v_mul_lo_u32 v9, v13, v10
 ; CGP-NEXT:    v_xor_b32_e32 v15, v8, v14
-; CGP-NEXT:    v_mul_hi_u32 v8, v13, v10
-; CGP-NEXT:    v_mul_hi_u32 v10, v16, v10
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
-; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v8, v16, v11
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
-; CGP-NEXT:    v_mul_hi_u32 v9, v13, v11
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT:    v_mul_hi_u32 v8, v13, v0
+; CGP-NEXT:    v_mul_hi_u32 v0, v16, v0
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v9
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v8, vcc
+; CGP-NEXT:    v_mul_lo_u32 v1, v16, v10
+; CGP-NEXT:    v_mul_hi_u32 v9, v13, v10
+; CGP-NEXT:    v_addc_u32_e64 v8, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
+; CGP-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v9, vcc
+; CGP-NEXT:    v_mul_hi_u32 v9, v16, v10
+; CGP-NEXT:    v_addc_u32_e64 v1, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v0, vcc, v0, v8, vcc
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v9, v1
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v13, v0
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v16, v1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v8, v15, v0
+; CGP-NEXT:    v_mul_lo_u32 v9, v11, v1
+; CGP-NEXT:    v_mul_hi_u32 v10, v11, v0
+; CGP-NEXT:    v_mul_hi_u32 v0, v15, v0
 ; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
-; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
-; CGP-NEXT:    v_mul_hi_u32 v10, v16, v11
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT:    v_addc_u32_e32 v8, vcc, v8, v10, vcc
+; CGP-NEXT:    v_mul_lo_u32 v8, v15, v1
+; CGP-NEXT:    v_mul_hi_u32 v10, v11, v1
+; CGP-NEXT:    v_addc_u32_e64 v9, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
+; CGP-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v10, vcc
+; CGP-NEXT:    v_addc_u32_e32 v10, vcc, v0, v9, vcc
+; CGP-NEXT:    v_mul_hi_u32 v9, v15, v1
+; CGP-NEXT:    v_addc_u32_e64 v8, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v12, v10, 0
+; CGP-NEXT:    v_addc_u32_e32 v8, vcc, 0, v8, vcc
 ; CGP-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
-; CGP-NEXT:    v_addc_u32_e32 v8, vcc, v16, v8, vcc
-; CGP-NEXT:    v_mul_lo_u32 v9, v15, v4
-; CGP-NEXT:    v_mul_lo_u32 v10, v12, v8
-; CGP-NEXT:    v_mul_hi_u32 v11, v12, v4
-; CGP-NEXT:    v_mul_hi_u32 v4, v15, v4
-; CGP-NEXT:    v_mul_hi_u32 v13, v15, v8
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v11, v15, v8
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
-; CGP-NEXT:    v_mul_hi_u32 v10, v12, v8
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v4, v9
-; CGP-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v0, v11, 0
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v10, v4
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v13, v4
-; CGP-NEXT:    v_mov_b32_e32 v4, v9
-; CGP-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v0, v10, v[4:5]
-; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v12, v8
-; CGP-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v1, v11, v[9:10]
-; CGP-NEXT:    v_subb_u32_e64 v8, s[4:5], v15, v9, vcc
-; CGP-NEXT:    v_sub_i32_e64 v9, s[4:5], v15, v9
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v1
+; CGP-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v12, v8, v[1:2]
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v11, v0
+; CGP-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v4, v10, v[8:9]
+; CGP-NEXT:    v_subb_u32_e64 v1, s[4:5], v15, v8, vcc
+; CGP-NEXT:    v_sub_i32_e64 v8, s[4:5], v15, v8
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v12
 ; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v0
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v8, v1
-; CGP-NEXT:    v_subb_u32_e32 v9, vcc, v9, v1, vcc
-; CGP-NEXT:    v_cndmask_b32_e64 v10, v10, v11, s[4:5]
-; CGP-NEXT:    v_sub_i32_e32 v11, vcc, v4, v0
-; CGP-NEXT:    v_subbrev_u32_e64 v12, s[4:5], 0, v9, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v12, v1
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v4
+; CGP-NEXT:    v_subb_u32_e32 v8, vcc, v8, v4, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v9, v9, v10, s[4:5]
+; CGP-NEXT:    v_sub_i32_e32 v10, vcc, v0, v12
+; CGP-NEXT:    v_subbrev_u32_e64 v11, s[4:5], 0, v8, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v11, v4
 ; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v11, v0
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v12
 ; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v12, v1
-; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v9, v1, vcc
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v11, v0
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v11, v4
+; CGP-NEXT:    v_subb_u32_e32 v4, vcc, v8, v4, vcc
+; CGP-NEXT:    v_sub_i32_e32 v8, vcc, v10, v12
 ; CGP-NEXT:    v_cndmask_b32_e64 v13, v13, v15, s[4:5]
-; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CGP-NEXT:    v_subbrev_u32_e32 v4, vcc, 0, v4, vcc
 ; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v13
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v11, v0, vcc
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v12, v1, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v8, v10, v8, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v11, v4, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v9
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
 ; CGP-NEXT:    v_xor_b32_e32 v0, v0, v14
 ; CGP-NEXT:    v_xor_b32_e32 v1, v1, v14
 ; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v14
@@ -2810,137 +2505,120 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; CGP-NEXT:    s_setpc_b64 s[30:31]
 ; CGP-NEXT:  .LBB8_7:
-; CGP-NEXT:    v_ashrrev_i32_e32 v3, 31, v10
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v9, v3
-; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v10, v3, vcc
-; CGP-NEXT:    v_xor_b32_e32 v2, v2, v3
-; CGP-NEXT:    v_xor_b32_e32 v3, v4, v3
-; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
-; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v3
-; CGP-NEXT:    v_sub_i32_e32 v12, vcc, 0, v2
-; CGP-NEXT:    v_subb_u32_e32 v13, vcc, 0, v3, vcc
-; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v6
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
-; CGP-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v4
-; CGP-NEXT:    v_trunc_f32_e32 v6, v6
-; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v6
-; CGP-NEXT:    v_cvt_u32_f32_e32 v11, v4
-; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
-; CGP-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v12, v11, 0
-; CGP-NEXT:    v_mov_b32_e32 v4, v9
-; CGP-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v12, v6, v[4:5]
-; CGP-NEXT:    v_mul_lo_u32 v4, v6, v8
-; CGP-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v13, v11, v[9:10]
-; CGP-NEXT:    v_mul_hi_u32 v10, v11, v8
-; CGP-NEXT:    v_mul_hi_u32 v8, v6, v8
-; CGP-NEXT:    v_mul_lo_u32 v14, v11, v9
-; CGP-NEXT:    v_mul_lo_u32 v15, v6, v9
+; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v10
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v9, v2
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v10, v2, vcc
+; CGP-NEXT:    v_xor_b32_e32 v6, v3, v2
+; CGP-NEXT:    v_xor_b32_e32 v8, v4, v2
+; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v6
+; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v8
+; CGP-NEXT:    v_sub_i32_e32 v10, vcc, 0, v6
+; CGP-NEXT:    v_subb_u32_e32 v11, vcc, 0, v8, vcc
+; CGP-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v2, v2
+; CGP-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
+; CGP-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
+; CGP-NEXT:    v_trunc_f32_e32 v4, v3
+; CGP-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v4
+; CGP-NEXT:    v_cvt_u32_f32_e32 v9, v2
+; CGP-NEXT:    v_cvt_u32_f32_e32 v12, v4
+; CGP-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v10, v9, 0
+; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v10, v12, v[3:4]
+; CGP-NEXT:    v_mul_hi_u32 v13, v9, v2
+; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v11, v9, v[3:4]
+; CGP-NEXT:    v_mul_lo_u32 v4, v12, v2
+; CGP-NEXT:    v_mul_hi_u32 v2, v12, v2
+; CGP-NEXT:    v_mul_lo_u32 v14, v9, v3
+; CGP-NEXT:    v_mul_lo_u32 v15, v12, v3
 ; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
-; CGP-NEXT:    v_mul_hi_u32 v10, v11, v9
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v14, v4
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v15, v8
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v14, v10
-; CGP-NEXT:    v_mul_hi_u32 v9, v6, v9
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v4
-; CGP-NEXT:    v_addc_u32_e32 v6, vcc, v6, v8, vcc
-; CGP-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v12, v11, 0
-; CGP-NEXT:    v_mov_b32_e32 v4, v9
-; CGP-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v12, v6, v[4:5]
-; CGP-NEXT:    v_ashrrev_i32_e32 v12, 31, v7
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v5, v12
-; CGP-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v13, v11, v[9:10]
-; CGP-NEXT:    v_addc_u32_e32 v5, vcc, v7, v12, vcc
-; CGP-NEXT:    v_xor_b32_e32 v7, v4, v12
-; CGP-NEXT:    v_mul_lo_u32 v4, v6, v8
-; CGP-NEXT:    v_mul_lo_u32 v10, v11, v9
-; CGP-NEXT:    v_xor_b32_e32 v13, v5, v12
-; CGP-NEXT:    v_mul_hi_u32 v5, v11, v8
-; CGP-NEXT:    v_mul_hi_u32 v8, v6, v8
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v5, v6, v9
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v10, v4
-; CGP-NEXT:    v_mul_hi_u32 v10, v11, v9
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
-; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
-; CGP-NEXT:    v_mul_hi_u32 v9, v6, v9
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v4, v13, vcc
+; CGP-NEXT:    v_mul_hi_u32 v13, v9, v3
+; CGP-NEXT:    v_addc_u32_e64 v4, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v15, v2
+; CGP-NEXT:    v_mul_hi_u32 v3, v12, v3
+; CGP-NEXT:    v_addc_u32_e64 v2, s[4:5], v2, v13, vcc
+; CGP-NEXT:    v_addc_u32_e64 v13, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v2, vcc, v2, v4, vcc
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, 0, v13, vcc
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v2
+; CGP-NEXT:    v_addc_u32_e32 v12, vcc, v12, v3, vcc
+; CGP-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v10, v9, 0
+; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v10, v12, v[3:4]
+; CGP-NEXT:    v_ashrrev_i32_e32 v10, 31, v7
+; CGP-NEXT:    v_mul_hi_u32 v13, v9, v2
+; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v11, v9, v[3:4]
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v5, v10
+; CGP-NEXT:    v_addc_u32_e32 v5, vcc, v7, v10, vcc
+; CGP-NEXT:    v_xor_b32_e32 v7, v4, v10
+; CGP-NEXT:    v_mul_lo_u32 v4, v12, v2
+; CGP-NEXT:    v_mul_lo_u32 v11, v9, v3
+; CGP-NEXT:    v_mul_hi_u32 v2, v12, v2
+; CGP-NEXT:    v_xor_b32_e32 v5, v5, v10
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v11
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v4, v13, vcc
+; CGP-NEXT:    v_mul_lo_u32 v4, v12, v3
+; CGP-NEXT:    v_mul_hi_u32 v13, v9, v3
+; CGP-NEXT:    v_addc_u32_e64 v11, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
+; CGP-NEXT:    v_mul_hi_u32 v3, v12, v3
+; CGP-NEXT:    v_addc_u32_e64 v2, s[4:5], v2, v13, vcc
+; CGP-NEXT:    v_addc_u32_e64 v4, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v2, vcc, v2, v11, vcc
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, 0, v4, vcc
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v9, v2
+; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v12, v3, vcc
+; CGP-NEXT:    v_mul_lo_u32 v4, v5, v2
+; CGP-NEXT:    v_mul_lo_u32 v9, v7, v3
+; CGP-NEXT:    v_mul_hi_u32 v11, v7, v2
+; CGP-NEXT:    v_mul_hi_u32 v2, v5, v2
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v4, v11, vcc
+; CGP-NEXT:    v_mul_lo_u32 v4, v5, v3
+; CGP-NEXT:    v_mul_hi_u32 v11, v7, v3
+; CGP-NEXT:    v_addc_u32_e64 v9, s[4:5], 0, 0, vcc
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
+; CGP-NEXT:    v_addc_u32_e64 v2, s[4:5], v2, v11, vcc
+; CGP-NEXT:    v_addc_u32_e32 v9, vcc, v2, v9, vcc
+; CGP-NEXT:    v_mul_hi_u32 v11, v5, v3
+; CGP-NEXT:    v_addc_u32_e64 v4, s[4:5], 0, 0, s[4:5]
+; CGP-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v6, v9, 0
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, 0, v4, vcc
 ; CGP-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
-; CGP-NEXT:    v_addc_u32_e32 v5, vcc, v6, v5, vcc
-; CGP-NEXT:    v_mul_lo_u32 v6, v13, v4
-; CGP-NEXT:    v_mul_lo_u32 v8, v7, v5
-; CGP-NEXT:    v_mul_hi_u32 v9, v7, v4
-; CGP-NEXT:    v_mul_hi_u32 v4, v13, v4
-; CGP-NEXT:    v_mul_hi_u32 v10, v13, v5
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
-; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v9, v13, v5
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
-; CGP-NEXT:    v_mul_hi_u32 v8, v7, v5
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
-; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v4, v6
-; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v2, v9, 0
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v10, v6
-; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v2, v6, v[5:6]
-; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v7, v4
-; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v3, v9, v[5:6]
-; CGP-NEXT:    v_subb_u32_e64 v6, s[4:5], v13, v5, vcc
-; CGP-NEXT:    v_sub_i32_e64 v5, s[4:5], v13, v5
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v3
+; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v6, v4, v[3:4]
+; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v7, v2
+; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v8, v9, v[3:4]
+; CGP-NEXT:    v_subb_u32_e64 v4, s[4:5], v5, v3, vcc
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v5, v3
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v8
+; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v6
 ; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v2
-; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v6, v3
-; CGP-NEXT:    v_subb_u32_e32 v5, vcc, v5, v3, vcc
-; CGP-NEXT:    v_cndmask_b32_e64 v7, v7, v8, s[4:5]
-; CGP-NEXT:    v_sub_i32_e32 v8, vcc, v4, v2
-; CGP-NEXT:    v_subbrev_u32_e64 v9, s[4:5], 0, v5, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v2
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v4, v8
+; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v8, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v5, v5, v7, s[4:5]
+; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v2, v6
+; CGP-NEXT:    v_subbrev_u32_e64 v9, s[4:5], 0, v3, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v8
 ; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v9, v3
-; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v5, v3, vcc
-; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v8, v2
-; CGP-NEXT:    v_cndmask_b32_e64 v10, v10, v11, s[4:5]
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v6
+; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v8, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v9, v8
+; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v7, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v11, v11, v12, s[4:5]
 ; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
-; CGP-NEXT:    v_cndmask_b32_e32 v2, v8, v2, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v11
+; CGP-NEXT:    v_cndmask_b32_e32 v6, v7, v6, vcc
 ; CGP-NEXT:    v_cndmask_b32_e32 v3, v9, v3, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
-; CGP-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
-; CGP-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
-; CGP-NEXT:    v_xor_b32_e32 v2, v2, v12
-; CGP-NEXT:    v_xor_b32_e32 v3, v3, v12
-; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v12
-; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v12, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
+; CGP-NEXT:    v_xor_b32_e32 v2, v2, v10
+; CGP-NEXT:    v_xor_b32_e32 v3, v3, v10
+; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
+; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v10, vcc
 ; CGP-NEXT:    ; implicit-def: $vgpr9_vgpr10
 ; CGP-NEXT:    ; implicit-def: $vgpr5
 ; CGP-NEXT:    s_andn2_saveexec_b64 s[4:5], s[6:7]
@@ -3036,223 +2714,189 @@ define <2 x i64> @v_srem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_and_b32_e32 v1, 0xffffff, v4
 ; GISEL-NEXT:    v_add_i32_e64 v3, s[4:5], 0, 0
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, 0, v1
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v1
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v3
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v1
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v9, v3
 ; GISEL-NEXT:    v_sub_i32_e32 v11, vcc, 0, v1
 ; GISEL-NEXT:    v_subb_u32_e32 v12, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v4
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v5
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v9
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
 ; GISEL-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
-; GISEL-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
-; GISEL-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v5
-; GISEL-NEXT:    v_trunc_f32_e32 v9, v7
-; GISEL-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v9
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v10, v5
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v13, v9
-; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v11, v10, 0
-; GISEL-NEXT:    v_mov_b32_e32 v5, v8
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v11, v13, v[5:6]
-; GISEL-NEXT:    v_mul_lo_u32 v5, v13, v7
-; GISEL-NEXT:    v_mul_hi_u32 v14, v10, v7
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v12, v10, v[8:9]
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
+; GISEL-NEXT:    v_trunc_f32_e32 v7, v5
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v7
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v10, v4
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v13, v7
+; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v11, v10, 0
+; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v11, v13, v[5:6]
+; GISEL-NEXT:    v_mul_lo_u32 v5, v13, v4
+; GISEL-NEXT:    v_mul_hi_u32 v14, v10, v4
+; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v12, v10, v[7:8]
+; GISEL-NEXT:    v_mul_hi_u32 v4, v13, v4
+; GISEL-NEXT:    v_mul_lo_u32 v8, v10, v7
+; GISEL-NEXT:    v_mul_lo_u32 v15, v13, v7
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
+; GISEL-NEXT:    v_mul_hi_u32 v8, v10, v7
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v14, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v5, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v15, v4
 ; GISEL-NEXT:    v_mul_hi_u32 v7, v13, v7
-; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v8
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v14
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v14, v13, v8
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
-; GISEL-NEXT:    v_mul_hi_u32 v9, v10, v8
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v14, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
-; GISEL-NEXT:    v_mul_hi_u32 v8, v13, v8
+; GISEL-NEXT:    v_addc_u32_e64 v4, s[4:5], v4, v8, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v8, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, v4, v5, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v8, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v5
-; GISEL-NEXT:    v_addc_u32_e32 v13, vcc, v13, v7, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v11, v10, 0
-; GISEL-NEXT:    v_mov_b32_e32 v5, v8
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v11, v13, v[5:6]
-; GISEL-NEXT:    v_mul_lo_u32 v5, v13, v7
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v4
+; GISEL-NEXT:    v_addc_u32_e32 v13, vcc, v13, v5, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v11, v10, 0
+; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v11, v13, v[5:6]
+; GISEL-NEXT:    v_mul_lo_u32 v5, v13, v4
 ; GISEL-NEXT:    v_add_i32_e32 v11, vcc, 0, v0
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v12, v10, v[8:9]
-; GISEL-NEXT:    v_mul_hi_u32 v0, v10, v7
-; GISEL-NEXT:    v_mul_hi_u32 v7, v13, v7
-; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v8
+; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v12, v10, v[7:8]
+; GISEL-NEXT:    v_mul_hi_u32 v0, v10, v4
+; GISEL-NEXT:    v_mul_hi_u32 v4, v13, v4
+; GISEL-NEXT:    v_mul_lo_u32 v8, v10, v7
 ; GISEL-NEXT:    v_and_b32_e32 v12, 0xffffff, v2
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v5, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v5, v13, v8
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
-; GISEL-NEXT:    v_mul_hi_u32 v9, v10, v8
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
-; GISEL-NEXT:    v_mul_hi_u32 v8, v13, v8
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v5, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
+; GISEL-NEXT:    v_and_b32_e32 v2, 0xffffff, v6
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, v5, v0, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v0, v13, v7
+; GISEL-NEXT:    v_mul_hi_u32 v8, v10, v7
+; GISEL-NEXT:    v_addc_u32_e64 v5, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT:    v_mul_hi_u32 v7, v13, v7
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v8, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v4, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, v0, v5, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, 0, v4, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v10, v0
-; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v13, v5, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v7, v3, v0
-; GISEL-NEXT:    v_mul_lo_u32 v8, v11, v5
-; GISEL-NEXT:    v_mul_hi_u32 v9, v11, v0
+; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, v13, v4, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v5, v3, v0
+; GISEL-NEXT:    v_mul_lo_u32 v7, v11, v4
+; GISEL-NEXT:    v_mul_hi_u32 v8, v11, v0
 ; GISEL-NEXT:    v_mul_hi_u32 v0, v3, v0
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v9, v3, v5
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
-; GISEL-NEXT:    v_mul_hi_u32 v8, v11, v5
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v8
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v0, v7
-; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v5
-; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v1, v10, 0
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v0
-; GISEL-NEXT:    v_mov_b32_e32 v0, v8
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v1, v5, v[0:1]
-; GISEL-NEXT:    v_and_b32_e32 v0, 0xffffff, v6
-; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v11, v7
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v3, v10, v[8:9]
-; GISEL-NEXT:    v_subb_u32_e64 v8, s[4:5], v3, v5, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v5, s[4:5], v3, v5
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v2, s[4:5], 0, v0
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v0, v2
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v8, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, v6, v9, s[4:5]
-; GISEL-NEXT:    v_subb_u32_e32 v10, vcc, v5, v3, vcc
-; GISEL-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v4
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v0, v0
-; GISEL-NEXT:    v_sub_i32_e32 v11, vcc, v7, v1
-; GISEL-NEXT:    v_subbrev_u32_e64 v13, s[4:5], 0, v10, vcc
-; GISEL-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
-; GISEL-NEXT:    v_mul_f32_e32 v4, 0x2f800000, v0
-; GISEL-NEXT:    v_trunc_f32_e32 v6, v4
-; GISEL-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v6
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v14, v0
-; GISEL-NEXT:    v_sub_i32_e64 v15, s[4:5], 0, v2
-; GISEL-NEXT:    v_subb_u32_e64 v16, s[4:5], 0, v3, s[4:5]
-; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v15, v14, 0
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v17, v6
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v13, v3
-; GISEL-NEXT:    v_mov_b32_e32 v0, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, -1, s[4:5]
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v15, v17, v[0:1]
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v11, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[4:5]
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v16, v14, v[5:6]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v13, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, v18, v0, s[4:5]
-; GISEL-NEXT:    v_mul_lo_u32 v0, v17, v4
-; GISEL-NEXT:    v_mul_lo_u32 v18, v14, v5
-; GISEL-NEXT:    v_mul_hi_u32 v19, v14, v4
-; GISEL-NEXT:    v_subb_u32_e32 v10, vcc, v10, v3, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v19
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v19, v17, v5
-; GISEL-NEXT:    v_mul_hi_u32 v4, v17, v4
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v18, v0
-; GISEL-NEXT:    v_mul_hi_u32 v18, v14, v5
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v19, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v18, vcc, v19, v18
-; GISEL-NEXT:    v_mul_hi_u32 v5, v17, v5
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v18, v4
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v0
-; GISEL-NEXT:    v_addc_u32_e32 v17, vcc, v17, v4, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v15, v14, 0
-; GISEL-NEXT:    v_sub_i32_e32 v18, vcc, v11, v1
-; GISEL-NEXT:    v_mov_b32_e32 v0, v5
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v15, v17, v[0:1]
-; GISEL-NEXT:    v_subbrev_u32_e32 v10, vcc, 0, v10, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v16, v14, v[0:1]
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
-; GISEL-NEXT:    v_cndmask_b32_e32 v5, v11, v18, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v6, v13, v10, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v9
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v7, v5, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v5, v17, v4
-; GISEL-NEXT:    v_mul_lo_u32 v7, v14, v0
-; GISEL-NEXT:    v_mul_hi_u32 v10, v14, v4
-; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], 0, v12
-; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s[4:5]
-; GISEL-NEXT:    v_mul_lo_u32 v10, v17, v0
-; GISEL-NEXT:    v_mul_hi_u32 v4, v17, v4
-; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v7, v5
-; GISEL-NEXT:    v_mul_hi_u32 v7, v14, v0
-; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v10, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v10, v7
-; GISEL-NEXT:    v_mul_hi_u32 v0, v17, v0
-; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v7, v5
-; GISEL-NEXT:    v_add_i32_e64 v0, s[4:5], v0, v5
-; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v14, v4
-; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v17, v0, s[4:5]
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v8, vcc
 ; GISEL-NEXT:    v_mul_lo_u32 v5, v3, v4
-; GISEL-NEXT:    v_mul_lo_u32 v7, v9, v0
-; GISEL-NEXT:    v_cndmask_b32_e32 v8, v8, v6, vcc
-; GISEL-NEXT:    v_mul_hi_u32 v6, v9, v4
+; GISEL-NEXT:    v_mul_hi_u32 v8, v11, v4
+; GISEL-NEXT:    v_addc_u32_e64 v7, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v5, v0
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v0, v8, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v5, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v10, vcc, v0, v7, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v7, v3, v4
+; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v1, v10, 0
+; GISEL-NEXT:    v_add_i32_e64 v2, s[4:5], 0, v2
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v0
+; GISEL-NEXT:    v_mov_b32_e32 v0, v5
+; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v1, v7, v[0:1]
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v3, v10, v[7:8]
+; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v11, v4
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v2
+; GISEL-NEXT:    v_subb_u32_e64 v0, s[4:5], v3, v5, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v6, s[4:5], v3, v5
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v9
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v3
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v0, v3
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, v5, v8, s[4:5]
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
+; GISEL-NEXT:    v_trunc_f32_e32 v9, v5
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v9
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v10, v4
+; GISEL-NEXT:    v_sub_i32_e64 v11, s[4:5], 0, v2
+; GISEL-NEXT:    v_subb_u32_e64 v13, s[4:5], 0, v3, s[4:5]
+; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v11, v10, 0
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v9, v9
+; GISEL-NEXT:    v_subb_u32_e32 v14, vcc, v6, v3, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v15, vcc, v7, v1
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v11, v9, v[5:6]
+; GISEL-NEXT:    v_mul_lo_u32 v17, v9, v4
+; GISEL-NEXT:    v_subbrev_u32_e64 v16, s[4:5], 0, v14, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v13, v10, v[5:6]
+; GISEL-NEXT:    v_mul_hi_u32 v20, v10, v4
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v16, v3
+; GISEL-NEXT:    v_mul_lo_u32 v18, v10, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v15, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, -1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v17, s[4:5], v17, v18
+; GISEL-NEXT:    v_addc_u32_e64 v17, s[4:5], v17, v20, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v17, v9, v5
+; GISEL-NEXT:    v_mul_hi_u32 v4, v9, v4
+; GISEL-NEXT:    v_mul_hi_u32 v20, v10, v5
+; GISEL-NEXT:    v_addc_u32_e64 v18, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v17, v4
+; GISEL-NEXT:    v_mul_hi_u32 v5, v9, v5
+; GISEL-NEXT:    v_addc_u32_e64 v4, s[6:7], v4, v20, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v17, s[6:7], 0, 0, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v4, s[4:5], v4, v18, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v17, s[4:5], 0, v17, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v17
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v4
+; GISEL-NEXT:    v_addc_u32_e64 v9, s[4:5], v9, v5, s[4:5]
+; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v11, v10, 0
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v16, v3
+; GISEL-NEXT:    v_cndmask_b32_e64 v17, v6, v19, s[4:5]
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v11, v9, v[5:6]
+; GISEL-NEXT:    v_subb_u32_e32 v14, vcc, v14, v3, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v13, v10, v[5:6]
+; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v15, v1
+; GISEL-NEXT:    v_subbrev_u32_e32 v11, vcc, 0, v14, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v17
+; GISEL-NEXT:    v_cndmask_b32_e32 v6, v16, v11, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v11, v9, v4
+; GISEL-NEXT:    v_mul_lo_u32 v13, v10, v5
+; GISEL-NEXT:    v_mul_hi_u32 v14, v10, v4
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v15, v1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, 0, v12
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
+; GISEL-NEXT:    v_addc_u32_e32 v11, vcc, v11, v14, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v11, v9, v5
+; GISEL-NEXT:    v_mul_hi_u32 v4, v9, v4
+; GISEL-NEXT:    v_mul_hi_u32 v14, v10, v5
+; GISEL-NEXT:    v_addc_u32_e64 v13, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
+; GISEL-NEXT:    v_mul_hi_u32 v5, v9, v5
+; GISEL-NEXT:    v_addc_u32_e64 v4, s[4:5], v4, v14, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, v4, v13, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v11, vcc, 0, v11, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v11
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v10, v4
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v9, v5, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v9, v3, v4
+; GISEL-NEXT:    v_mul_lo_u32 v10, v12, v5
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v7, v12, v4
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v10
 ; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v6, v3, v0
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
-; GISEL-NEXT:    v_mul_hi_u32 v7, v9, v0
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v4, v5
-; GISEL-NEXT:    v_mul_hi_u32 v0, v3, v0
-; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v2, v7, 0
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v10
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v0, v6
+; GISEL-NEXT:    v_addc_u32_e64 v7, s[4:5], v8, v7, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v7, v3, v5
+; GISEL-NEXT:    v_mul_hi_u32 v9, v12, v5
+; GISEL-NEXT:    v_addc_u32_e64 v8, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v7, v4
+; GISEL-NEXT:    v_addc_u32_e64 v4, s[6:7], v4, v9, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v7, s[6:7], 0, 0, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v8, s[4:5], v4, v8, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v7, s[4:5], 0, v7, s[4:5]
+; GISEL-NEXT:    v_mul_hi_u32 v9, v3, v5
+; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v2, v8, 0
+; GISEL-NEXT:    v_cndmask_b32_e32 v10, v0, v6, vcc
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v9, v7
 ; GISEL-NEXT:    v_mov_b32_e32 v0, v5
 ; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v2, v6, v[0:1]
 ; GISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0, v1
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v3, v7, v[5:6]
-; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v8, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v9, v4
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v3, v8, v[5:6]
+; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v10, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v12, v4
 ; GISEL-NEXT:    v_subb_u32_e64 v6, s[4:5], v3, v5, vcc
 ; GISEL-NEXT:    v_sub_i32_e64 v5, s[4:5], v3, v5
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v3
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
index d15551365707b..b152c06b9fda6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
@@ -16,10 +16,10 @@ define i64 @v_udiv_i64(i64 %num, i64 %den) {
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v6, v2
 ; CHECK-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
+; CHECK-NEXT:    s_xor_b64 s[8:9], exec, s[4:5]
 ; CHECK-NEXT:    s_cbranch_execnz .LBB0_3
 ; CHECK-NEXT:  ; %bb.1: ; %Flow
-; CHECK-NEXT:    s_andn2_saveexec_b64 s[6:7], s[6:7]
+; CHECK-NEXT:    s_andn2_saveexec_b64 s[6:7], s[8:9]
 ; CHECK-NEXT:    s_cbranch_execnz .LBB0_4
 ; CHECK-NEXT:  .LBB0_2:
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[6:7]
@@ -50,18 +50,13 @@ define i64 @v_udiv_i64(i64 %num, i64 %den) {
 ; CHECK-NEXT:    v_mul_hi_u32 v14, v0, v8
 ; CHECK-NEXT:    v_mul_hi_u32 v8, v6, v8
 ; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
-; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
-; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
-; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v14
-; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v13, v12
-; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
-; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CHECK-NEXT:    v_addc_u32_e32 v10, vcc, v10, v12, vcc
+; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v13, v9
+; CHECK-NEXT:    v_addc_u32_e64 v9, s[6:7], v9, v14, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e64 v10, s[10:11], 0, 0, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v11, s[6:7], 0, 0, s[6:7]
+; CHECK-NEXT:    v_addc_u32_e64 v9, vcc, v9, v10, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v10, vcc, 0, v11, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
 ; CHECK-NEXT:    v_addc_u32_e32 v6, vcc, v6, v8, vcc
@@ -79,18 +74,13 @@ define i64 @v_udiv_i64(i64 %num, i64 %den) {
 ; CHECK-NEXT:    v_mul_hi_u32 v12, v0, v1
 ; CHECK-NEXT:    v_mul_hi_u32 v1, v6, v1
 ; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v10, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v11
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
-; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v10, v7
-; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; CHECK-NEXT:    v_addc_u32_e32 v7, vcc, v7, v11, vcc
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v9, v8
+; CHECK-NEXT:    v_addc_u32_e64 v7, s[6:7], v7, v12, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e64 v8, s[10:11], 0, 0, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v9, s[6:7], 0, 0, s[6:7]
+; CHECK-NEXT:    v_addc_u32_e64 v7, vcc, v7, v8, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v8, vcc, 0, v9, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v8
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
 ; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v6, v1, vcc
@@ -102,39 +92,34 @@ define i64 @v_udiv_i64(i64 %num, i64 %den) {
 ; CHECK-NEXT:    v_mul_hi_u32 v10, v4, v1
 ; CHECK-NEXT:    v_mul_hi_u32 v1, v5, v1
 ; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
-; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v10
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
-; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; CHECK-NEXT:    v_mul_lo_u32 v7, v2, v0
-; CHECK-NEXT:    v_mul_lo_u32 v8, v3, v0
-; CHECK-NEXT:    v_mul_hi_u32 v9, v2, v0
+; CHECK-NEXT:    v_addc_u32_e32 v6, vcc, v6, v7, vcc
+; CHECK-NEXT:    v_add_i32_e64 v0, s[4:5], v9, v0
+; CHECK-NEXT:    v_addc_u32_e64 v0, s[6:7], v0, v10, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e64 v6, s[10:11], 0, 0, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v7, s[6:7], 0, 0, s[6:7]
+; CHECK-NEXT:    v_addc_u32_e64 v0, vcc, v0, v6, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v6, vcc, 0, v7, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
-; CHECK-NEXT:    v_mul_lo_u32 v6, v2, v1
+; CHECK-NEXT:    v_mul_lo_u32 v6, v2, v0
+; CHECK-NEXT:    v_mul_lo_u32 v7, v3, v0
+; CHECK-NEXT:    v_mul_hi_u32 v8, v2, v0
+; CHECK-NEXT:    v_mul_lo_u32 v9, v2, v1
 ; CHECK-NEXT:    v_add_i32_e32 v10, vcc, 1, v0
 ; CHECK-NEXT:    v_addc_u32_e32 v11, vcc, 0, v1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, 1, v10
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, 1, v10
 ; CHECK-NEXT:    v_addc_u32_e32 v12, vcc, 0, v11, vcc
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
-; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, v4, v7
-; CHECK-NEXT:    v_subb_u32_e64 v7, s[4:5], v5, v6, vcc
-; CHECK-NEXT:    v_sub_i32_e64 v5, s[4:5], v5, v6
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
+; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, v4, v6
+; CHECK-NEXT:    v_subb_u32_e64 v6, s[4:5], v5, v7, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v5, s[4:5], v5, v7
 ; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v2
-; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v3
-; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
 ; CHECK-NEXT:    v_subb_u32_e32 v5, vcc, v5, v3, vcc
-; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v7, v3
-; CHECK-NEXT:    v_cndmask_b32_e32 v6, v9, v6, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v6, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v6, v8, v7, vcc
 ; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, v4, v2
 ; CHECK-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
 ; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v2
@@ -144,7 +129,7 @@ define i64 @v_udiv_i64(i64 %num, i64 %den) {
 ; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v3
 ; CHECK-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
-; CHECK-NEXT:    v_cndmask_b32_e32 v2, v10, v8, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v2, v10, v9, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v3, v11, v12, vcc
 ; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
@@ -152,7 +137,7 @@ define i64 @v_udiv_i64(i64 %num, i64 %den) {
 ; CHECK-NEXT:    ; implicit-def: $vgpr6
 ; CHECK-NEXT:    ; implicit-def: $vgpr2
 ; CHECK-NEXT:    ; implicit-def: $vgpr4
-; CHECK-NEXT:    s_andn2_saveexec_b64 s[6:7], s[6:7]
+; CHECK-NEXT:    s_andn2_saveexec_b64 s[6:7], s[8:9]
 ; CHECK-NEXT:    s_cbranch_execz .LBB0_2
 ; CHECK-NEXT:  .LBB0_4:
 ; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v6
@@ -197,10 +182,10 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:  ; %bb.1:
 ; CHECK-NEXT:    v_mov_b32_e32 v0, s3
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v1, s3
-; CHECK-NEXT:    s_sub_u32 s4, 0, s2
+; CHECK-NEXT:    s_sub_u32 s10, 0, s2
 ; CHECK-NEXT:    v_mov_b32_e32 v3, s1
 ; CHECK-NEXT:    v_madmk_f32 v1, v1, 0x4f800000, v2
-; CHECK-NEXT:    s_subb_u32 s5, 0, s3
+; CHECK-NEXT:    s_subb_u32 s11, 0, s3
 ; CHECK-NEXT:    v_rcp_iflag_f32_e32 v1, v1
 ; CHECK-NEXT:    v_mul_f32_e32 v1, 0x5f7ffffc, v1
 ; CHECK-NEXT:    v_mul_f32_e32 v4, 0x2f800000, v1
@@ -208,10 +193,10 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:    v_mac_f32_e32 v1, 0xcf800000, v4
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v4, v4
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; CHECK-NEXT:    v_mul_lo_u32 v5, s4, v4
-; CHECK-NEXT:    v_mul_lo_u32 v6, s4, v1
-; CHECK-NEXT:    v_mul_lo_u32 v7, s5, v1
-; CHECK-NEXT:    v_mul_hi_u32 v8, s4, v1
+; CHECK-NEXT:    v_mul_lo_u32 v5, s10, v4
+; CHECK-NEXT:    v_mul_lo_u32 v6, s10, v1
+; CHECK-NEXT:    v_mul_lo_u32 v7, s11, v1
+; CHECK-NEXT:    v_mul_hi_u32 v8, s10, v1
 ; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
 ; CHECK-NEXT:    v_mul_lo_u32 v7, v4, v6
 ; CHECK-NEXT:    v_mul_hi_u32 v9, v1, v6
@@ -222,25 +207,20 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:    v_mul_hi_u32 v11, v1, v5
 ; CHECK-NEXT:    v_mul_hi_u32 v5, v4, v5
 ; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v10, v6
-; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v11
-; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v10, v9
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
+; CHECK-NEXT:    v_addc_u32_e32 v7, vcc, v7, v9, vcc
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v10, v6
+; CHECK-NEXT:    v_addc_u32_e64 v6, s[6:7], v6, v11, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e64 v7, s[8:9], 0, 0, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v8, s[6:7], 0, 0, s[6:7]
+; CHECK-NEXT:    v_addc_u32_e64 v6, vcc, v6, v7, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v7, vcc, 0, v8, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
 ; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
 ; CHECK-NEXT:    v_addc_u32_e32 v4, vcc, v4, v5, vcc
-; CHECK-NEXT:    v_mul_lo_u32 v5, s4, v1
-; CHECK-NEXT:    v_mul_lo_u32 v6, s5, v1
-; CHECK-NEXT:    v_mul_hi_u32 v7, s4, v1
-; CHECK-NEXT:    v_mul_lo_u32 v8, s4, v4
+; CHECK-NEXT:    v_mul_lo_u32 v5, s10, v1
+; CHECK-NEXT:    v_mul_lo_u32 v6, s11, v1
+; CHECK-NEXT:    v_mul_hi_u32 v7, s10, v1
+; CHECK-NEXT:    v_mul_lo_u32 v8, s10, v4
 ; CHECK-NEXT:    v_mul_lo_u32 v9, v4, v5
 ; CHECK-NEXT:    v_mul_hi_u32 v10, v1, v5
 ; CHECK-NEXT:    v_mul_hi_u32 v5, v4, v5
@@ -251,18 +231,13 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:    v_mul_hi_u32 v11, v1, v6
 ; CHECK-NEXT:    v_mul_hi_u32 v6, v4, v6
 ; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v11
-; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
+; CHECK-NEXT:    v_addc_u32_e32 v7, vcc, v7, v10, vcc
+; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v8, v5
+; CHECK-NEXT:    v_addc_u32_e64 v5, s[6:7], v5, v11, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e64 v7, s[8:9], 0, 0, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v8, s[6:7], 0, 0, s[6:7]
+; CHECK-NEXT:    v_addc_u32_e64 v5, vcc, v5, v7, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v7, vcc, 0, v8, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
 ; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
 ; CHECK-NEXT:    v_addc_u32_e32 v4, vcc, v4, v6, vcc
@@ -274,47 +249,42 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:    v_mul_hi_u32 v9, s0, v4
 ; CHECK-NEXT:    v_mul_hi_u32 v4, s1, v4
 ; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v9
-; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; CHECK-NEXT:    v_mul_lo_u32 v6, s2, v1
-; CHECK-NEXT:    v_mul_lo_u32 v7, s3, v1
-; CHECK-NEXT:    v_mul_hi_u32 v8, s2, v1
-; CHECK-NEXT:    v_add_i32_e32 v9, vcc, 1, v1
+; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, v5, v6, vcc
+; CHECK-NEXT:    v_add_i32_e64 v1, s[4:5], v8, v1
+; CHECK-NEXT:    v_addc_u32_e64 v1, s[6:7], v1, v9, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e64 v5, s[8:9], 0, 0, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v6, s[6:7], 0, 0, s[6:7]
+; CHECK-NEXT:    v_addc_u32_e64 v1, vcc, v1, v5, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, 0, v6, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, 1, v9
+; CHECK-NEXT:    v_mul_lo_u32 v5, s2, v1
+; CHECK-NEXT:    v_mul_lo_u32 v6, s3, v1
+; CHECK-NEXT:    v_mul_hi_u32 v7, s2, v1
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, 1, v1
 ; CHECK-NEXT:    v_mul_lo_u32 v4, s2, v4
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
-; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, s0, v6
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, 1, v8
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
+; CHECK-NEXT:    v_sub_i32_e32 v5, vcc, s0, v5
 ; CHECK-NEXT:    v_subb_u32_e64 v3, s[4:5], v3, v4, vcc
 ; CHECK-NEXT:    v_sub_i32_e64 v4, s[4:5], s1, v4
-; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s2, v6
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s2, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
 ; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s3, v3
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
 ; CHECK-NEXT:    v_subb_u32_e32 v0, vcc, v4, v0, vcc
 ; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s3, v3
-; CHECK-NEXT:    v_cndmask_b32_e32 v3, v8, v7, vcc
-; CHECK-NEXT:    v_subrev_i32_e32 v4, vcc, s2, v6
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, v7, v6, vcc
+; CHECK-NEXT:    v_subrev_i32_e32 v4, vcc, s2, v5
 ; CHECK-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
 ; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s2, v4
 ; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
 ; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s3, v0
-; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
 ; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s3, v0
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
 ; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v9, v5, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v8, v9, vcc
 ; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; CHECK-NEXT:    s_mov_b32 s4, 0
@@ -368,233 +338,203 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_cvt_f32_u32_e32 v10, v4
 ; GISEL-NEXT:    v_cvt_f32_u32_e32 v11, v5
 ; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, 0, v4
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v13, v6
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v14, v7
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v12, v6
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v13, v7
 ; GISEL-NEXT:    v_sub_i32_e64 v9, s[4:5], 0, v6
-; GISEL-NEXT:    v_subb_u32_e32 v15, vcc, 0, v5, vcc
-; GISEL-NEXT:    v_subb_u32_e64 v12, vcc, 0, v7, s[4:5]
+; GISEL-NEXT:    v_subb_u32_e32 v14, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_subb_u32_e64 v15, vcc, 0, v7, s[4:5]
 ; GISEL-NEXT:    v_mac_f32_e32 v10, 0x4f800000, v11
-; GISEL-NEXT:    v_mac_f32_e32 v13, 0x4f800000, v14
+; GISEL-NEXT:    v_mac_f32_e32 v12, 0x4f800000, v13
 ; GISEL-NEXT:    v_rcp_iflag_f32_e32 v10, v10
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v11, v13
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v11, v12
 ; GISEL-NEXT:    v_mul_f32_e32 v10, 0x5f7ffffc, v10
 ; GISEL-NEXT:    v_mul_f32_e32 v11, 0x5f7ffffc, v11
-; GISEL-NEXT:    v_mul_f32_e32 v13, 0x2f800000, v10
-; GISEL-NEXT:    v_mul_f32_e32 v14, 0x2f800000, v11
+; GISEL-NEXT:    v_mul_f32_e32 v12, 0x2f800000, v10
+; GISEL-NEXT:    v_mul_f32_e32 v13, 0x2f800000, v11
+; GISEL-NEXT:    v_trunc_f32_e32 v12, v12
 ; GISEL-NEXT:    v_trunc_f32_e32 v13, v13
-; GISEL-NEXT:    v_trunc_f32_e32 v14, v14
-; GISEL-NEXT:    v_mac_f32_e32 v10, 0xcf800000, v13
+; GISEL-NEXT:    v_mac_f32_e32 v10, 0xcf800000, v12
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v12, v12
+; GISEL-NEXT:    v_mac_f32_e32 v11, 0xcf800000, v13
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v13, v13
-; GISEL-NEXT:    v_mac_f32_e32 v11, 0xcf800000, v14
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v14, v14
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v10, v10
-; GISEL-NEXT:    v_mul_lo_u32 v16, v8, v13
+; GISEL-NEXT:    v_mul_lo_u32 v16, v8, v12
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v11, v11
-; GISEL-NEXT:    v_mul_lo_u32 v17, v9, v14
-; GISEL-NEXT:    v_mul_lo_u32 v18, v9, v11
-; GISEL-NEXT:    v_mul_lo_u32 v19, v12, v11
-; GISEL-NEXT:    v_mul_hi_u32 v20, v9, v11
-; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v19, v17
-; GISEL-NEXT:    v_mul_lo_u32 v19, v14, v18
-; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v20
-; GISEL-NEXT:    v_mul_lo_u32 v20, v11, v17
-; GISEL-NEXT:    v_add_i32_e32 v19, vcc, v19, v20
-; GISEL-NEXT:    v_mul_hi_u32 v20, v11, v18
-; GISEL-NEXT:    v_add_i32_e64 v19, s[4:5], v19, v20
-; GISEL-NEXT:    v_mul_lo_u32 v19, v8, v10
-; GISEL-NEXT:    v_mul_lo_u32 v20, v15, v10
-; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v20, v16
+; GISEL-NEXT:    v_mul_lo_u32 v17, v9, v13
+; GISEL-NEXT:    v_mul_lo_u32 v18, v8, v10
+; GISEL-NEXT:    v_mul_lo_u32 v19, v14, v10
 ; GISEL-NEXT:    v_mul_hi_u32 v20, v8, v10
-; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v20
+; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v19, v16
+; GISEL-NEXT:    v_mul_lo_u32 v19, v12, v18
+; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v16, v20
+; GISEL-NEXT:    v_mul_lo_u32 v20, v10, v16
+; GISEL-NEXT:    v_add_i32_e32 v19, vcc, v19, v20
+; GISEL-NEXT:    v_mul_hi_u32 v20, v10, v18
+; GISEL-NEXT:    v_addc_u32_e32 v19, vcc, v19, v20, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v19, v9, v11
+; GISEL-NEXT:    v_mul_lo_u32 v20, v15, v11
+; GISEL-NEXT:    v_add_i32_e64 v17, s[4:5], v20, v17
+; GISEL-NEXT:    v_mul_hi_u32 v20, v9, v11
+; GISEL-NEXT:    v_add_i32_e64 v17, s[4:5], v17, v20
 ; GISEL-NEXT:    v_mul_lo_u32 v20, v13, v19
-; GISEL-NEXT:    v_mul_lo_u32 v21, v10, v16
-; GISEL-NEXT:    v_add_i32_e64 v20, s[6:7], v20, v21
-; GISEL-NEXT:    v_mul_hi_u32 v21, v10, v19
-; GISEL-NEXT:    v_add_i32_e64 v20, s[8:9], v20, v21
+; GISEL-NEXT:    v_mul_lo_u32 v21, v11, v17
+; GISEL-NEXT:    v_add_i32_e64 v20, s[4:5], v20, v21
+; GISEL-NEXT:    v_mul_hi_u32 v21, v11, v19
+; GISEL-NEXT:    v_addc_u32_e64 v20, s[4:5], v20, v21, s[4:5]
+; GISEL-NEXT:    v_mul_hi_u32 v18, v12, v18
 ; GISEL-NEXT:    v_mul_hi_u32 v19, v13, v19
-; GISEL-NEXT:    v_mul_hi_u32 v18, v14, v18
-; GISEL-NEXT:    v_mul_lo_u32 v20, v13, v16
-; GISEL-NEXT:    v_add_i32_e64 v19, s[10:11], v20, v19
-; GISEL-NEXT:    v_mul_lo_u32 v20, v14, v17
-; GISEL-NEXT:    v_add_i32_e64 v18, s[12:13], v20, v18
+; GISEL-NEXT:    v_mul_lo_u32 v20, v12, v16
+; GISEL-NEXT:    v_add_i32_e64 v18, s[6:7], v20, v18
+; GISEL-NEXT:    v_mul_lo_u32 v20, v13, v17
+; GISEL-NEXT:    v_add_i32_e64 v19, s[8:9], v20, v19
 ; GISEL-NEXT:    v_mul_hi_u32 v20, v10, v16
-; GISEL-NEXT:    v_add_i32_e64 v19, s[14:15], v19, v20
+; GISEL-NEXT:    v_mul_hi_u32 v16, v12, v16
+; GISEL-NEXT:    v_addc_u32_e64 v18, s[10:11], v18, v20, s[6:7]
 ; GISEL-NEXT:    v_mul_hi_u32 v20, v11, v17
-; GISEL-NEXT:    v_add_i32_e64 v18, s[16:17], v18, v20
-; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v21, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e64 v20, s[6:7], v20, v21
-; GISEL-NEXT:    v_cndmask_b32_e64 v21, 0, 1, s[10:11]
-; GISEL-NEXT:    v_cndmask_b32_e64 v22, 0, 1, s[14:15]
-; GISEL-NEXT:    v_add_i32_e64 v21, s[6:7], v21, v22
-; GISEL-NEXT:    v_cndmask_b32_e64 v22, 0, 1, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v23, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v22, vcc, v22, v23
-; GISEL-NEXT:    v_cndmask_b32_e64 v23, 0, 1, s[12:13]
-; GISEL-NEXT:    v_cndmask_b32_e64 v24, 0, 1, s[16:17]
-; GISEL-NEXT:    v_add_i32_e32 v23, vcc, v23, v24
-; GISEL-NEXT:    v_add_i32_e32 v19, vcc, v19, v20
-; GISEL-NEXT:    v_add_i32_e64 v18, s[4:5], v18, v22
-; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v19
-; GISEL-NEXT:    v_mul_hi_u32 v16, v13, v16
-; GISEL-NEXT:    v_mul_hi_u32 v17, v14, v17
-; GISEL-NEXT:    v_add_i32_e64 v11, s[8:9], v11, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v18, vcc, v21, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v19, vcc, v23, v19
-; GISEL-NEXT:    v_mul_lo_u32 v20, v8, v10
-; GISEL-NEXT:    v_mul_lo_u32 v15, v15, v10
-; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v16, v18
-; GISEL-NEXT:    v_mul_hi_u32 v18, v8, v10
-; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v19
-; GISEL-NEXT:    v_mul_lo_u32 v19, v9, v11
-; GISEL-NEXT:    v_mul_lo_u32 v12, v12, v11
-; GISEL-NEXT:    v_addc_u32_e64 v13, vcc, v13, v16, s[6:7]
+; GISEL-NEXT:    v_mul_hi_u32 v17, v13, v17
+; GISEL-NEXT:    v_addc_u32_e64 v19, s[12:13], v19, v20, s[8:9]
+; GISEL-NEXT:    v_addc_u32_e64 v20, s[14:15], 0, 0, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v18, vcc, v18, v20, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v20, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v19, s[4:5], v19, v20, s[8:9]
+; GISEL-NEXT:    v_addc_u32_e64 v20, s[6:7], 0, 0, s[10:11]
+; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v18
+; GISEL-NEXT:    v_addc_u32_e64 v18, s[8:9], 0, 0, s[12:13]
+; GISEL-NEXT:    v_addc_u32_e32 v20, vcc, 0, v20, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v18, vcc, 0, v18, s[4:5]
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v19
+; GISEL-NEXT:    v_add_i32_e64 v16, s[4:5], v16, v20
+; GISEL-NEXT:    v_add_i32_e64 v17, s[4:5], v17, v18
+; GISEL-NEXT:    v_mul_lo_u32 v18, v8, v10
+; GISEL-NEXT:    v_mul_lo_u32 v14, v14, v10
+; GISEL-NEXT:    v_mul_hi_u32 v19, v8, v10
+; GISEL-NEXT:    v_mul_lo_u32 v20, v9, v11
+; GISEL-NEXT:    v_mul_lo_u32 v15, v15, v11
+; GISEL-NEXT:    v_addc_u32_e64 v12, s[4:5], v12, v16, s[6:7]
 ; GISEL-NEXT:    v_mul_hi_u32 v16, v9, v11
-; GISEL-NEXT:    v_addc_u32_e64 v14, vcc, v14, v17, s[8:9]
-; GISEL-NEXT:    v_mul_hi_u32 v17, v10, v20
-; GISEL-NEXT:    v_mul_lo_u32 v8, v8, v13
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v15, v8
-; GISEL-NEXT:    v_mul_hi_u32 v15, v11, v19
-; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v14
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v12, v9
-; GISEL-NEXT:    v_mul_lo_u32 v12, v13, v20
+; GISEL-NEXT:    v_addc_u32_e32 v13, vcc, v13, v17, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v17, v10, v18
+; GISEL-NEXT:    v_mul_lo_u32 v8, v8, v12
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v14, v8
+; GISEL-NEXT:    v_mul_hi_u32 v14, v11, v20
+; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v13
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v15, v9
+; GISEL-NEXT:    v_mul_lo_u32 v15, v12, v18
+; GISEL-NEXT:    v_mul_hi_u32 v18, v12, v18
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v19
+; GISEL-NEXT:    v_mul_lo_u32 v19, v13, v20
 ; GISEL-NEXT:    v_mul_hi_u32 v20, v13, v20
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v18
-; GISEL-NEXT:    v_mul_lo_u32 v18, v14, v19
-; GISEL-NEXT:    v_mul_hi_u32 v19, v14, v19
 ; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v16
 ; GISEL-NEXT:    v_mul_lo_u32 v16, v10, v8
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v16
-; GISEL-NEXT:    v_mul_lo_u32 v16, v13, v8
-; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v17
-; GISEL-NEXT:    v_mul_hi_u32 v12, v10, v8
-; GISEL-NEXT:    v_mul_hi_u32 v8, v13, v8
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
+; GISEL-NEXT:    v_mul_lo_u32 v16, v12, v8
+; GISEL-NEXT:    v_addc_u32_e32 v15, vcc, v15, v17, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v15, v10, v8
+; GISEL-NEXT:    v_mul_hi_u32 v8, v12, v8
 ; GISEL-NEXT:    v_mul_lo_u32 v17, v11, v9
-; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v20
-; GISEL-NEXT:    v_mul_lo_u32 v20, v14, v9
-; GISEL-NEXT:    v_add_i32_e64 v17, s[8:9], v18, v17
-; GISEL-NEXT:    v_mul_hi_u32 v18, v11, v9
-; GISEL-NEXT:    v_mul_hi_u32 v9, v14, v9
-; GISEL-NEXT:    v_add_i32_e64 v19, s[10:11], v20, v19
-; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v17, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v16, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[10:11]
-; GISEL-NEXT:    v_add_i32_e64 v18, s[8:9], v19, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v19, s[4:5], v20, v19
-; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, s[6:7]
-; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v20
-; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v20
-; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v16, v20
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v19
-; GISEL-NEXT:    v_add_i32_e64 v17, s[4:5], v18, v17
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
-; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v17
-; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v15, v18
-; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v16, v19
-; GISEL-NEXT:    v_mul_lo_u32 v16, v1, v10
-; GISEL-NEXT:    v_mul_hi_u32 v17, v0, v10
+; GISEL-NEXT:    v_add_i32_e64 v16, s[4:5], v16, v18
+; GISEL-NEXT:    v_mul_lo_u32 v18, v13, v9
+; GISEL-NEXT:    v_add_i32_e64 v17, s[6:7], v19, v17
+; GISEL-NEXT:    v_mul_hi_u32 v19, v11, v9
+; GISEL-NEXT:    v_mul_hi_u32 v9, v13, v9
+; GISEL-NEXT:    v_add_i32_e64 v18, s[8:9], v18, v20
+; GISEL-NEXT:    v_addc_u32_e64 v15, s[10:11], v16, v15, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v14, s[6:7], v17, v14, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v14, s[12:13], v18, v19, s[8:9]
+; GISEL-NEXT:    v_addc_u32_e64 v16, s[14:15], 0, 0, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v17, s[10:11], 0, 0, s[10:11]
+; GISEL-NEXT:    v_addc_u32_e64 v18, s[6:7], 0, 0, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v19, s[6:7], 0, 0, s[12:13]
+; GISEL-NEXT:    v_addc_u32_e64 v15, vcc, v15, v16, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v14, s[4:5], v14, v18, s[8:9]
+; GISEL-NEXT:    v_addc_u32_e32 v16, vcc, 0, v17, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v17, vcc, 0, v19, s[4:5]
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v15
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
+; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v8, v16
+; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v17
+; GISEL-NEXT:    v_mul_lo_u32 v14, v1, v10
+; GISEL-NEXT:    v_mul_hi_u32 v15, v0, v10
 ; GISEL-NEXT:    v_mul_hi_u32 v10, v1, v10
-; GISEL-NEXT:    v_mul_lo_u32 v18, v3, v11
-; GISEL-NEXT:    v_mul_hi_u32 v19, v2, v11
+; GISEL-NEXT:    v_mul_lo_u32 v16, v3, v11
+; GISEL-NEXT:    v_mul_hi_u32 v17, v2, v11
 ; GISEL-NEXT:    v_mul_hi_u32 v11, v3, v11
-; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v8, v12
-; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v15
-; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, v13, v8, vcc
-; GISEL-NEXT:    v_addc_u32_e64 v9, vcc, v14, v9, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, v12, v8, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v9, vcc, v13, v9, s[4:5]
 ; GISEL-NEXT:    v_mul_lo_u32 v12, v0, v8
 ; GISEL-NEXT:    v_mul_lo_u32 v13, v1, v8
-; GISEL-NEXT:    v_mul_hi_u32 v14, v0, v8
-; GISEL-NEXT:    v_mul_hi_u32 v15, v1, v8
+; GISEL-NEXT:    v_mul_hi_u32 v18, v0, v8
+; GISEL-NEXT:    v_mul_hi_u32 v19, v1, v8
 ; GISEL-NEXT:    v_mul_lo_u32 v8, v2, v9
 ; GISEL-NEXT:    v_mul_lo_u32 v20, v3, v9
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v16, v12
-; GISEL-NEXT:    v_mul_hi_u32 v16, v2, v9
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v14, v12
+; GISEL-NEXT:    v_mul_hi_u32 v14, v2, v9
 ; GISEL-NEXT:    v_mul_hi_u32 v21, v3, v9
 ; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v13, v10
-; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v18, v8
+; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v16, v8
 ; GISEL-NEXT:    v_add_i32_e64 v10, s[8:9], v20, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[6:7]
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v17
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v14
-; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v8, v19
-; GISEL-NEXT:    v_add_i32_e64 v10, s[8:9], v10, v16
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v13, v14
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v18, v16
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v17
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v10, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
-; GISEL-NEXT:    v_mul_lo_u32 v14, v4, v8
-; GISEL-NEXT:    v_mul_lo_u32 v16, v5, v8
-; GISEL-NEXT:    v_mul_hi_u32 v17, v4, v8
-; GISEL-NEXT:    v_mul_lo_u32 v18, v6, v9
-; GISEL-NEXT:    v_mul_lo_u32 v19, v7, v9
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; GISEL-NEXT:    v_mul_hi_u32 v11, v6, v9
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, 1, v8
-; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v14
-; GISEL-NEXT:    v_add_i32_e64 v14, s[6:7], 1, v9
-; GISEL-NEXT:    v_sub_i32_e64 v2, s[8:9], v2, v18
-; GISEL-NEXT:    v_add_i32_e64 v18, s[10:11], 1, v13
-; GISEL-NEXT:    v_add_i32_e64 v10, s[12:13], v15, v10
-; GISEL-NEXT:    v_add_i32_e64 v15, s[12:13], 1, v14
-; GISEL-NEXT:    v_add_i32_e64 v12, s[14:15], v21, v12
+; GISEL-NEXT:    v_addc_u32_e32 v11, vcc, v12, v15, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v9, s[10:11], v9, v18, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v8, s[6:7], v8, v17, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v10, s[12:13], v10, v14, s[8:9]
+; GISEL-NEXT:    v_addc_u32_e64 v8, s[14:15], 0, 0, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v11, s[10:11], 0, 0, s[10:11]
+; GISEL-NEXT:    v_addc_u32_e64 v12, s[6:7], 0, 0, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v13, s[6:7], 0, 0, s[12:13]
+; GISEL-NEXT:    v_addc_u32_e64 v8, vcc, v9, v8, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v9, s[4:5], v10, v12, s[8:9]
+; GISEL-NEXT:    v_addc_u32_e32 v10, vcc, 0, v11, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v11, vcc, 0, v13, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v8
+; GISEL-NEXT:    v_mul_lo_u32 v13, v5, v8
+; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v8
+; GISEL-NEXT:    v_mul_lo_u32 v15, v6, v9
+; GISEL-NEXT:    v_mul_lo_u32 v16, v7, v9
+; GISEL-NEXT:    v_mul_hi_u32 v17, v6, v9
+; GISEL-NEXT:    v_add_i32_e32 v18, vcc, 1, v8
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v19, v10
+; GISEL-NEXT:    v_add_i32_e64 v19, s[4:5], 1, v9
+; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v21, v11
+; GISEL-NEXT:    v_add_i32_e64 v20, s[6:7], 1, v18
+; GISEL-NEXT:    v_sub_i32_e64 v0, s[8:9], v0, v12
+; GISEL-NEXT:    v_add_i32_e64 v12, s[10:11], 1, v19
+; GISEL-NEXT:    v_sub_i32_e64 v2, s[12:13], v2, v15
+; GISEL-NEXT:    v_mul_lo_u32 v15, v4, v10
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[14:15], v0, v4
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[16:17], v2, v6
-; GISEL-NEXT:    v_sub_i32_e64 v0, s[18:19], v0, v4
-; GISEL-NEXT:    v_sub_i32_e64 v2, s[20:21], v2, v6
-; GISEL-NEXT:    v_mul_lo_u32 v20, v4, v10
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[22:23], v0, v4
+; GISEL-NEXT:    v_sub_i32_e64 v0, s[16:17], v0, v4
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[18:19], v0, v4
 ; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, 0, v10, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v4, v6, v12
+; GISEL-NEXT:    v_mul_lo_u32 v4, v6, v11
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
+; GISEL-NEXT:    v_addc_u32_e64 v15, vcc, 0, v11, s[4:5]
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v6
-; GISEL-NEXT:    v_addc_u32_e64 v2, s[6:7], 0, v12, s[6:7]
+; GISEL-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v6
+; GISEL-NEXT:    v_add_i32_e64 v4, s[20:21], v16, v4
+; GISEL-NEXT:    v_addc_u32_e64 v16, s[6:7], 0, v0, s[6:7]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v2, v6
+; GISEL-NEXT:    v_addc_u32_e64 v2, s[10:11], 0, v15, s[10:11]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[14:15]
-; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v20
-; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, -1, s[16:17]
-; GISEL-NEXT:    v_add_i32_e64 v4, s[6:7], v19, v4
-; GISEL-NEXT:    v_addc_u32_e64 v19, s[6:7], 0, v0, s[10:11]
-; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v17
-; GISEL-NEXT:    v_addc_u32_e64 v17, s[6:7], 0, v2, s[12:13]
-; GISEL-NEXT:    v_add_i32_e64 v4, s[6:7], v4, v11
-; GISEL-NEXT:    v_subb_u32_e64 v11, s[6:7], v1, v16, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v16
-; GISEL-NEXT:    v_subb_u32_e64 v16, s[6:7], v3, v4, s[8:9]
-; GISEL-NEXT:    v_sub_i32_e64 v3, s[6:7], v3, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[22:23]
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v11, v5
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[10:11], v11, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
-; GISEL-NEXT:    v_subb_u32_e64 v1, vcc, v1, v5, s[4:5]
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v16, v7
-; GISEL-NEXT:    v_subb_u32_e64 v3, s[4:5], v3, v7, s[8:9]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v16, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, -1, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, v16, v6, s[10:11]
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, -1, vcc
-; GISEL-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[18:19]
-; GISEL-NEXT:    v_subbrev_u32_e64 v3, vcc, 0, v3, s[20:21]
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, v16, v20, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v13, s[10:11], v13, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, -1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v17
+; GISEL-NEXT:    v_subb_u32_e64 v17, vcc, v1, v13, s[8:9]
+; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v13
+; GISEL-NEXT:    v_subb_u32_e64 v13, vcc, v3, v4, s[12:13]
+; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v3, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[18:19]
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v17, v5
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[10:11], v17, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, -1, s[6:7]
+; GISEL-NEXT:    v_subb_u32_e64 v1, s[6:7], v1, v5, s[8:9]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v13, v7
+; GISEL-NEXT:    v_subb_u32_e64 v3, s[8:9], v3, v7, s[12:13]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[8:9], v13, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, -1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, v13, v6, s[10:11]
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[6:7]
+; GISEL-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[16:17]
+; GISEL-NEXT:    v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, v13, v14, s[8:9]
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v3, v7
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], v1, v5
@@ -602,19 +542,19 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, -1, s[4:5]
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
-; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v16
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v13
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v4, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s[8:9]
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, v3, v17, s[8:9]
 ; GISEL-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v1
 ; GISEL-NEXT:    v_cmp_ne_u32_e64 s[8:9], 0, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, v13, v18, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v14, v15, s[8:9]
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, v0, v19, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, v2, v17, s[8:9]
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v18, v20, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, v19, v12, s[8:9]
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, v0, v16, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, v15, v2, s[8:9]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v8, v1, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v2, v9, v3, s[4:5]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v1, v10, v4, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v12, v5, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, v11, v5, s[4:5]
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_udiv_v2i64:
@@ -630,7 +570,7 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v4
 ; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; CGP-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
+; CGP-NEXT:    s_xor_b64 s[8:9], exec, s[4:5]
 ; CGP-NEXT:    s_cbranch_execz .LBB2_2
 ; CGP-NEXT:  ; %bb.1:
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v5
@@ -658,18 +598,13 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_mul_hi_u32 v18, v0, v12
 ; CGP-NEXT:    v_mul_hi_u32 v12, v2, v12
 ; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v17, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v18
-; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
-; CGP-NEXT:    v_add_i32_e32 v15, vcc, v17, v16
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; CGP-NEXT:    v_addc_u32_e32 v14, vcc, v14, v16, vcc
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v17, v13
+; CGP-NEXT:    v_addc_u32_e64 v13, s[6:7], v13, v18, s[4:5]
+; CGP-NEXT:    v_addc_u32_e64 v14, s[10:11], 0, 0, vcc
+; CGP-NEXT:    v_addc_u32_e64 v15, s[6:7], 0, 0, s[6:7]
+; CGP-NEXT:    v_addc_u32_e64 v13, vcc, v13, v14, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v14, vcc, 0, v15, vcc
 ; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
 ; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
 ; CGP-NEXT:    v_addc_u32_e32 v2, vcc, v2, v12, vcc
@@ -687,18 +622,13 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_mul_hi_u32 v16, v0, v1
 ; CGP-NEXT:    v_mul_hi_u32 v1, v2, v1
 ; CGP-NEXT:    v_add_i32_e32 v3, vcc, v14, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v15
-; CGP-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v16
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v14, v3
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v12, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v15, vcc
+; CGP-NEXT:    v_add_i32_e64 v3, s[4:5], v13, v12
+; CGP-NEXT:    v_addc_u32_e64 v3, s[6:7], v3, v16, s[4:5]
+; CGP-NEXT:    v_addc_u32_e64 v12, s[10:11], 0, 0, vcc
+; CGP-NEXT:    v_addc_u32_e64 v13, s[6:7], 0, 0, s[6:7]
+; CGP-NEXT:    v_addc_u32_e64 v3, vcc, v3, v12, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v12, vcc, 0, v13, vcc
 ; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v12
 ; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v3
 ; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v2, v1, vcc
@@ -710,49 +640,44 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_mul_hi_u32 v14, v10, v1
 ; CGP-NEXT:    v_mul_hi_u32 v1, v11, v1
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v13, v0
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v12, v2
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v13, v3
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; CGP-NEXT:    v_mul_lo_u32 v3, v4, v0
-; CGP-NEXT:    v_mul_lo_u32 v12, v5, v0
-; CGP-NEXT:    v_mul_hi_u32 v13, v4, v0
+; CGP-NEXT:    v_addc_u32_e32 v2, vcc, v2, v3, vcc
+; CGP-NEXT:    v_add_i32_e64 v0, s[4:5], v13, v0
+; CGP-NEXT:    v_addc_u32_e64 v0, s[6:7], v0, v14, s[4:5]
+; CGP-NEXT:    v_addc_u32_e64 v2, s[10:11], 0, 0, vcc
+; CGP-NEXT:    v_addc_u32_e64 v3, s[6:7], 0, 0, s[6:7]
+; CGP-NEXT:    v_addc_u32_e64 v0, vcc, v0, v2, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v2, vcc, 0, v3, vcc
 ; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
-; CGP-NEXT:    v_mul_lo_u32 v2, v4, v1
+; CGP-NEXT:    v_mul_lo_u32 v2, v4, v0
+; CGP-NEXT:    v_mul_lo_u32 v3, v5, v0
+; CGP-NEXT:    v_mul_hi_u32 v12, v4, v0
+; CGP-NEXT:    v_mul_lo_u32 v13, v4, v1
 ; CGP-NEXT:    v_add_i32_e32 v14, vcc, 1, v0
 ; CGP-NEXT:    v_addc_u32_e32 v15, vcc, 0, v1, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v12, v2
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, 1, v14
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v13
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, 1, v14
 ; CGP-NEXT:    v_addc_u32_e32 v16, vcc, 0, v15, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v13
-; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v10, v3
-; CGP-NEXT:    v_subb_u32_e64 v10, s[4:5], v11, v2, vcc
-; CGP-NEXT:    v_sub_i32_e64 v2, s[4:5], v11, v2
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v3, v4
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v12
+; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v10, v2
+; CGP-NEXT:    v_subb_u32_e64 v10, s[4:5], v11, v3, vcc
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v11, v3
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v4
 ; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
 ; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[4:5]
-; CGP-NEXT:    v_subb_u32_e32 v2, vcc, v2, v5, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[4:5]
+; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v5, vcc
 ; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v5
-; CGP-NEXT:    v_cndmask_b32_e32 v10, v13, v11, vcc
-; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v3, v4
-; CGP-NEXT:    v_subbrev_u32_e32 v2, vcc, 0, v2, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v3, 0, -1, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v10, v12, v11, vcc
+; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v4
+; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v5
 ; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
-; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v5
-; CGP-NEXT:    v_cndmask_b32_e32 v2, v4, v3, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
-; CGP-NEXT:    v_cndmask_b32_e32 v2, v14, v12, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v14, v13, vcc
 ; CGP-NEXT:    v_cndmask_b32_e32 v3, v15, v16, vcc
 ; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
@@ -761,7 +686,7 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    ; implicit-def: $vgpr4
 ; CGP-NEXT:    ; implicit-def: $vgpr10
 ; CGP-NEXT:  .LBB2_2: ; %Flow1
-; CGP-NEXT:    s_andn2_saveexec_b64 s[6:7], s[6:7]
+; CGP-NEXT:    s_andn2_saveexec_b64 s[6:7], s[8:9]
 ; CGP-NEXT:    s_cbranch_execz .LBB2_4
 ; CGP-NEXT:  ; %bb.3:
 ; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v2
@@ -791,10 +716,10 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v6
 ; CGP-NEXT:    ; implicit-def: $vgpr2_vgpr3
 ; CGP-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
+; CGP-NEXT:    s_xor_b64 s[8:9], exec, s[4:5]
 ; CGP-NEXT:    s_cbranch_execnz .LBB2_7
 ; CGP-NEXT:  ; %bb.5: ; %Flow
-; CGP-NEXT:    s_andn2_saveexec_b64 s[6:7], s[6:7]
+; CGP-NEXT:    s_andn2_saveexec_b64 s[6:7], s[8:9]
 ; CGP-NEXT:    s_cbranch_execnz .LBB2_8
 ; CGP-NEXT:  .LBB2_6:
 ; CGP-NEXT:    s_or_b64 exec, exec, s[6:7]
@@ -825,18 +750,13 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_mul_hi_u32 v16, v2, v10
 ; CGP-NEXT:    v_mul_hi_u32 v10, v4, v10
 ; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v15, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v16
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v15, v14
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; CGP-NEXT:    v_addc_u32_e32 v12, vcc, v12, v14, vcc
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v15, v11
+; CGP-NEXT:    v_addc_u32_e64 v11, s[6:7], v11, v16, s[4:5]
+; CGP-NEXT:    v_addc_u32_e64 v12, s[10:11], 0, 0, vcc
+; CGP-NEXT:    v_addc_u32_e64 v13, s[6:7], 0, 0, s[6:7]
+; CGP-NEXT:    v_addc_u32_e64 v11, vcc, v11, v12, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v12, vcc, 0, v13, vcc
 ; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v11
 ; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v4, v10, vcc
@@ -854,18 +774,13 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_mul_hi_u32 v14, v2, v3
 ; CGP-NEXT:    v_mul_hi_u32 v3, v4, v3
 ; CGP-NEXT:    v_add_i32_e32 v5, vcc, v12, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v12, v5
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v10, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CGP-NEXT:    v_addc_u32_e32 v5, vcc, v5, v13, vcc
+; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v11, v10
+; CGP-NEXT:    v_addc_u32_e64 v5, s[6:7], v5, v14, s[4:5]
+; CGP-NEXT:    v_addc_u32_e64 v10, s[10:11], 0, 0, vcc
+; CGP-NEXT:    v_addc_u32_e64 v11, s[6:7], 0, 0, s[6:7]
+; CGP-NEXT:    v_addc_u32_e64 v5, vcc, v5, v10, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v10, vcc, 0, v11, vcc
 ; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v10
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
 ; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v4, v3, vcc
@@ -877,49 +792,44 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_mul_hi_u32 v12, v8, v3
 ; CGP-NEXT:    v_mul_hi_u32 v3, v9, v3
 ; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v11, v2
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v10, v4
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v11, v5
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; CGP-NEXT:    v_mul_lo_u32 v5, v6, v2
-; CGP-NEXT:    v_mul_lo_u32 v10, v7, v2
-; CGP-NEXT:    v_mul_hi_u32 v11, v6, v2
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v4, v5, vcc
+; CGP-NEXT:    v_add_i32_e64 v2, s[4:5], v11, v2
+; CGP-NEXT:    v_addc_u32_e64 v2, s[6:7], v2, v12, s[4:5]
+; CGP-NEXT:    v_addc_u32_e64 v4, s[10:11], 0, 0, vcc
+; CGP-NEXT:    v_addc_u32_e64 v5, s[6:7], 0, 0, s[6:7]
+; CGP-NEXT:    v_addc_u32_e64 v2, vcc, v2, v4, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, 0, v5, vcc
 ; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
-; CGP-NEXT:    v_mul_lo_u32 v4, v6, v3
+; CGP-NEXT:    v_mul_lo_u32 v4, v6, v2
+; CGP-NEXT:    v_mul_lo_u32 v5, v7, v2
+; CGP-NEXT:    v_mul_hi_u32 v10, v6, v2
+; CGP-NEXT:    v_mul_lo_u32 v11, v6, v3
 ; CGP-NEXT:    v_add_i32_e32 v12, vcc, 1, v2
 ; CGP-NEXT:    v_addc_u32_e32 v13, vcc, 0, v3, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v10, v4
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, 1, v12
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v11
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, 1, v12
 ; CGP-NEXT:    v_addc_u32_e32 v14, vcc, 0, v13, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v11
-; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v8, v5
-; CGP-NEXT:    v_subb_u32_e64 v8, s[4:5], v9, v4, vcc
-; CGP-NEXT:    v_sub_i32_e64 v4, s[4:5], v9, v4
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v6
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
+; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v8, v4
+; CGP-NEXT:    v_subb_u32_e64 v8, s[4:5], v9, v5, vcc
+; CGP-NEXT:    v_sub_i32_e64 v5, s[4:5], v9, v5
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v6
 ; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
 ; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v7
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
-; CGP-NEXT:    v_subb_u32_e32 v4, vcc, v4, v7, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
+; CGP-NEXT:    v_subb_u32_e32 v5, vcc, v5, v7, vcc
 ; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v8, v7
-; CGP-NEXT:    v_cndmask_b32_e32 v8, v11, v9, vcc
-; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v5, v6
-; CGP-NEXT:    v_subbrev_u32_e32 v4, vcc, 0, v4, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v5, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v7
+; CGP-NEXT:    v_cndmask_b32_e32 v8, v10, v9, vcc
+; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v4, v6
+; CGP-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v5, v7
 ; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
-; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
-; CGP-NEXT:    v_cndmask_b32_e32 v4, v6, v5, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc
 ; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
-; CGP-NEXT:    v_cndmask_b32_e32 v4, v12, v10, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v12, v11, vcc
 ; CGP-NEXT:    v_cndmask_b32_e32 v5, v13, v14, vcc
 ; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
 ; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
@@ -927,7 +837,7 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    ; implicit-def: $vgpr4
 ; CGP-NEXT:    ; implicit-def: $vgpr6
 ; CGP-NEXT:    ; implicit-def: $vgpr8
-; CGP-NEXT:    s_andn2_saveexec_b64 s[6:7], s[6:7]
+; CGP-NEXT:    s_andn2_saveexec_b64 s[6:7], s[8:9]
 ; CGP-NEXT:    s_cbranch_execz .LBB2_6
 ; CGP-NEXT:  .LBB2_8:
 ; CGP-NEXT:    v_rcp_iflag_f32_e32 v2, v4
@@ -989,18 +899,13 @@ define i64 @v_udiv_i64_oddk_denom(i64 %num) {
 ; CHECK-NEXT:    v_mul_hi_u32 v2, v1, v2
 ; CHECK-NEXT:    v_mul_hi_u32 v0, v0, v3
 ; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
-; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
-; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
-; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
+; CHECK-NEXT:    v_addc_u32_e32 v4, vcc, v4, v6, vcc
+; CHECK-NEXT:    v_add_i32_e64 v2, s[4:5], v7, v2
+; CHECK-NEXT:    v_addc_u32_e64 v0, s[6:7], v2, v0, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e64 v2, s[8:9], 0, 0, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v4, s[6:7], 0, 0, s[6:7]
+; CHECK-NEXT:    v_addc_u32_e64 v0, vcc, v0, v2, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, 0, v4, vcc
 ; CHECK-NEXT:    v_mul_hi_u32 v1, v1, v3
 ; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
 ; CHECK-NEXT:    v_lshr_b64 v[0:1], v[0:1], 20
@@ -1030,33 +935,23 @@ define <2 x i64> @v_udiv_v2i64_oddk_denom(<2 x i64> %num) {
 ; CHECK-NEXT:    v_mul_hi_u32 v2, v2, v5
 ; CHECK-NEXT:    v_mul_hi_u32 v3, v3, v5
 ; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v10
-; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v12
-; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v14, v4
-; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v10, v13
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
-; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v9, v7
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v11, v8
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v12, v4
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v5
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
+; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, v5, v8, vcc
+; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v9, v10
+; CHECK-NEXT:    v_addc_u32_e64 v0, s[6:7], v5, v0, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v5, s[8:9], v11, v12
+; CHECK-NEXT:    v_addc_u32_e64 v5, s[8:9], v5, v13, s[8:9]
+; CHECK-NEXT:    v_add_i32_e64 v4, s[10:11], v14, v4
+; CHECK-NEXT:    v_addc_u32_e64 v2, s[12:13], v4, v2, s[10:11]
+; CHECK-NEXT:    v_addc_u32_e64 v4, s[14:15], 0, 0, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v5, s[6:7], 0, 0, s[6:7]
+; CHECK-NEXT:    v_addc_u32_e64 v6, s[6:7], 0, 0, s[8:9]
+; CHECK-NEXT:    v_addc_u32_e64 v7, s[6:7], 0, 0, s[12:13]
+; CHECK-NEXT:    v_addc_u32_e64 v0, vcc, v0, v4, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v4, vcc, 0, v5, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v2, vcc, v2, v6, s[10:11]
+; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, 0, v7, vcc
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
 ; CHECK-NEXT:    v_lshr_b64 v[0:1], v[0:1], 20
 ; CHECK-NEXT:    v_lshr_b64 v[2:3], v[2:3], 20
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
@@ -1079,10 +974,10 @@ define i64 @v_udiv_i64_pow2_shl_denom(i64 %x, i64 %y) {
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, v5
 ; CHECK-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
+; CHECK-NEXT:    s_xor_b64 s[8:9], exec, s[4:5]
 ; CHECK-NEXT:    s_cbranch_execnz .LBB7_3
 ; CHECK-NEXT:  ; %bb.1: ; %Flow
-; CHECK-NEXT:    s_andn2_saveexec_b64 s[6:7], s[6:7]
+; CHECK-NEXT:    s_andn2_saveexec_b64 s[6:7], s[8:9]
 ; CHECK-NEXT:    s_cbranch_execnz .LBB7_4
 ; CHECK-NEXT:  .LBB7_2:
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[6:7]
@@ -1113,18 +1008,13 @@ define i64 @v_udiv_i64_pow2_shl_denom(i64 %x, i64 %y) {
 ; CHECK-NEXT:    v_mul_hi_u32 v14, v0, v8
 ; CHECK-NEXT:    v_mul_hi_u32 v8, v2, v8
 ; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
-; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
-; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
-; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v14
-; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v13, v12
-; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
-; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CHECK-NEXT:    v_addc_u32_e32 v10, vcc, v10, v12, vcc
+; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v13, v9
+; CHECK-NEXT:    v_addc_u32_e64 v9, s[6:7], v9, v14, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e64 v10, s[10:11], 0, 0, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v11, s[6:7], 0, 0, s[6:7]
+; CHECK-NEXT:    v_addc_u32_e64 v9, vcc, v9, v10, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v10, vcc, 0, v11, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
 ; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v2, v8, vcc
@@ -1142,18 +1032,13 @@ define i64 @v_udiv_i64_pow2_shl_denom(i64 %x, i64 %y) {
 ; CHECK-NEXT:    v_mul_hi_u32 v12, v0, v1
 ; CHECK-NEXT:    v_mul_hi_u32 v1, v2, v1
 ; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v10, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v11
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
-; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v10, v7
-; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; CHECK-NEXT:    v_addc_u32_e32 v7, vcc, v7, v11, vcc
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v9, v8
+; CHECK-NEXT:    v_addc_u32_e64 v7, s[6:7], v7, v12, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e64 v8, s[10:11], 0, 0, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v9, s[6:7], 0, 0, s[6:7]
+; CHECK-NEXT:    v_addc_u32_e64 v7, vcc, v7, v8, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v8, vcc, 0, v9, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v8
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
 ; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v2, v1, vcc
@@ -1165,57 +1050,52 @@ define i64 @v_udiv_i64_pow2_shl_denom(i64 %x, i64 %y) {
 ; CHECK-NEXT:    v_mul_hi_u32 v10, v3, v1
 ; CHECK-NEXT:    v_mul_hi_u32 v1, v4, v1
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v8
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
-; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v10
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v8, v2
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
-; CHECK-NEXT:    v_mul_lo_u32 v7, v5, v0
-; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v0
-; CHECK-NEXT:    v_mul_hi_u32 v9, v5, v0
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v2, v7, vcc
+; CHECK-NEXT:    v_add_i32_e64 v0, s[4:5], v9, v0
+; CHECK-NEXT:    v_addc_u32_e64 v0, s[6:7], v0, v10, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e64 v2, s[10:11], 0, 0, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v7, s[6:7], 0, 0, s[6:7]
+; CHECK-NEXT:    v_addc_u32_e64 v0, vcc, v0, v2, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, 0, v7, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
-; CHECK-NEXT:    v_mul_lo_u32 v2, v5, v1
+; CHECK-NEXT:    v_mul_lo_u32 v2, v5, v0
+; CHECK-NEXT:    v_mul_lo_u32 v7, v6, v0
+; CHECK-NEXT:    v_mul_hi_u32 v8, v5, v0
+; CHECK-NEXT:    v_mul_lo_u32 v9, v5, v1
 ; CHECK-NEXT:    v_add_i32_e32 v10, vcc, 1, v0
 ; CHECK-NEXT:    v_addc_u32_e32 v11, vcc, 0, v1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v8, v2
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, 1, v10
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, 1, v10
 ; CHECK-NEXT:    v_addc_u32_e32 v12, vcc, 0, v11, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v9
-; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v3, v7
-; CHECK-NEXT:    v_subb_u32_e64 v7, s[4:5], v4, v2, vcc
-; CHECK-NEXT:    v_sub_i32_e64 v2, s[4:5], v4, v2
-; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v3, v5
-; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
-; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v6
-; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
-; CHECK-NEXT:    v_subb_u32_e32 v2, vcc, v2, v6, vcc
-; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v7, v6
-; CHECK-NEXT:    v_cndmask_b32_e32 v4, v9, v4, vcc
-; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v3, v5
-; CHECK-NEXT:    v_subbrev_u32_e32 v2, vcc, 0, v2, vcc
-; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v5
-; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, -1, vcc
-; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v6
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
+; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, v3, v2
+; CHECK-NEXT:    v_subb_u32_e64 v3, s[4:5], v4, v7, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v4, s[4:5], v4, v7
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v3, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
+; CHECK-NEXT:    v_subb_u32_e32 v4, vcc, v4, v6, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v6
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, v8, v7, vcc
+; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, v2, v5
+; CHECK-NEXT:    v_subbrev_u32_e32 v4, vcc, 0, v4, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v6
 ; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
-; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v6
-; CHECK-NEXT:    v_cndmask_b32_e32 v2, v5, v3, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v6
+; CHECK-NEXT:    v_cndmask_b32_e32 v2, v5, v2, vcc
 ; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
-; CHECK-NEXT:    v_cndmask_b32_e32 v2, v10, v8, vcc
-; CHECK-NEXT:    v_cndmask_b32_e32 v3, v11, v12, vcc
-; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
+; CHECK-NEXT:    v_cndmask_b32_e32 v2, v10, v9, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v4, v11, v12, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
 ; CHECK-NEXT:    ; implicit-def: $vgpr2
 ; CHECK-NEXT:    ; implicit-def: $vgpr5_vgpr6
 ; CHECK-NEXT:    ; implicit-def: $vgpr3
-; CHECK-NEXT:    s_andn2_saveexec_b64 s[6:7], s[6:7]
+; CHECK-NEXT:    s_andn2_saveexec_b64 s[6:7], s[8:9]
 ; CHECK-NEXT:    s_cbranch_execz .LBB7_2
 ; CHECK-NEXT:  .LBB7_4:
 ; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v2
@@ -1255,253 +1135,223 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; GISEL-NEXT:    v_cvt_f32_u32_e32 v10, v7
 ; GISEL-NEXT:    v_cvt_f32_u32_e32 v11, v8
 ; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, 0, v7
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v13, v4
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v14, v5
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v12, v4
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v13, v5
 ; GISEL-NEXT:    v_sub_i32_e64 v9, s[4:5], 0, v4
-; GISEL-NEXT:    v_subb_u32_e32 v15, vcc, 0, v8, vcc
-; GISEL-NEXT:    v_subb_u32_e64 v12, vcc, 0, v5, s[4:5]
+; GISEL-NEXT:    v_subb_u32_e32 v14, vcc, 0, v8, vcc
+; GISEL-NEXT:    v_subb_u32_e64 v15, vcc, 0, v5, s[4:5]
 ; GISEL-NEXT:    v_mac_f32_e32 v10, 0x4f800000, v11
-; GISEL-NEXT:    v_mac_f32_e32 v13, 0x4f800000, v14
+; GISEL-NEXT:    v_mac_f32_e32 v12, 0x4f800000, v13
 ; GISEL-NEXT:    v_rcp_iflag_f32_e32 v10, v10
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v11, v13
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v11, v12
 ; GISEL-NEXT:    v_mul_f32_e32 v10, 0x5f7ffffc, v10
 ; GISEL-NEXT:    v_mul_f32_e32 v11, 0x5f7ffffc, v11
-; GISEL-NEXT:    v_mul_f32_e32 v13, 0x2f800000, v10
-; GISEL-NEXT:    v_mul_f32_e32 v14, 0x2f800000, v11
+; GISEL-NEXT:    v_mul_f32_e32 v12, 0x2f800000, v10
+; GISEL-NEXT:    v_mul_f32_e32 v13, 0x2f800000, v11
+; GISEL-NEXT:    v_trunc_f32_e32 v12, v12
 ; GISEL-NEXT:    v_trunc_f32_e32 v13, v13
-; GISEL-NEXT:    v_trunc_f32_e32 v14, v14
-; GISEL-NEXT:    v_mac_f32_e32 v10, 0xcf800000, v13
+; GISEL-NEXT:    v_mac_f32_e32 v10, 0xcf800000, v12
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v12, v12
+; GISEL-NEXT:    v_mac_f32_e32 v11, 0xcf800000, v13
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v13, v13
-; GISEL-NEXT:    v_mac_f32_e32 v11, 0xcf800000, v14
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v14, v14
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v10, v10
-; GISEL-NEXT:    v_mul_lo_u32 v16, v6, v13
+; GISEL-NEXT:    v_mul_lo_u32 v16, v6, v12
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v11, v11
-; GISEL-NEXT:    v_mul_lo_u32 v17, v9, v14
-; GISEL-NEXT:    v_mul_lo_u32 v18, v9, v11
-; GISEL-NEXT:    v_mul_lo_u32 v19, v12, v11
-; GISEL-NEXT:    v_mul_hi_u32 v20, v9, v11
-; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v19, v17
-; GISEL-NEXT:    v_mul_lo_u32 v19, v14, v18
-; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v20
-; GISEL-NEXT:    v_mul_lo_u32 v20, v11, v17
-; GISEL-NEXT:    v_add_i32_e32 v19, vcc, v19, v20
-; GISEL-NEXT:    v_mul_hi_u32 v20, v11, v18
-; GISEL-NEXT:    v_add_i32_e64 v19, s[4:5], v19, v20
-; GISEL-NEXT:    v_mul_lo_u32 v19, v6, v10
-; GISEL-NEXT:    v_mul_lo_u32 v20, v15, v10
-; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v20, v16
+; GISEL-NEXT:    v_mul_lo_u32 v17, v9, v13
+; GISEL-NEXT:    v_mul_lo_u32 v18, v6, v10
+; GISEL-NEXT:    v_mul_lo_u32 v19, v14, v10
 ; GISEL-NEXT:    v_mul_hi_u32 v20, v6, v10
-; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v20
+; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v19, v16
+; GISEL-NEXT:    v_mul_lo_u32 v19, v12, v18
+; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v16, v20
+; GISEL-NEXT:    v_mul_lo_u32 v20, v10, v16
+; GISEL-NEXT:    v_add_i32_e32 v19, vcc, v19, v20
+; GISEL-NEXT:    v_mul_hi_u32 v20, v10, v18
+; GISEL-NEXT:    v_addc_u32_e32 v19, vcc, v19, v20, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v19, v9, v11
+; GISEL-NEXT:    v_mul_lo_u32 v20, v15, v11
+; GISEL-NEXT:    v_add_i32_e64 v17, s[4:5], v20, v17
+; GISEL-NEXT:    v_mul_hi_u32 v20, v9, v11
+; GISEL-NEXT:    v_add_i32_e64 v17, s[4:5], v17, v20
 ; GISEL-NEXT:    v_mul_lo_u32 v20, v13, v19
-; GISEL-NEXT:    v_mul_lo_u32 v21, v10, v16
-; GISEL-NEXT:    v_add_i32_e64 v20, s[6:7], v20, v21
-; GISEL-NEXT:    v_mul_hi_u32 v21, v10, v19
-; GISEL-NEXT:    v_add_i32_e64 v20, s[8:9], v20, v21
+; GISEL-NEXT:    v_mul_lo_u32 v21, v11, v17
+; GISEL-NEXT:    v_add_i32_e64 v20, s[4:5], v20, v21
+; GISEL-NEXT:    v_mul_hi_u32 v21, v11, v19
+; GISEL-NEXT:    v_addc_u32_e64 v20, s[4:5], v20, v21, s[4:5]
+; GISEL-NEXT:    v_mul_hi_u32 v18, v12, v18
 ; GISEL-NEXT:    v_mul_hi_u32 v19, v13, v19
-; GISEL-NEXT:    v_mul_hi_u32 v18, v14, v18
-; GISEL-NEXT:    v_mul_lo_u32 v20, v13, v16
-; GISEL-NEXT:    v_add_i32_e64 v19, s[10:11], v20, v19
-; GISEL-NEXT:    v_mul_lo_u32 v20, v14, v17
-; GISEL-NEXT:    v_add_i32_e64 v18, s[12:13], v20, v18
+; GISEL-NEXT:    v_mul_lo_u32 v20, v12, v16
+; GISEL-NEXT:    v_add_i32_e64 v18, s[6:7], v20, v18
+; GISEL-NEXT:    v_mul_lo_u32 v20, v13, v17
+; GISEL-NEXT:    v_add_i32_e64 v19, s[8:9], v20, v19
 ; GISEL-NEXT:    v_mul_hi_u32 v20, v10, v16
-; GISEL-NEXT:    v_add_i32_e64 v19, s[14:15], v19, v20
+; GISEL-NEXT:    v_mul_hi_u32 v16, v12, v16
+; GISEL-NEXT:    v_addc_u32_e64 v18, s[10:11], v18, v20, s[6:7]
 ; GISEL-NEXT:    v_mul_hi_u32 v20, v11, v17
-; GISEL-NEXT:    v_add_i32_e64 v18, s[16:17], v18, v20
-; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v21, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e64 v20, s[6:7], v20, v21
-; GISEL-NEXT:    v_cndmask_b32_e64 v21, 0, 1, s[10:11]
-; GISEL-NEXT:    v_cndmask_b32_e64 v22, 0, 1, s[14:15]
-; GISEL-NEXT:    v_add_i32_e64 v21, s[6:7], v21, v22
-; GISEL-NEXT:    v_cndmask_b32_e64 v22, 0, 1, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v23, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v22, vcc, v22, v23
-; GISEL-NEXT:    v_cndmask_b32_e64 v23, 0, 1, s[12:13]
-; GISEL-NEXT:    v_cndmask_b32_e64 v24, 0, 1, s[16:17]
-; GISEL-NEXT:    v_add_i32_e32 v23, vcc, v23, v24
-; GISEL-NEXT:    v_add_i32_e32 v19, vcc, v19, v20
-; GISEL-NEXT:    v_add_i32_e64 v18, s[4:5], v18, v22
-; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v19
-; GISEL-NEXT:    v_mul_hi_u32 v16, v13, v16
-; GISEL-NEXT:    v_mul_hi_u32 v17, v14, v17
-; GISEL-NEXT:    v_add_i32_e64 v11, s[8:9], v11, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v18, vcc, v21, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v19, vcc, v23, v19
-; GISEL-NEXT:    v_mul_lo_u32 v20, v6, v10
-; GISEL-NEXT:    v_mul_lo_u32 v15, v15, v10
-; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v16, v18
-; GISEL-NEXT:    v_mul_hi_u32 v18, v6, v10
-; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v19
-; GISEL-NEXT:    v_mul_lo_u32 v19, v9, v11
-; GISEL-NEXT:    v_mul_lo_u32 v12, v12, v11
-; GISEL-NEXT:    v_addc_u32_e64 v13, vcc, v13, v16, s[6:7]
+; GISEL-NEXT:    v_mul_hi_u32 v17, v13, v17
+; GISEL-NEXT:    v_addc_u32_e64 v19, s[12:13], v19, v20, s[8:9]
+; GISEL-NEXT:    v_addc_u32_e64 v20, s[14:15], 0, 0, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v18, vcc, v18, v20, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v20, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v19, s[4:5], v19, v20, s[8:9]
+; GISEL-NEXT:    v_addc_u32_e64 v20, s[6:7], 0, 0, s[10:11]
+; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v18
+; GISEL-NEXT:    v_addc_u32_e64 v18, s[8:9], 0, 0, s[12:13]
+; GISEL-NEXT:    v_addc_u32_e32 v20, vcc, 0, v20, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v18, vcc, 0, v18, s[4:5]
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v19
+; GISEL-NEXT:    v_add_i32_e64 v16, s[4:5], v16, v20
+; GISEL-NEXT:    v_add_i32_e64 v17, s[4:5], v17, v18
+; GISEL-NEXT:    v_mul_lo_u32 v18, v6, v10
+; GISEL-NEXT:    v_mul_lo_u32 v14, v14, v10
+; GISEL-NEXT:    v_mul_hi_u32 v19, v6, v10
+; GISEL-NEXT:    v_mul_lo_u32 v20, v9, v11
+; GISEL-NEXT:    v_mul_lo_u32 v15, v15, v11
+; GISEL-NEXT:    v_addc_u32_e64 v12, s[4:5], v12, v16, s[6:7]
 ; GISEL-NEXT:    v_mul_hi_u32 v16, v9, v11
-; GISEL-NEXT:    v_addc_u32_e64 v14, vcc, v14, v17, s[8:9]
-; GISEL-NEXT:    v_mul_hi_u32 v17, v10, v20
-; GISEL-NEXT:    v_mul_lo_u32 v6, v6, v13
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v15, v6
-; GISEL-NEXT:    v_mul_hi_u32 v15, v11, v19
-; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v14
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v12, v9
-; GISEL-NEXT:    v_mul_lo_u32 v12, v13, v20
+; GISEL-NEXT:    v_addc_u32_e32 v13, vcc, v13, v17, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v17, v10, v18
+; GISEL-NEXT:    v_mul_lo_u32 v6, v6, v12
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v14, v6
+; GISEL-NEXT:    v_mul_hi_u32 v14, v11, v20
+; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v13
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v15, v9
+; GISEL-NEXT:    v_mul_lo_u32 v15, v12, v18
+; GISEL-NEXT:    v_mul_hi_u32 v18, v12, v18
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v19
+; GISEL-NEXT:    v_mul_lo_u32 v19, v13, v20
 ; GISEL-NEXT:    v_mul_hi_u32 v20, v13, v20
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v18
-; GISEL-NEXT:    v_mul_lo_u32 v18, v14, v19
-; GISEL-NEXT:    v_mul_hi_u32 v19, v14, v19
 ; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v16
 ; GISEL-NEXT:    v_mul_lo_u32 v16, v10, v6
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v16
-; GISEL-NEXT:    v_mul_lo_u32 v16, v13, v6
-; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v17
-; GISEL-NEXT:    v_mul_hi_u32 v12, v10, v6
-; GISEL-NEXT:    v_mul_hi_u32 v6, v13, v6
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
+; GISEL-NEXT:    v_mul_lo_u32 v16, v12, v6
+; GISEL-NEXT:    v_addc_u32_e32 v15, vcc, v15, v17, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v15, v10, v6
+; GISEL-NEXT:    v_mul_hi_u32 v6, v12, v6
 ; GISEL-NEXT:    v_mul_lo_u32 v17, v11, v9
-; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v20
-; GISEL-NEXT:    v_mul_lo_u32 v20, v14, v9
-; GISEL-NEXT:    v_add_i32_e64 v17, s[8:9], v18, v17
-; GISEL-NEXT:    v_mul_hi_u32 v18, v11, v9
-; GISEL-NEXT:    v_mul_hi_u32 v9, v14, v9
-; GISEL-NEXT:    v_add_i32_e64 v19, s[10:11], v20, v19
-; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v17, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v16, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[10:11]
-; GISEL-NEXT:    v_add_i32_e64 v18, s[8:9], v19, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v19, s[4:5], v20, v19
-; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, s[6:7]
-; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v20
-; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v20
-; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v16, v20
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v19
-; GISEL-NEXT:    v_add_i32_e64 v17, s[4:5], v18, v17
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
-; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v17
-; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v15, v18
-; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v16, v19
-; GISEL-NEXT:    v_mul_lo_u32 v16, v1, v10
-; GISEL-NEXT:    v_mul_hi_u32 v17, v0, v10
+; GISEL-NEXT:    v_add_i32_e64 v16, s[4:5], v16, v18
+; GISEL-NEXT:    v_mul_lo_u32 v18, v13, v9
+; GISEL-NEXT:    v_add_i32_e64 v17, s[6:7], v19, v17
+; GISEL-NEXT:    v_mul_hi_u32 v19, v11, v9
+; GISEL-NEXT:    v_mul_hi_u32 v9, v13, v9
+; GISEL-NEXT:    v_add_i32_e64 v18, s[8:9], v18, v20
+; GISEL-NEXT:    v_addc_u32_e64 v15, s[10:11], v16, v15, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v14, s[6:7], v17, v14, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v14, s[12:13], v18, v19, s[8:9]
+; GISEL-NEXT:    v_addc_u32_e64 v16, s[14:15], 0, 0, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v17, s[10:11], 0, 0, s[10:11]
+; GISEL-NEXT:    v_addc_u32_e64 v18, s[6:7], 0, 0, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v19, s[6:7], 0, 0, s[12:13]
+; GISEL-NEXT:    v_addc_u32_e64 v15, vcc, v15, v16, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v14, s[4:5], v14, v18, s[8:9]
+; GISEL-NEXT:    v_addc_u32_e32 v16, vcc, 0, v17, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v17, vcc, 0, v19, s[4:5]
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v15
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
+; GISEL-NEXT:    v_add_i32_e64 v6, s[6:7], v6, v16
+; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v17
+; GISEL-NEXT:    v_mul_lo_u32 v14, v1, v10
+; GISEL-NEXT:    v_mul_hi_u32 v15, v0, v10
 ; GISEL-NEXT:    v_mul_hi_u32 v10, v1, v10
-; GISEL-NEXT:    v_mul_lo_u32 v18, v3, v11
-; GISEL-NEXT:    v_mul_hi_u32 v19, v2, v11
+; GISEL-NEXT:    v_mul_lo_u32 v16, v3, v11
+; GISEL-NEXT:    v_mul_hi_u32 v17, v2, v11
 ; GISEL-NEXT:    v_mul_hi_u32 v11, v3, v11
-; GISEL-NEXT:    v_add_i32_e64 v6, s[6:7], v6, v12
-; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v15
-; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v13, v6, vcc
-; GISEL-NEXT:    v_addc_u32_e64 v9, vcc, v14, v9, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v12, v6, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v9, vcc, v13, v9, s[4:5]
 ; GISEL-NEXT:    v_mul_lo_u32 v12, v0, v6
 ; GISEL-NEXT:    v_mul_lo_u32 v13, v1, v6
-; GISEL-NEXT:    v_mul_hi_u32 v14, v0, v6
-; GISEL-NEXT:    v_mul_hi_u32 v15, v1, v6
+; GISEL-NEXT:    v_mul_hi_u32 v18, v0, v6
+; GISEL-NEXT:    v_mul_hi_u32 v19, v1, v6
 ; GISEL-NEXT:    v_mul_lo_u32 v6, v2, v9
 ; GISEL-NEXT:    v_mul_lo_u32 v20, v3, v9
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v16, v12
-; GISEL-NEXT:    v_mul_hi_u32 v16, v2, v9
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v14, v12
+; GISEL-NEXT:    v_mul_hi_u32 v14, v2, v9
 ; GISEL-NEXT:    v_mul_hi_u32 v21, v3, v9
 ; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v13, v10
-; GISEL-NEXT:    v_add_i32_e64 v6, s[6:7], v18, v6
+; GISEL-NEXT:    v_add_i32_e64 v6, s[6:7], v16, v6
 ; GISEL-NEXT:    v_add_i32_e64 v10, s[8:9], v20, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[6:7]
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v17
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v14
-; GISEL-NEXT:    v_add_i32_e64 v6, s[6:7], v6, v19
-; GISEL-NEXT:    v_add_i32_e64 v10, s[8:9], v10, v16
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v11, v6
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v13, v14
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v18, v16
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v17
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v9, v6
-; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v10, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
-; GISEL-NEXT:    v_mul_lo_u32 v14, v7, v6
-; GISEL-NEXT:    v_mul_lo_u32 v16, v8, v6
-; GISEL-NEXT:    v_mul_hi_u32 v17, v7, v6
-; GISEL-NEXT:    v_mul_lo_u32 v18, v4, v9
-; GISEL-NEXT:    v_mul_lo_u32 v19, v5, v9
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; GISEL-NEXT:    v_mul_hi_u32 v11, v4, v9
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, 1, v6
-; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v14
-; GISEL-NEXT:    v_add_i32_e64 v14, s[6:7], 1, v9
-; GISEL-NEXT:    v_sub_i32_e64 v2, s[8:9], v2, v18
-; GISEL-NEXT:    v_add_i32_e64 v18, s[10:11], 1, v13
-; GISEL-NEXT:    v_add_i32_e64 v10, s[12:13], v15, v10
-; GISEL-NEXT:    v_add_i32_e64 v15, s[12:13], 1, v14
-; GISEL-NEXT:    v_add_i32_e64 v12, s[14:15], v21, v12
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[14:15], v0, v7
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[16:17], v2, v4
-; GISEL-NEXT:    v_sub_i32_e64 v0, s[18:19], v0, v7
-; GISEL-NEXT:    v_sub_i32_e64 v2, s[20:21], v2, v4
-; GISEL-NEXT:    v_mul_lo_u32 v20, v7, v10
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[22:23], v0, v7
-; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, 0, v10, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v4
-; GISEL-NEXT:    v_mul_lo_u32 v2, v4, v12
-; GISEL-NEXT:    v_add_i32_e64 v4, s[24:25], v16, v20
-; GISEL-NEXT:    v_addc_u32_e64 v7, s[6:7], 0, v12, s[6:7]
-; GISEL-NEXT:    v_add_i32_e64 v2, s[6:7], v19, v2
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, -1, s[14:15]
-; GISEL-NEXT:    v_add_i32_e64 v4, s[6:7], v4, v17
-; GISEL-NEXT:    v_subb_u32_e64 v17, s[6:7], v1, v4, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v11, vcc, v12, v15, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v9, s[10:11], v9, v18, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v6, s[6:7], v6, v17, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v10, s[12:13], v10, v14, s[8:9]
+; GISEL-NEXT:    v_addc_u32_e64 v6, s[14:15], 0, 0, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v11, s[10:11], 0, 0, s[10:11]
+; GISEL-NEXT:    v_addc_u32_e64 v12, s[6:7], 0, 0, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v13, s[6:7], 0, 0, s[12:13]
+; GISEL-NEXT:    v_addc_u32_e64 v6, vcc, v9, v6, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v9, s[4:5], v10, v12, s[8:9]
+; GISEL-NEXT:    v_addc_u32_e32 v10, vcc, 0, v11, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v11, vcc, 0, v13, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v12, v7, v6
+; GISEL-NEXT:    v_mul_lo_u32 v13, v8, v6
+; GISEL-NEXT:    v_mul_hi_u32 v14, v7, v6
+; GISEL-NEXT:    v_mul_lo_u32 v15, v4, v9
+; GISEL-NEXT:    v_mul_lo_u32 v16, v5, v9
+; GISEL-NEXT:    v_mul_hi_u32 v17, v4, v9
+; GISEL-NEXT:    v_add_i32_e32 v18, vcc, 1, v6
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v19, v10
+; GISEL-NEXT:    v_add_i32_e64 v19, s[4:5], 1, v9
+; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v21, v11
+; GISEL-NEXT:    v_add_i32_e64 v20, s[6:7], 1, v18
+; GISEL-NEXT:    v_sub_i32_e64 v0, s[8:9], v0, v12
+; GISEL-NEXT:    v_add_i32_e64 v12, s[10:11], 1, v19
+; GISEL-NEXT:    v_sub_i32_e64 v2, s[12:13], v2, v15
+; GISEL-NEXT:    v_mul_lo_u32 v15, v7, v10
+; GISEL-NEXT:    v_add_i32_e64 v13, s[14:15], v13, v15
+; GISEL-NEXT:    v_mul_lo_u32 v15, v4, v11
+; GISEL-NEXT:    v_add_i32_e64 v15, s[14:15], v16, v15
+; GISEL-NEXT:    v_addc_u32_e32 v16, vcc, 0, v10, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v7
+; GISEL-NEXT:    v_sub_i32_e64 v0, s[14:15], v0, v7
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[16:17], v0, v7
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], 0, v11, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v4
+; GISEL-NEXT:    v_sub_i32_e64 v2, s[18:19], v2, v4
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[20:21], v2, v4
+; GISEL-NEXT:    v_addc_u32_e64 v2, s[6:7], 0, v16, s[6:7]
+; GISEL-NEXT:    v_add_i32_e64 v4, s[6:7], v13, v14
+; GISEL-NEXT:    v_addc_u32_e64 v7, s[6:7], 0, v0, s[10:11]
+; GISEL-NEXT:    v_add_i32_e64 v13, s[6:7], v15, v17
+; GISEL-NEXT:    v_subb_u32_e64 v14, s[6:7], v1, v4, s[8:9]
 ; GISEL-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[16:17]
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v17, v8
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[14:15], v17, v8
-; GISEL-NEXT:    v_addc_u32_e64 v17, s[10:11], 0, v0, s[10:11]
-; GISEL-NEXT:    v_subb_u32_e64 v1, s[4:5], v1, v8, s[4:5]
-; GISEL-NEXT:    v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[18:19]
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v14, v8
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], v14, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, -1, s[4:5]
+; GISEL-NEXT:    v_subb_u32_e64 v1, s[4:5], v1, v8, s[8:9]
+; GISEL-NEXT:    v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[14:15]
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v8
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[10:11], v1, v8
-; GISEL-NEXT:    v_addc_u32_e64 v1, s[12:13], 0, v7, s[12:13]
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[22:23]
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, -1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v11
-; GISEL-NEXT:    v_subb_u32_e64 v11, vcc, v3, v2, s[8:9]
-; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v3, v2
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v11, v5
-; GISEL-NEXT:    v_subb_u32_e64 v2, s[8:9], v2, v5, s[8:9]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[8:9], v11, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, -1, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
-; GISEL-NEXT:    v_subbrev_u32_e64 v2, vcc, 0, v2, s[20:21]
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v3, v16, s[14:15]
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, v11, v4, s[8:9]
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v5
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], v2, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[8:9], v1, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[16:17]
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[20:21]
+; GISEL-NEXT:    v_subb_u32_e64 v15, s[10:11], v3, v13, s[12:13]
+; GISEL-NEXT:    v_sub_i32_e64 v3, s[10:11], v3, v13
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[10:11], v15, v5
+; GISEL-NEXT:    v_subb_u32_e64 v3, s[12:13], v3, v5, s[12:13]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[12:13], v15, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, -1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, -1, s[10:11]
+; GISEL-NEXT:    v_subbrev_u32_e64 v3, vcc, 0, v3, s[18:19]
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, v13, v4, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, v15, v14, s[12:13]
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v5
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], v3, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, -1, s[4:5]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
-; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, v8, s[10:11]
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v5, v19, s[6:7]
-; GISEL-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v2
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v3, v1, s[8:9]
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, v5, v8, s[6:7]
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v1
 ; GISEL-NEXT:    v_cmp_ne_u32_e64 s[8:9], 0, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, v13, v18, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v14, v15, s[8:9]
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, v0, v17, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, v7, v1, s[8:9]
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v6, v2, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v18, v20, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, v19, v12, s[8:9]
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, v16, v2, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, v0, v7, s[8:9]
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v6, v1, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v2, v9, v3, s[4:5]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v1, v10, v4, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v12, v5, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, v11, v5, s[4:5]
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_udiv_v2i64_pow2_shl_denom:
@@ -1520,7 +1370,7 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
 ; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; CGP-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
+; CGP-NEXT:    s_xor_b64 s[8:9], exec, s[4:5]
 ; CGP-NEXT:    s_cbranch_execz .LBB8_2
 ; CGP-NEXT:  ; %bb.1:
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v3
@@ -1548,18 +1398,13 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    v_mul_hi_u32 v19, v0, v13
 ; CGP-NEXT:    v_mul_hi_u32 v13, v4, v13
 ; CGP-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
-; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v18, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v15, vcc, v15, v17
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v19
-; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v15, vcc, v16, v15
-; CGP-NEXT:    v_add_i32_e32 v16, vcc, v18, v17
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v15, vcc, v16, v15
+; CGP-NEXT:    v_addc_u32_e32 v15, vcc, v15, v17, vcc
+; CGP-NEXT:    v_add_i32_e64 v14, s[4:5], v18, v14
+; CGP-NEXT:    v_addc_u32_e64 v14, s[6:7], v14, v19, s[4:5]
+; CGP-NEXT:    v_addc_u32_e64 v15, s[10:11], 0, 0, vcc
+; CGP-NEXT:    v_addc_u32_e64 v16, s[6:7], 0, 0, s[6:7]
+; CGP-NEXT:    v_addc_u32_e64 v14, vcc, v14, v15, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v15, vcc, 0, v16, vcc
 ; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
 ; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v14
 ; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v4, v13, vcc
@@ -1577,18 +1422,13 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    v_mul_hi_u32 v17, v0, v1
 ; CGP-NEXT:    v_mul_hi_u32 v1, v4, v1
 ; CGP-NEXT:    v_add_i32_e32 v12, vcc, v15, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v16
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
-; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v15, v12
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
+; CGP-NEXT:    v_addc_u32_e32 v12, vcc, v12, v16, vcc
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v14, v13
+; CGP-NEXT:    v_addc_u32_e64 v12, s[6:7], v12, v17, s[4:5]
+; CGP-NEXT:    v_addc_u32_e64 v13, s[10:11], 0, 0, vcc
+; CGP-NEXT:    v_addc_u32_e64 v14, s[6:7], 0, 0, s[6:7]
+; CGP-NEXT:    v_addc_u32_e64 v12, vcc, v12, v13, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v13, vcc, 0, v14, vcc
 ; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v13
 ; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v12
 ; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v4, v1, vcc
@@ -1600,58 +1440,53 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    v_mul_hi_u32 v15, v8, v1
 ; CGP-NEXT:    v_mul_hi_u32 v1, v9, v1
 ; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v14, v0
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v15
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v14, v12
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v12, v4
-; CGP-NEXT:    v_mul_lo_u32 v12, v2, v0
-; CGP-NEXT:    v_mul_lo_u32 v13, v3, v0
-; CGP-NEXT:    v_mul_hi_u32 v14, v2, v0
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v4, v12, vcc
+; CGP-NEXT:    v_add_i32_e64 v0, s[4:5], v14, v0
+; CGP-NEXT:    v_addc_u32_e64 v0, s[6:7], v0, v15, s[4:5]
+; CGP-NEXT:    v_addc_u32_e64 v4, s[10:11], 0, 0, vcc
+; CGP-NEXT:    v_addc_u32_e64 v12, s[6:7], 0, 0, s[6:7]
+; CGP-NEXT:    v_addc_u32_e64 v0, vcc, v0, v4, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, 0, v12, vcc
 ; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
-; CGP-NEXT:    v_mul_lo_u32 v4, v2, v1
+; CGP-NEXT:    v_mul_lo_u32 v4, v2, v0
+; CGP-NEXT:    v_mul_lo_u32 v12, v3, v0
+; CGP-NEXT:    v_mul_hi_u32 v13, v2, v0
+; CGP-NEXT:    v_mul_lo_u32 v14, v2, v1
 ; CGP-NEXT:    v_add_i32_e32 v15, vcc, 1, v0
 ; CGP-NEXT:    v_addc_u32_e32 v16, vcc, 0, v1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, 1, v15
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, 1, v15
 ; CGP-NEXT:    v_addc_u32_e32 v17, vcc, 0, v16, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v14
-; CGP-NEXT:    v_sub_i32_e32 v8, vcc, v8, v12
-; CGP-NEXT:    v_subb_u32_e64 v12, s[4:5], v9, v4, vcc
-; CGP-NEXT:    v_sub_i32_e64 v4, s[4:5], v9, v4
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v2
-; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v12, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, -1, s[4:5]
-; CGP-NEXT:    v_subb_u32_e32 v4, vcc, v4, v3, vcc
-; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v12, v3
-; CGP-NEXT:    v_cndmask_b32_e32 v9, v14, v9, vcc
-; CGP-NEXT:    v_sub_i32_e32 v8, vcc, v8, v2
-; CGP-NEXT:    v_subbrev_u32_e32 v4, vcc, 0, v4, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v2
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
+; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v8, v4
+; CGP-NEXT:    v_subb_u32_e64 v8, s[4:5], v9, v12, vcc
+; CGP-NEXT:    v_sub_i32_e64 v9, s[4:5], v9, v12
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v2
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v3
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[4:5]
+; CGP-NEXT:    v_subb_u32_e32 v9, vcc, v9, v3, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v8, v3
+; CGP-NEXT:    v_cndmask_b32_e32 v8, v13, v12, vcc
+; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v4, v2
+; CGP-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v9, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v2
 ; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
-; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v3
-; CGP-NEXT:    v_cndmask_b32_e32 v2, v8, v2, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v9, v3
+; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v3
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
-; CGP-NEXT:    v_cndmask_b32_e32 v2, v15, v13, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v15, v14, vcc
 ; CGP-NEXT:    v_cndmask_b32_e32 v3, v16, v17, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v9
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; CGP-NEXT:    ; implicit-def: $vgpr4
 ; CGP-NEXT:    ; implicit-def: $vgpr2_vgpr3
 ; CGP-NEXT:    ; implicit-def: $vgpr8
 ; CGP-NEXT:  .LBB8_2: ; %Flow1
-; CGP-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
+; CGP-NEXT:    s_or_saveexec_b64 s[6:7], s[8:9]
 ; CGP-NEXT:    v_lshl_b64 v[9:10], v[10:11], v6
 ; CGP-NEXT:    s_xor_b64 exec, exec, s[6:7]
 ; CGP-NEXT:    s_cbranch_execz .LBB8_4
@@ -1683,10 +1518,10 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v9
 ; CGP-NEXT:    ; implicit-def: $vgpr2_vgpr3
 ; CGP-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
+; CGP-NEXT:    s_xor_b64 s[8:9], exec, s[4:5]
 ; CGP-NEXT:    s_cbranch_execnz .LBB8_7
 ; CGP-NEXT:  ; %bb.5: ; %Flow
-; CGP-NEXT:    s_andn2_saveexec_b64 s[6:7], s[6:7]
+; CGP-NEXT:    s_andn2_saveexec_b64 s[6:7], s[8:9]
 ; CGP-NEXT:    s_cbranch_execnz .LBB8_8
 ; CGP-NEXT:  .LBB8_6:
 ; CGP-NEXT:    s_or_b64 exec, exec, s[6:7]
@@ -1717,18 +1552,13 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    v_mul_hi_u32 v16, v2, v8
 ; CGP-NEXT:    v_mul_hi_u32 v8, v4, v8
 ; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v15, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v16
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v15, v14
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; CGP-NEXT:    v_addc_u32_e32 v12, vcc, v12, v14, vcc
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v15, v11
+; CGP-NEXT:    v_addc_u32_e64 v11, s[6:7], v11, v16, s[4:5]
+; CGP-NEXT:    v_addc_u32_e64 v12, s[10:11], 0, 0, vcc
+; CGP-NEXT:    v_addc_u32_e64 v13, s[6:7], 0, 0, s[6:7]
+; CGP-NEXT:    v_addc_u32_e64 v11, vcc, v11, v12, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v12, vcc, 0, v13, vcc
 ; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v11
 ; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v4, v8, vcc
@@ -1746,18 +1576,13 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    v_mul_hi_u32 v14, v2, v3
 ; CGP-NEXT:    v_mul_hi_u32 v3, v4, v3
 ; CGP-NEXT:    v_add_i32_e32 v6, vcc, v12, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v12, v6
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
+; CGP-NEXT:    v_addc_u32_e32 v6, vcc, v6, v13, vcc
+; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v11, v8
+; CGP-NEXT:    v_addc_u32_e64 v6, s[6:7], v6, v14, s[4:5]
+; CGP-NEXT:    v_addc_u32_e64 v8, s[10:11], 0, 0, vcc
+; CGP-NEXT:    v_addc_u32_e64 v11, s[6:7], 0, 0, s[6:7]
+; CGP-NEXT:    v_addc_u32_e64 v6, vcc, v6, v8, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v8, vcc, 0, v11, vcc
 ; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v8
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
 ; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v4, v3, vcc
@@ -1769,57 +1594,52 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    v_mul_hi_u32 v12, v5, v3
 ; CGP-NEXT:    v_mul_hi_u32 v3, v7, v3
 ; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
-; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v11, v2
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v11, v6
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
-; CGP-NEXT:    v_mul_lo_u32 v6, v9, v2
-; CGP-NEXT:    v_mul_lo_u32 v8, v10, v2
-; CGP-NEXT:    v_mul_hi_u32 v11, v9, v2
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v4, v6, vcc
+; CGP-NEXT:    v_add_i32_e64 v2, s[4:5], v11, v2
+; CGP-NEXT:    v_addc_u32_e64 v2, s[6:7], v2, v12, s[4:5]
+; CGP-NEXT:    v_addc_u32_e64 v4, s[10:11], 0, 0, vcc
+; CGP-NEXT:    v_addc_u32_e64 v6, s[6:7], 0, 0, s[6:7]
+; CGP-NEXT:    v_addc_u32_e64 v2, vcc, v2, v4, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, 0, v6, vcc
 ; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
-; CGP-NEXT:    v_mul_lo_u32 v4, v9, v3
+; CGP-NEXT:    v_mul_lo_u32 v4, v9, v2
+; CGP-NEXT:    v_mul_lo_u32 v6, v10, v2
+; CGP-NEXT:    v_mul_hi_u32 v8, v9, v2
+; CGP-NEXT:    v_mul_lo_u32 v11, v9, v3
 ; CGP-NEXT:    v_add_i32_e32 v12, vcc, 1, v2
 ; CGP-NEXT:    v_addc_u32_e32 v13, vcc, 0, v3, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v12
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v11
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, 1, v12
 ; CGP-NEXT:    v_addc_u32_e32 v14, vcc, 0, v13, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v11
-; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v5, v6
-; CGP-NEXT:    v_subb_u32_e64 v6, s[4:5], v7, v4, vcc
-; CGP-NEXT:    v_sub_i32_e64 v4, s[4:5], v7, v4
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v9
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v5, v4
+; CGP-NEXT:    v_subb_u32_e64 v5, s[4:5], v7, v6, vcc
+; CGP-NEXT:    v_sub_i32_e64 v6, s[4:5], v7, v6
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v9
 ; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
-; CGP-NEXT:    v_subb_u32_e32 v4, vcc, v4, v10, vcc
-; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v6, v10
-; CGP-NEXT:    v_cndmask_b32_e32 v6, v11, v7, vcc
-; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v5, v9
-; CGP-NEXT:    v_subbrev_u32_e32 v4, vcc, 0, v4, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v5, v9
-; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v10
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
+; CGP-NEXT:    v_subb_u32_e32 v6, vcc, v6, v10, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v10
+; CGP-NEXT:    v_cndmask_b32_e32 v5, v8, v7, vcc
+; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v4, v9
+; CGP-NEXT:    v_subbrev_u32_e32 v6, vcc, 0, v6, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v6, v10
 ; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, vcc
-; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v10
-; CGP-NEXT:    v_cndmask_b32_e32 v4, v7, v5, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v6, v10
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v7, v4, vcc
 ; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
-; CGP-NEXT:    v_cndmask_b32_e32 v4, v12, v8, vcc
-; CGP-NEXT:    v_cndmask_b32_e32 v5, v13, v14, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v12, v11, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v6, v13, v14, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
 ; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
-; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
 ; CGP-NEXT:    ; implicit-def: $vgpr4
 ; CGP-NEXT:    ; implicit-def: $vgpr9_vgpr10
 ; CGP-NEXT:    ; implicit-def: $vgpr5
-; CGP-NEXT:    s_andn2_saveexec_b64 s[6:7], s[6:7]
+; CGP-NEXT:    s_andn2_saveexec_b64 s[6:7], s[8:9]
 ; CGP-NEXT:    s_cbranch_execz .LBB8_6
 ; CGP-NEXT:  .LBB8_8:
 ; CGP-NEXT:    v_rcp_iflag_f32_e32 v2, v4
@@ -1958,39 +1778,29 @@ define <2 x i64> @v_udiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_mul_hi_u32 v25, v7, v13
 ; GISEL-NEXT:    v_mul_hi_u32 v13, v11, v13
 ; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v19, v14
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v18, vcc, v18, v23
-; GISEL-NEXT:    v_cndmask_b32_e64 v23, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v24, v17
-; GISEL-NEXT:    v_cndmask_b32_e64 v24, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v20
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v22
-; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v18, vcc, v18, v21
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v25
-; GISEL-NEXT:    v_cndmask_b32_e64 v21, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v16, v15
-; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v19, v20
-; GISEL-NEXT:    v_add_i32_e32 v18, vcc, v23, v18
-; GISEL-NEXT:    v_add_i32_e32 v19, vcc, v24, v21
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v16, v15
-; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v19, v18
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v16
+; GISEL-NEXT:    v_addc_u32_e32 v15, vcc, v15, v20, vcc
+; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v19, v14
+; GISEL-NEXT:    v_addc_u32_e64 v14, s[6:7], v14, v22, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v15, s[8:9], v18, v23
+; GISEL-NEXT:    v_addc_u32_e64 v15, s[8:9], v15, v21, s[8:9]
+; GISEL-NEXT:    v_add_i32_e64 v15, s[10:11], v24, v17
+; GISEL-NEXT:    v_addc_u32_e64 v15, s[12:13], v15, v25, s[10:11]
+; GISEL-NEXT:    v_addc_u32_e64 v16, s[14:15], 0, 0, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v17, s[6:7], 0, 0, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v18, s[6:7], 0, 0, s[8:9]
+; GISEL-NEXT:    v_addc_u32_e64 v19, s[6:7], 0, 0, s[12:13]
+; GISEL-NEXT:    v_addc_u32_e64 v14, vcc, v14, v16, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v16, vcc, 0, v17, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v15, vcc, v15, v18, s[10:11]
+; GISEL-NEXT:    v_addc_u32_e32 v17, vcc, 0, v19, vcc
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v16
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
 ; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v14
 ; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, v8, v12, vcc
 ; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v6
 ; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v6
 ; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v6
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v17
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v15
 ; GISEL-NEXT:    v_addc_u32_e32 v11, vcc, v11, v13, vcc
 ; GISEL-NEXT:    v_mul_lo_u32 v13, v9, v7
 ; GISEL-NEXT:    v_mul_lo_u32 v10, v10, v7
@@ -2016,39 +1826,29 @@ define <2 x i64> @v_udiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_mul_hi_u32 v21, v7, v5
 ; GISEL-NEXT:    v_mul_hi_u32 v5, v11, v5
 ; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v16, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v18, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v20, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v17
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v14
-; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v19
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v21
-; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v16, v9
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v18, v15
-; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v20, v17
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
-; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v14
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v12
+; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v9, v17, vcc
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v10, v12
+; GISEL-NEXT:    v_addc_u32_e64 v9, s[6:7], v9, v14, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v10, s[8:9], v18, v15
+; GISEL-NEXT:    v_addc_u32_e64 v10, s[8:9], v10, v19, s[8:9]
+; GISEL-NEXT:    v_add_i32_e64 v10, s[10:11], v20, v13
+; GISEL-NEXT:    v_addc_u32_e64 v10, s[12:13], v10, v21, s[10:11]
+; GISEL-NEXT:    v_addc_u32_e64 v12, s[14:15], 0, 0, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v13, s[6:7], 0, 0, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v14, s[6:7], 0, 0, s[8:9]
+; GISEL-NEXT:    v_addc_u32_e64 v15, s[6:7], 0, 0, s[12:13]
+; GISEL-NEXT:    v_addc_u32_e64 v9, vcc, v9, v12, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v12, vcc, 0, v13, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v10, vcc, v10, v14, s[10:11]
+; GISEL-NEXT:    v_addc_u32_e32 v13, vcc, 0, v15, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v13
 ; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
 ; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, v8, v4, vcc
 ; GISEL-NEXT:    v_mul_lo_u32 v8, 0, v6
 ; GISEL-NEXT:    v_mul_hi_u32 v9, v3, v6
 ; GISEL-NEXT:    v_mul_hi_u32 v6, 0, v6
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v13
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
 ; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v11, v5, vcc
 ; GISEL-NEXT:    v_mul_lo_u32 v10, 0, v7
 ; GISEL-NEXT:    v_mul_hi_u32 v11, v2, v7
@@ -2066,78 +1866,76 @@ define <2 x i64> @v_udiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v15
 ; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v16, v7
 ; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v8, s[4:5], 0, 0, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v14
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v9, s[4:5], 0, 0, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], 0, 0, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v17
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v11, s[4:5], 0, 0, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, 0, v9, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; GISEL-NEXT:    v_mul_lo_u32 v9, v1, v6
-; GISEL-NEXT:    v_mul_lo_u32 v12, 0, v6
-; GISEL-NEXT:    v_mul_hi_u32 v13, v1, v6
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; GISEL-NEXT:    v_mul_lo_u32 v11, v0, v7
-; GISEL-NEXT:    v_mul_lo_u32 v14, 0, v7
-; GISEL-NEXT:    v_mul_hi_u32 v15, v0, v7
+; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, 0, v11, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
-; GISEL-NEXT:    v_mul_lo_u32 v8, v1, v4
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, 1, v6
+; GISEL-NEXT:    v_mul_lo_u32 v8, v1, v6
+; GISEL-NEXT:    v_mul_lo_u32 v10, 0, v6
+; GISEL-NEXT:    v_mul_hi_u32 v11, v1, v6
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; GISEL-NEXT:    v_mul_lo_u32 v9, v0, v7
+; GISEL-NEXT:    v_mul_lo_u32 v12, 0, v7
+; GISEL-NEXT:    v_mul_hi_u32 v13, v0, v7
+; GISEL-NEXT:    v_mul_lo_u32 v14, v1, v4
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, 1, v6
 ; GISEL-NEXT:    v_addc_u32_e32 v16, vcc, 0, v4, vcc
 ; GISEL-NEXT:    v_mul_lo_u32 v17, v0, v5
 ; GISEL-NEXT:    v_add_i32_e32 v18, vcc, 1, v7
 ; GISEL-NEXT:    v_addc_u32_e32 v19, vcc, 0, v5, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, 1, v10
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v14
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, 1, v15
 ; GISEL-NEXT:    v_addc_u32_e32 v20, vcc, 0, v16, vcc
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v17
 ; GISEL-NEXT:    v_add_i32_e32 v17, vcc, 1, v18
 ; GISEL-NEXT:    v_addc_u32_e32 v21, vcc, 0, v19, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v13
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v14, v15
-; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v3, v9
-; GISEL-NEXT:    v_subb_u32_e64 v9, s[4:5], 0, v8, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v8, s[4:5], 0, v8
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v13
+; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v3, v8
+; GISEL-NEXT:    v_subb_u32_e64 v8, s[4:5], 0, v10, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v10, s[4:5], 0, v10
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v3, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, -1, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v11
-; GISEL-NEXT:    v_subb_u32_e64 v11, s[6:7], 0, v13, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e64 v13, s[6:7], 0, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v9
+; GISEL-NEXT:    v_subb_u32_e64 v9, s[6:7], 0, v11, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e64 v11, s[6:7], 0, v11
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v2, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, -1, s[6:7]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, -1, v14, s[6:7]
-; GISEL-NEXT:    v_subbrev_u32_e32 v8, vcc, 0, v8, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v11
-; GISEL-NEXT:    v_cndmask_b32_e32 v11, -1, v15, vcc
-; GISEL-NEXT:    v_subbrev_u32_e64 v13, vcc, 0, v13, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[6:7]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, -1, v12, s[6:7]
+; GISEL-NEXT:    v_subbrev_u32_e32 v10, vcc, 0, v10, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
+; GISEL-NEXT:    v_cndmask_b32_e32 v9, -1, v13, vcc
+; GISEL-NEXT:    v_subbrev_u32_e64 v11, vcc, 0, v11, s[4:5]
 ; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v3, v1
-; GISEL-NEXT:    v_subbrev_u32_e32 v8, vcc, 0, v8, vcc
+; GISEL-NEXT:    v_subbrev_u32_e32 v10, vcc, 0, v10, vcc
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v1
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc
 ; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v0
-; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v13, vcc
+; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v11, vcc
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v0
 ; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v8
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v10
 ; GISEL-NEXT:    v_cndmask_b32_e32 v1, -1, v1, vcc
 ; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v10, v12, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v15, v14, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v0
 ; GISEL-NEXT:    v_cndmask_b32_e64 v2, v18, v17, s[4:5]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v3, v16, v20, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v9
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v6, v1, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v6, v19, v21, s[4:5]
-; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v11
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v9
 ; GISEL-NEXT:    v_cndmask_b32_e64 v2, v7, v2, s[4:5]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v3, v5, v6, s[4:5]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll
index a58397eccaba7..e5dbe607de9bb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll
@@ -116,8 +116,8 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    v_cvt_f32_u32_e32 v0, s11
 ; GFX8-NEXT:    v_cvt_f32_u32_e32 v1, s10
-; GFX8-NEXT:    s_sub_u32 s2, 0, s10
-; GFX8-NEXT:    s_subb_u32 s3, 0, s11
+; GFX8-NEXT:    s_sub_u32 s14, 0, s10
+; GFX8-NEXT:    s_subb_u32 s15, 0, s11
 ; GFX8-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v0
 ; GFX8-NEXT:    v_add_f32_e32 v0, v0, v1
 ; GFX8-NEXT:    v_rcp_iflag_f32_e32 v0, v0
@@ -128,10 +128,10 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX8-NEXT:    v_add_f32_e32 v0, v1, v0
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v3, v0
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v4, v2
-; GFX8-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0
-; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2]
+; GFX8-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s14, v3, 0
+; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s14, v4, v[1:2]
 ; GFX8-NEXT:    v_mul_hi_u32 v5, v3, v0
-; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2]
+; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s15, v3, v[1:2]
 ; GFX8-NEXT:    v_mul_lo_u32 v2, v4, v0
 ; GFX8-NEXT:    v_mul_hi_u32 v0, v4, v0
 ; GFX8-NEXT:    v_mul_lo_u32 v6, v3, v1
@@ -139,44 +139,34 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX8-NEXT:    v_mul_hi_u32 v8, v3, v1
 ; GFX8-NEXT:    v_mul_hi_u32 v1, v4, v1
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
-; GFX8-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v7, v0
-; GFX8-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v5
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v8
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v6, v2
-; GFX8-NEXT:    v_add_u32_e32 v5, vcc, v7, v5
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v5, v2
+; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, v2, v5, vcc
+; GFX8-NEXT:    v_add_u32_e64 v0, s[0:1], v7, v0
+; GFX8-NEXT:    v_addc_u32_e64 v0, s[2:3], v0, v8, s[0:1]
+; GFX8-NEXT:    v_addc_u32_e64 v2, s[12:13], 0, 0, vcc
+; GFX8-NEXT:    v_addc_u32_e64 v5, s[2:3], 0, 0, s[2:3]
+; GFX8-NEXT:    v_addc_u32_e64 v0, vcc, v0, v2, s[0:1]
+; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, 0, v5, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v1, v2
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v0
 ; GFX8-NEXT:    v_addc_u32_e32 v4, vcc, v4, v1, vcc
-; GFX8-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0
-; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2]
+; GFX8-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s14, v3, 0
+; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s14, v4, v[1:2]
 ; GFX8-NEXT:    v_mul_hi_u32 v6, v3, v0
-; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2]
+; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s15, v3, v[1:2]
 ; GFX8-NEXT:    v_mul_lo_u32 v2, v4, v0
 ; GFX8-NEXT:    v_mul_hi_u32 v0, v4, v0
 ; GFX8-NEXT:    v_mul_lo_u32 v5, v3, v1
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v5
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT:    v_mul_lo_u32 v6, v4, v1
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v5, v2
-; GFX8-NEXT:    v_mul_hi_u32 v5, v3, v1
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v6, v0
-; GFX8-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v5
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v5, vcc, v6, v5
+; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, v2, v6, vcc
+; GFX8-NEXT:    v_mul_lo_u32 v2, v4, v1
+; GFX8-NEXT:    v_mul_hi_u32 v6, v3, v1
+; GFX8-NEXT:    v_addc_u32_e64 v5, s[0:1], 0, 0, vcc
+; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
 ; GFX8-NEXT:    v_mul_hi_u32 v1, v4, v1
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v5, v2
+; GFX8-NEXT:    v_addc_u32_e64 v0, s[0:1], v0, v6, vcc
+; GFX8-NEXT:    v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1]
+; GFX8-NEXT:    v_addc_u32_e32 v0, vcc, v0, v5, vcc
+; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v1, v2
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v3, v0
 ; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v4, v1, vcc
@@ -184,28 +174,23 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX8-NEXT:    v_mul_lo_u32 v3, s8, v1
 ; GFX8-NEXT:    v_mul_hi_u32 v4, s8, v0
 ; GFX8-NEXT:    v_mul_hi_u32 v0, s9, v0
-; GFX8-NEXT:    v_mul_hi_u32 v5, s9, v1
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v3
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v4
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT:    v_mul_lo_u32 v4, s9, v1
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v3, v2
-; GFX8-NEXT:    v_mul_hi_u32 v3, s8, v1
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v4, v0
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v3
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v4, v3
-; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v0, v2
-; GFX8-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s10, v4, 0
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v3, v2
-; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v5, v2
-; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s10, v3, v[1:2]
 ; GFX8-NEXT:    v_mov_b32_e32 v6, s9
+; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v3
+; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, v2, v4, vcc
+; GFX8-NEXT:    v_mul_lo_u32 v2, s9, v1
+; GFX8-NEXT:    v_mul_hi_u32 v4, s8, v1
+; GFX8-NEXT:    v_addc_u32_e64 v3, s[0:1], 0, 0, vcc
+; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
+; GFX8-NEXT:    v_addc_u32_e64 v0, s[0:1], v0, v4, vcc
+; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, v0, v3, vcc
+; GFX8-NEXT:    v_mul_hi_u32 v4, s9, v1
+; GFX8-NEXT:    v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1]
+; GFX8-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s10, v3, 0
+; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
+; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v4, v2
+; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s10, v4, v[1:2]
 ; GFX8-NEXT:    v_mov_b32_e32 v5, s11
-; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s11, v4, v[1:2]
+; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s11, v3, v[1:2]
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, s8, v0
 ; GFX8-NEXT:    v_subb_u32_e64 v6, s[0:1], v6, v1, vcc
 ; GFX8-NEXT:    v_sub_u32_e64 v0, s[0:1], s9, v1
@@ -218,8 +203,8 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX8-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[0:1]
 ; GFX8-NEXT:    v_subrev_u32_e32 v7, vcc, s10, v2
 ; GFX8-NEXT:    v_subbrev_u32_e64 v8, s[0:1], 0, v0, vcc
-; GFX8-NEXT:    v_add_u32_e64 v9, s[0:1], 1, v4
-; GFX8-NEXT:    v_addc_u32_e64 v10, s[0:1], 0, v3, s[0:1]
+; GFX8-NEXT:    v_add_u32_e64 v9, s[0:1], 1, v3
+; GFX8-NEXT:    v_addc_u32_e64 v10, s[0:1], 0, v4, s[0:1]
 ; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s11, v8
 ; GFX8-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[0:1]
 ; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s10, v7
@@ -235,8 +220,8 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v9, v12, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v9, v10, v13, vcc
 ; GFX8-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v1
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v4, v0, s[0:1]
-; GFX8-NEXT:    v_cndmask_b32_e64 v1, v3, v9, s[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e64 v0, v3, v0, s[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e64 v1, v4, v9, s[0:1]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v7, v5, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v4, v8, v14, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[0:1]
@@ -255,8 +240,8 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, s11
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, s10
-; GFX9-NEXT:    s_sub_u32 s2, 0, s10
-; GFX9-NEXT:    s_subb_u32 s3, 0, s11
+; GFX9-NEXT:    s_sub_u32 s14, 0, s10
+; GFX9-NEXT:    s_subb_u32 s15, 0, s11
 ; GFX9-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v0
 ; GFX9-NEXT:    v_add_f32_e32 v0, v0, v1
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v0, v0
@@ -267,10 +252,10 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX9-NEXT:    v_add_f32_e32 v0, v1, v0
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v0
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v4, v2
-; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0
-; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2]
+; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s14, v3, 0
+; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s14, v4, v[1:2]
 ; GFX9-NEXT:    v_mul_hi_u32 v5, v3, v0
-; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2]
+; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s15, v3, v[1:2]
 ; GFX9-NEXT:    v_mul_lo_u32 v2, v4, v0
 ; GFX9-NEXT:    v_mul_hi_u32 v0, v4, v0
 ; GFX9-NEXT:    v_mul_lo_u32 v6, v3, v1
@@ -278,71 +263,59 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX9-NEXT:    v_mul_hi_u32 v8, v3, v1
 ; GFX9-NEXT:    v_mul_hi_u32 v1, v4, v1
 ; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v6
-; GFX9-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v7, v0
-; GFX9-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v5
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v8
-; GFX9-NEXT:    v_add_u32_e32 v2, v6, v2
-; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT:    v_add_u32_e32 v5, v7, v5
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT:    v_add3_u32 v1, v5, v2, v1
+; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, v2, v5, vcc
+; GFX9-NEXT:    v_add_co_u32_e64 v0, s[0:1], v7, v0
+; GFX9-NEXT:    v_addc_co_u32_e64 v0, s[2:3], v0, v8, s[0:1]
+; GFX9-NEXT:    v_addc_co_u32_e64 v2, s[12:13], 0, 0, vcc
+; GFX9-NEXT:    v_addc_co_u32_e64 v5, s[2:3], 0, 0, s[2:3]
+; GFX9-NEXT:    v_addc_co_u32_e64 v0, vcc, v0, v2, s[0:1]
+; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v5, vcc
+; GFX9-NEXT:    v_add_u32_e32 v1, v1, v2
 ; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, v3, v0
 ; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v4, v1, vcc
-; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0
-; GFX9-NEXT:    v_mov_b32_e32 v7, s11
-; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2]
+; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s14, v3, 0
+; GFX9-NEXT:    v_mov_b32_e32 v7, 0
+; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s14, v4, v[1:2]
 ; GFX9-NEXT:    v_mul_hi_u32 v6, v3, v0
-; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2]
+; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s15, v3, v[1:2]
 ; GFX9-NEXT:    v_mul_lo_u32 v2, v4, v0
 ; GFX9-NEXT:    v_mul_hi_u32 v0, v4, v0
 ; GFX9-NEXT:    v_mul_lo_u32 v5, v3, v1
 ; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v5
-; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v6
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT:    v_mul_lo_u32 v6, v4, v1
-; GFX9-NEXT:    v_add_u32_e32 v2, v5, v2
-; GFX9-NEXT:    v_mul_hi_u32 v5, v3, v1
+; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, v2, v6, vcc
+; GFX9-NEXT:    v_mul_lo_u32 v2, v4, v1
+; GFX9-NEXT:    v_mul_hi_u32 v6, v3, v1
+; GFX9-NEXT:    v_addc_co_u32_e64 v5, s[0:1], 0, 0, vcc
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
 ; GFX9-NEXT:    v_mul_hi_u32 v1, v4, v1
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v6, v0
-; GFX9-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v5
-; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT:    v_add_u32_e32 v5, v6, v5
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT:    v_add3_u32 v1, v5, v2, v1
+; GFX9-NEXT:    v_addc_co_u32_e64 v0, s[0:1], v0, v6, vcc
+; GFX9-NEXT:    v_addc_co_u32_e64 v2, s[0:1], 0, 0, s[0:1]
+; GFX9-NEXT:    v_addc_co_u32_e32 v0, vcc, v0, v5, vcc
+; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
+; GFX9-NEXT:    v_add_u32_e32 v1, v1, v2
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v3, v0
 ; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v4, v1, vcc
 ; GFX9-NEXT:    v_mul_lo_u32 v2, s9, v0
 ; GFX9-NEXT:    v_mul_lo_u32 v3, s8, v1
 ; GFX9-NEXT:    v_mul_hi_u32 v4, s8, v0
 ; GFX9-NEXT:    v_mul_hi_u32 v0, s9, v0
-; GFX9-NEXT:    v_mul_hi_u32 v6, s9, v1
-; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v3
-; GFX9-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v4
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT:    v_mul_lo_u32 v4, s9, v1
-; GFX9-NEXT:    v_add_u32_e32 v2, v3, v2
-; GFX9-NEXT:    v_mul_hi_u32 v3, s8, v1
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v4, v0
-; GFX9-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v3
-; GFX9-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v5, vcc, v0, v2
-; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s10, v5, 0
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT:    v_add_u32_e32 v3, v4, v3
-; GFX9-NEXT:    v_add3_u32 v3, v3, v2, v6
-; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s10, v3, v[1:2]
 ; GFX9-NEXT:    v_mov_b32_e32 v6, s9
-; GFX9-NEXT:    v_mov_b32_e32 v4, 0
-; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s11, v5, v[1:2]
+; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v3
+; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, v2, v4, vcc
+; GFX9-NEXT:    v_mul_lo_u32 v2, s9, v1
+; GFX9-NEXT:    v_mul_hi_u32 v4, s8, v1
+; GFX9-NEXT:    v_addc_co_u32_e64 v3, s[0:1], 0, 0, vcc
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX9-NEXT:    v_addc_co_u32_e64 v0, s[0:1], v0, v4, vcc
+; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v0, v3, vcc
+; GFX9-NEXT:    v_mul_hi_u32 v4, s9, v1
+; GFX9-NEXT:    v_addc_co_u32_e64 v2, s[0:1], 0, 0, s[0:1]
+; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s10, v3, 0
+; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
+; GFX9-NEXT:    v_add_u32_e32 v4, v4, v2
+; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s10, v4, v[1:2]
+; GFX9-NEXT:    v_mov_b32_e32 v5, s11
+; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s11, v3, v[1:2]
 ; GFX9-NEXT:    v_sub_co_u32_e32 v2, vcc, s8, v0
 ; GFX9-NEXT:    v_subb_co_u32_e64 v6, s[0:1], v6, v1, vcc
 ; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s11, v6
@@ -351,19 +324,19 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s10, v2
 ; GFX9-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[0:1]
 ; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s11, v6
-; GFX9-NEXT:    v_subb_co_u32_e32 v0, vcc, v0, v7, vcc
+; GFX9-NEXT:    v_subb_co_u32_e32 v0, vcc, v0, v5, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v1, v1, v8, s[0:1]
 ; GFX9-NEXT:    v_subrev_co_u32_e32 v8, vcc, s10, v2
 ; GFX9-NEXT:    v_subbrev_co_u32_e64 v9, s[0:1], 0, v0, vcc
-; GFX9-NEXT:    v_add_co_u32_e64 v10, s[0:1], 1, v5
-; GFX9-NEXT:    v_addc_co_u32_e64 v11, s[0:1], 0, v3, s[0:1]
+; GFX9-NEXT:    v_add_co_u32_e64 v10, s[0:1], 1, v3
+; GFX9-NEXT:    v_addc_co_u32_e64 v11, s[0:1], 0, v4, s[0:1]
 ; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s11, v9
 ; GFX9-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[0:1]
 ; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s10, v8
-; GFX9-NEXT:    v_subb_co_u32_e32 v0, vcc, v0, v7, vcc
+; GFX9-NEXT:    v_subb_co_u32_e32 v0, vcc, v0, v5, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[0:1]
 ; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s11, v9
-; GFX9-NEXT:    v_subrev_co_u32_e32 v7, vcc, s10, v8
+; GFX9-NEXT:    v_subrev_co_u32_e32 v5, vcc, s10, v8
 ; GFX9-NEXT:    v_cndmask_b32_e64 v12, v12, v13, s[0:1]
 ; GFX9-NEXT:    v_add_co_u32_e64 v13, s[0:1], 1, v10
 ; GFX9-NEXT:    v_subbrev_co_u32_e32 v15, vcc, 0, v0, vcc
@@ -372,14 +345,14 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v10, v13, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v10, v11, v14, vcc
 ; GFX9-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v1
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, v5, v0, s[0:1]
-; GFX9-NEXT:    v_cndmask_b32_e64 v1, v3, v10, s[0:1]
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v8, v7, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v5, v9, v15, vcc
+; GFX9-NEXT:    v_cndmask_b32_e64 v0, v3, v0, s[0:1]
+; GFX9-NEXT:    v_cndmask_b32_e64 v1, v4, v10, s[0:1]
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v8, v5, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, v9, v15, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[0:1]
-; GFX9-NEXT:    v_cndmask_b32_e64 v3, v6, v5, s[0:1]
-; GFX9-NEXT:    global_store_dwordx2 v4, v[0:1], s[4:5]
-; GFX9-NEXT:    global_store_dwordx2 v4, v[2:3], s[6:7]
+; GFX9-NEXT:    v_cndmask_b32_e64 v3, v6, v4, s[0:1]
+; GFX9-NEXT:    global_store_dwordx2 v7, v[0:1], s[4:5]
+; GFX9-NEXT:    global_store_dwordx2 v7, v[2:3], s[6:7]
 ; GFX9-NEXT:    s_endpgm
 ;
 ; GFX10-LABEL: udivrem_i64:
@@ -388,7 +361,8 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_cvt_f32_u32_e32 v0, s11
 ; GFX10-NEXT:    v_cvt_f32_u32_e32 v1, s10
-; GFX10-NEXT:    s_sub_u32 s0, 0, s10
+; GFX10-NEXT:    s_sub_u32 s2, 0, s10
+; GFX10-NEXT:    s_subb_u32 s3, 0, s11
 ; GFX10-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v0
 ; GFX10-NEXT:    v_add_f32_e32 v0, v0, v1
 ; GFX10-NEXT:    v_rcp_iflag_f32_e32 v0, v0
@@ -399,82 +373,69 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v4, v2
 ; GFX10-NEXT:    v_add_f32_e32 v0, v1, v0
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v3, v0
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s1, s0, v3, 0
-; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s1, s0, v4, v[1:2]
-; GFX10-NEXT:    s_subb_u32 s1, 0, s11
-; GFX10-NEXT:    v_mul_hi_u32 v6, v4, v0
-; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s2, s1, v3, v[1:2]
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, s2, v3, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s0, s2, v4, v[1:2]
+; GFX10-NEXT:    v_mul_hi_u32 v6, v3, v0
+; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s0, s3, v3, v[1:2]
 ; GFX10-NEXT:    v_mul_lo_u32 v2, v4, v0
-; GFX10-NEXT:    v_mul_hi_u32 v0, v3, v0
+; GFX10-NEXT:    v_mul_hi_u32 v0, v4, v0
 ; GFX10-NEXT:    v_mul_lo_u32 v5, v3, v1
 ; GFX10-NEXT:    v_mul_lo_u32 v7, v4, v1
 ; GFX10-NEXT:    v_mul_hi_u32 v8, v3, v1
 ; GFX10-NEXT:    v_mul_hi_u32 v1, v4, v1
-; GFX10-NEXT:    v_add_co_u32 v2, s2, v2, v5
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s2
-; GFX10-NEXT:    v_add_co_u32 v6, s2, v7, v6
-; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s2
-; GFX10-NEXT:    v_add_co_u32 v0, s2, v2, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s2
-; GFX10-NEXT:    v_add_co_u32 v2, s2, v6, v8
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s2
-; GFX10-NEXT:    v_add_nc_u32_e32 v0, v5, v0
-; GFX10-NEXT:    v_add_nc_u32_e32 v5, v7, v6
-; GFX10-NEXT:    v_add_co_u32 v0, s2, v2, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s2
+; GFX10-NEXT:    v_add_co_u32 v2, vcc_lo, v2, v5
+; GFX10-NEXT:    v_add_co_u32 v0, s0, v7, v0
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, v2, v6, vcc_lo
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v0, s1, v0, v8, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v2, s12, 0, 0, vcc_lo
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v5, s1, 0, 0, s1
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v0, vcc_lo, v0, v2, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, 0, v5, vcc_lo
 ; GFX10-NEXT:    v_add_co_u32 v3, vcc_lo, v3, v0
-; GFX10-NEXT:    v_add3_u32 v1, v5, v2, v1
+; GFX10-NEXT:    v_add_nc_u32_e32 v1, v1, v2
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v4, vcc_lo, v4, v1, vcc_lo
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s2, s0, v3, 0
-; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s0, s0, v4, v[1:2]
-; GFX10-NEXT:    v_mul_hi_u32 v6, v4, v0
-; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s0, s1, v3, v[1:2]
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, s2, v3, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s0, s2, v4, v[1:2]
+; GFX10-NEXT:    v_mul_hi_u32 v6, v3, v0
+; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s0, s3, v3, v[1:2]
 ; GFX10-NEXT:    v_mul_lo_u32 v2, v4, v0
-; GFX10-NEXT:    v_mul_hi_u32 v0, v3, v0
+; GFX10-NEXT:    v_mul_hi_u32 v0, v4, v0
 ; GFX10-NEXT:    v_mul_lo_u32 v5, v3, v1
 ; GFX10-NEXT:    v_mul_lo_u32 v7, v4, v1
 ; GFX10-NEXT:    v_mul_hi_u32 v8, v3, v1
 ; GFX10-NEXT:    v_mul_hi_u32 v1, v4, v1
-; GFX10-NEXT:    v_add_co_u32 v2, s0, v2, v5
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s0
-; GFX10-NEXT:    v_add_co_u32 v6, s0, v7, v6
-; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s0
-; GFX10-NEXT:    v_add_co_u32 v0, s0, v2, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX10-NEXT:    v_add_co_u32 v2, s0, v6, v8
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s0
-; GFX10-NEXT:    v_add_nc_u32_e32 v0, v5, v0
-; GFX10-NEXT:    v_add_nc_u32_e32 v5, v7, v6
-; GFX10-NEXT:    v_add_co_u32 v0, s0, v2, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX10-NEXT:    v_add_co_u32 v2, vcc_lo, v2, v5
+; GFX10-NEXT:    v_add_co_u32 v0, s0, v7, v0
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, v2, v6, vcc_lo
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v0, s1, v0, v8, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v2, s2, 0, 0, vcc_lo
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v5, s1, 0, 0, s1
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v0, vcc_lo, v0, v2, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, 0, v5, vcc_lo
 ; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v3, v0
-; GFX10-NEXT:    v_add3_u32 v1, v5, v2, v1
+; GFX10-NEXT:    v_add_nc_u32_e32 v1, v1, v2
 ; GFX10-NEXT:    v_mul_lo_u32 v2, s9, v0
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v4, v1, vcc_lo
 ; GFX10-NEXT:    v_mul_hi_u32 v4, s8, v0
 ; GFX10-NEXT:    v_mul_hi_u32 v0, s9, v0
 ; GFX10-NEXT:    v_mul_lo_u32 v3, s8, v1
 ; GFX10-NEXT:    v_mul_lo_u32 v5, s9, v1
-; GFX10-NEXT:    v_add_co_u32 v2, s0, v2, v3
-; GFX10-NEXT:    v_mul_hi_u32 v3, s8, v1
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s0
-; GFX10-NEXT:    v_add_co_u32 v2, s0, v2, v4
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX10-NEXT:    v_mul_hi_u32 v6, s8, v1
+; GFX10-NEXT:    v_add_co_u32 v2, vcc_lo, v2, v3
 ; GFX10-NEXT:    v_add_co_u32 v0, s0, v5, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s0
-; GFX10-NEXT:    v_add_nc_u32_e32 v2, v6, v2
-; GFX10-NEXT:    v_add_co_u32 v0, s0, v0, v3
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s0
-; GFX10-NEXT:    v_add_co_u32 v5, s0, v0, v2
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, v2, v4, vcc_lo
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v0, s1, v0, v6, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v2, s2, 0, 0, vcc_lo
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v3, s1, 0, 0, s1
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v4, vcc_lo, v0, v2, s0
 ; GFX10-NEXT:    v_mul_hi_u32 v2, s9, v1
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s0
-; GFX10-NEXT:    v_add_nc_u32_e32 v3, v4, v3
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, s10, v5, 0
-; GFX10-NEXT:    v_add3_u32 v3, v3, v6, v2
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, s10, v4, 0
+; GFX10-NEXT:    v_add_nc_u32_e32 v3, v2, v3
 ; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s0, s10, v3, v[1:2]
-; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s0, s11, v5, v[1:2]
-; GFX10-NEXT:    v_add_co_u32 v2, vcc_lo, v5, 1
-; GFX10-NEXT:    v_add_co_ci_u32_e32 v4, vcc_lo, 0, v3, vcc_lo
+; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s0, s11, v4, v[1:2]
+; GFX10-NEXT:    v_add_co_u32 v2, vcc_lo, v4, 1
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v5, vcc_lo, 0, v3, vcc_lo
 ; GFX10-NEXT:    v_sub_co_u32 v7, vcc_lo, s8, v0
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v6, s9, v1
 ; GFX10-NEXT:    v_sub_co_ci_u32_e64 v8, s0, s9, v1, vcc_lo
@@ -491,7 +452,7 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s11, v9
 ; GFX10-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s0
 ; GFX10-NEXT:    v_add_co_u32 v13, s0, v2, 1
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v14, s0, 0, v4, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v14, s0, 0, v5, s0
 ; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s11, v9
 ; GFX10-NEXT:    v_cndmask_b32_e64 v11, v12, v11, s0
 ; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s11, v8
@@ -500,13 +461,13 @@ define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
 ; GFX10-NEXT:    v_sub_co_u32 v10, s0, v6, s10
 ; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v0, s0, 0, v0, s0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v13, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v14, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v14, vcc_lo
 ; GFX10-NEXT:    v_cmp_ne_u32_e64 s0, 0, v1
 ; GFX10-NEXT:    v_cndmask_b32_e32 v6, v6, v10, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v9, v9, v0, vcc_lo
 ; GFX10-NEXT:    v_mov_b32_e32 v10, 0
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v5, v2, s0
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, v4, s0
+; GFX10-NEXT:    v_cndmask_b32_e64 v0, v4, v2, s0
+; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, v5, s0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v2, v7, v6, s0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v3, v8, v9, s0
 ; GFX10-NEXT:    global_store_dwordx2 v10, v[0:1], s[4:5]
@@ -980,12 +941,11 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX8-LABEL: udivrem_v2i64:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_load_dwordx4 s[12:15], s[4:5], 0x20
-; GFX8-NEXT:    s_load_dwordx8 s[4:11], s[4:5], 0x0
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    v_cvt_f32_u32_e32 v0, s13
 ; GFX8-NEXT:    v_cvt_f32_u32_e32 v1, s12
-; GFX8-NEXT:    s_sub_u32 s2, 0, s12
-; GFX8-NEXT:    s_subb_u32 s3, 0, s13
+; GFX8-NEXT:    s_sub_u32 s8, 0, s12
+; GFX8-NEXT:    s_subb_u32 s9, 0, s13
 ; GFX8-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v0
 ; GFX8-NEXT:    v_add_f32_e32 v0, v0, v1
 ; GFX8-NEXT:    v_rcp_iflag_f32_e32 v0, v0
@@ -996,10 +956,10 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX8-NEXT:    v_add_f32_e32 v0, v1, v0
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v3, v0
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v4, v2
-; GFX8-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0
-; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2]
+; GFX8-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s8, v3, 0
+; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s8, v4, v[1:2]
 ; GFX8-NEXT:    v_mul_hi_u32 v5, v3, v0
-; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2]
+; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s9, v3, v[1:2]
 ; GFX8-NEXT:    v_mul_lo_u32 v2, v4, v0
 ; GFX8-NEXT:    v_mul_hi_u32 v0, v4, v0
 ; GFX8-NEXT:    v_mul_lo_u32 v6, v3, v1
@@ -1007,257 +967,228 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX8-NEXT:    v_mul_hi_u32 v8, v3, v1
 ; GFX8-NEXT:    v_mul_hi_u32 v1, v4, v1
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
-; GFX8-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v7, v0
-; GFX8-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v5
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v8
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v6, v2
-; GFX8-NEXT:    v_add_u32_e32 v5, vcc, v7, v5
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v5, v2
+; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, v2, v5, vcc
+; GFX8-NEXT:    v_add_u32_e64 v0, s[0:1], v7, v0
+; GFX8-NEXT:    v_addc_u32_e64 v0, s[2:3], v0, v8, s[0:1]
+; GFX8-NEXT:    v_addc_u32_e64 v2, s[6:7], 0, 0, vcc
+; GFX8-NEXT:    v_addc_u32_e64 v5, s[2:3], 0, 0, s[2:3]
+; GFX8-NEXT:    v_addc_u32_e64 v0, vcc, v0, v2, s[0:1]
+; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, 0, v5, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v1, v2
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v0
 ; GFX8-NEXT:    v_addc_u32_e32 v4, vcc, v4, v1, vcc
-; GFX8-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0
-; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2]
-; GFX8-NEXT:    v_mul_hi_u32 v6, v3, v0
+; GFX8-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s8, v3, 0
 ; GFX8-NEXT:    s_sub_u32 s2, 0, s14
-; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2]
+; GFX8-NEXT:    s_subb_u32 s3, 0, s15
+; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s8, v4, v[1:2]
+; GFX8-NEXT:    v_mul_hi_u32 v6, v3, v0
+; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s9, v3, v[1:2]
 ; GFX8-NEXT:    v_mul_lo_u32 v2, v4, v0
 ; GFX8-NEXT:    v_mul_hi_u32 v0, v4, v0
 ; GFX8-NEXT:    v_mul_lo_u32 v5, v3, v1
-; GFX8-NEXT:    s_subb_u32 s3, 0, s15
+; GFX8-NEXT:    s_load_dwordx8 s[4:11], s[4:5], 0x0
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v5
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT:    v_mul_lo_u32 v6, v4, v1
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v5, v2
-; GFX8-NEXT:    v_mul_hi_u32 v5, v3, v1
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v6, v0
-; GFX8-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v5
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v5, vcc, v6, v5
+; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, v2, v6, vcc
+; GFX8-NEXT:    v_mul_lo_u32 v2, v4, v1
+; GFX8-NEXT:    v_mul_hi_u32 v6, v3, v1
+; GFX8-NEXT:    v_addc_u32_e64 v5, s[0:1], 0, 0, vcc
+; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
 ; GFX8-NEXT:    v_mul_hi_u32 v1, v4, v1
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v5, v2
+; GFX8-NEXT:    v_addc_u32_e64 v0, s[0:1], v0, v6, vcc
+; GFX8-NEXT:    v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1]
+; GFX8-NEXT:    v_addc_u32_e32 v0, vcc, v0, v5, vcc
+; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v1, v2
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v3, v0
 ; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v4, v1, vcc
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    v_mul_lo_u32 v2, s9, v0
 ; GFX8-NEXT:    v_mul_lo_u32 v3, s8, v1
 ; GFX8-NEXT:    v_mul_hi_u32 v4, s8, v0
 ; GFX8-NEXT:    v_mul_hi_u32 v0, s9, v0
+; GFX8-NEXT:    v_mov_b32_e32 v5, s13
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v3
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v4
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT:    v_mul_lo_u32 v4, s9, v1
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v3, v2
-; GFX8-NEXT:    v_mul_hi_u32 v3, s8, v1
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v4, v0
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v3
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v4, v3
-; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v0, v2
-; GFX8-NEXT:    v_mul_hi_u32 v4, s9, v1
-; GFX8-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s12, v6, 0
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v3, v2
-; GFX8-NEXT:    v_add_u32_e32 v7, vcc, v4, v2
-; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s12, v7, v[1:2]
-; GFX8-NEXT:    v_mov_b32_e32 v3, s9
-; GFX8-NEXT:    v_sub_u32_e32 v8, vcc, s8, v0
-; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s13, v6, v[1:2]
-; GFX8-NEXT:    v_mov_b32_e32 v4, s13
-; GFX8-NEXT:    v_subb_u32_e64 v0, s[0:1], v3, v1, vcc
-; GFX8-NEXT:    v_sub_u32_e64 v1, s[0:1], s9, v1
+; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, v2, v4, vcc
+; GFX8-NEXT:    v_mul_lo_u32 v2, s9, v1
+; GFX8-NEXT:    v_mul_hi_u32 v4, s8, v1
+; GFX8-NEXT:    v_addc_u32_e64 v3, s[0:1], 0, 0, vcc
+; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
+; GFX8-NEXT:    v_addc_u32_e64 v0, s[0:1], v0, v4, vcc
+; GFX8-NEXT:    v_addc_u32_e32 v7, vcc, v0, v3, vcc
+; GFX8-NEXT:    v_mul_hi_u32 v3, s9, v1
+; GFX8-NEXT:    v_addc_u32_e64 v2, s[0:1], 0, 0, s[0:1]
+; GFX8-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s12, v7, 0
+; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
+; GFX8-NEXT:    v_add_u32_e32 v8, vcc, v3, v2
+; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s12, v8, v[1:2]
+; GFX8-NEXT:    v_mov_b32_e32 v4, s9
+; GFX8-NEXT:    v_mad_u64_u32 v[2:3], s[0:1], s13, v7, v[1:2]
+; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, s8, v0
+; GFX8-NEXT:    v_subb_u32_e64 v0, s[0:1], v4, v2, vcc
+; GFX8-NEXT:    v_sub_u32_e64 v2, s[0:1], s9, v2
 ; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s13, v0
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[0:1]
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s12, v8
 ; GFX8-NEXT:    v_cndmask_b32_e64 v3, 0, -1, s[0:1]
+; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s12, v1
+; GFX8-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[0:1]
 ; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s13, v0
-; GFX8-NEXT:    v_cndmask_b32_e64 v9, v2, v3, s[0:1]
-; GFX8-NEXT:    v_cvt_f32_u32_e32 v2, s15
-; GFX8-NEXT:    v_subb_u32_e32 v5, vcc, v1, v4, vcc
-; GFX8-NEXT:    v_cvt_f32_u32_e32 v1, s14
-; GFX8-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v2
-; GFX8-NEXT:    v_subrev_u32_e32 v10, vcc, s12, v8
-; GFX8-NEXT:    v_add_f32_e32 v1, v2, v1
-; GFX8-NEXT:    v_rcp_iflag_f32_e32 v1, v1
-; GFX8-NEXT:    v_subbrev_u32_e64 v11, s[0:1], 0, v5, vcc
-; GFX8-NEXT:    v_add_u32_e64 v12, s[0:1], 1, v6
-; GFX8-NEXT:    v_mul_f32_e32 v1, 0x5f7ffffc, v1
-; GFX8-NEXT:    v_mul_f32_e32 v2, 0x2f800000, v1
-; GFX8-NEXT:    v_trunc_f32_e32 v14, v2
-; GFX8-NEXT:    v_mul_f32_e32 v2, 0xcf800000, v14
-; GFX8-NEXT:    v_add_f32_e32 v1, v2, v1
-; GFX8-NEXT:    v_cvt_u32_f32_e32 v15, v1
-; GFX8-NEXT:    v_addc_u32_e64 v13, s[0:1], 0, v7, s[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e64 v9, v3, v4, s[0:1]
+; GFX8-NEXT:    v_cvt_f32_u32_e32 v3, s15
+; GFX8-NEXT:    v_subb_u32_e32 v6, vcc, v2, v5, vcc
+; GFX8-NEXT:    v_cvt_f32_u32_e32 v2, s14
+; GFX8-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v3
+; GFX8-NEXT:    v_subrev_u32_e32 v10, vcc, s12, v1
+; GFX8-NEXT:    v_add_f32_e32 v2, v3, v2
+; GFX8-NEXT:    v_rcp_iflag_f32_e32 v2, v2
+; GFX8-NEXT:    v_subbrev_u32_e64 v11, s[0:1], 0, v6, vcc
+; GFX8-NEXT:    v_add_u32_e64 v12, s[0:1], 1, v7
+; GFX8-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
+; GFX8-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
+; GFX8-NEXT:    v_trunc_f32_e32 v14, v3
+; GFX8-NEXT:    v_mul_f32_e32 v3, 0xcf800000, v14
+; GFX8-NEXT:    v_add_f32_e32 v2, v3, v2
+; GFX8-NEXT:    v_cvt_u32_f32_e32 v15, v2
+; GFX8-NEXT:    v_addc_u32_e64 v13, s[0:1], 0, v8, s[0:1]
 ; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s13, v11
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, 0, -1, s[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[0:1]
 ; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s12, v10
 ; GFX8-NEXT:    v_cndmask_b32_e64 v16, 0, -1, s[0:1]
-; GFX8-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s2, v15, 0
+; GFX8-NEXT:    v_mad_u64_u32 v[2:3], s[0:1], s2, v15, 0
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v14, v14
 ; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s13, v11
-; GFX8-NEXT:    v_cndmask_b32_e64 v16, v3, v16, s[0:1]
-; GFX8-NEXT:    v_mad_u64_u32 v[2:3], s[0:1], s2, v14, v[2:3]
+; GFX8-NEXT:    v_cndmask_b32_e64 v16, v4, v16, s[0:1]
+; GFX8-NEXT:    v_mad_u64_u32 v[3:4], s[0:1], s2, v14, v[3:4]
 ; GFX8-NEXT:    v_add_u32_e64 v17, s[0:1], 1, v12
 ; GFX8-NEXT:    v_addc_u32_e64 v18, s[0:1], 0, v13, s[0:1]
-; GFX8-NEXT:    v_mad_u64_u32 v[2:3], s[0:1], s3, v15, v[2:3]
-; GFX8-NEXT:    v_subb_u32_e32 v3, vcc, v5, v4, vcc
-; GFX8-NEXT:    v_mul_lo_u32 v4, v14, v1
-; GFX8-NEXT:    v_mul_lo_u32 v5, v15, v2
+; GFX8-NEXT:    v_mad_u64_u32 v[3:4], s[0:1], s3, v15, v[3:4]
+; GFX8-NEXT:    v_subb_u32_e32 v4, vcc, v6, v5, vcc
+; GFX8-NEXT:    v_mul_lo_u32 v5, v14, v2
+; GFX8-NEXT:    v_mul_lo_u32 v6, v15, v3
 ; GFX8-NEXT:    v_subrev_u32_e32 v19, vcc, s12, v10
-; GFX8-NEXT:    v_subbrev_u32_e32 v20, vcc, 0, v3, vcc
-; GFX8-NEXT:    v_mul_hi_u32 v3, v15, v1
-; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v4, v5
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v4, v3
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX8-NEXT:    v_mul_lo_u32 v4, v14, v2
-; GFX8-NEXT:    v_mul_hi_u32 v1, v14, v1
-; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v5, v3
-; GFX8-NEXT:    v_mul_hi_u32 v5, v15, v2
-; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v4, v1
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v1, v5
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v4, v5
+; GFX8-NEXT:    v_subbrev_u32_e32 v20, vcc, 0, v4, vcc
+; GFX8-NEXT:    v_mul_hi_u32 v4, v15, v2
+; GFX8-NEXT:    v_add_u32_e32 v5, vcc, v5, v6
 ; GFX8-NEXT:    v_mul_hi_u32 v2, v14, v2
-; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v1, v3
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v4, v3
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v3
-; GFX8-NEXT:    v_add_u32_e32 v15, vcc, v15, v1
-; GFX8-NEXT:    v_mad_u64_u32 v[3:4], s[0:1], s2, v15, 0
-; GFX8-NEXT:    v_addc_u32_e32 v14, vcc, v14, v2, vcc
+; GFX8-NEXT:    v_addc_u32_e32 v4, vcc, v5, v4, vcc
+; GFX8-NEXT:    v_mul_lo_u32 v4, v14, v3
+; GFX8-NEXT:    v_mul_hi_u32 v6, v15, v3
+; GFX8-NEXT:    v_addc_u32_e64 v5, s[0:1], 0, 0, vcc
+; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v4, v2
+; GFX8-NEXT:    v_mul_hi_u32 v3, v14, v3
+; GFX8-NEXT:    v_addc_u32_e64 v2, s[0:1], v2, v6, vcc
+; GFX8-NEXT:    v_addc_u32_e64 v4, s[0:1], 0, 0, s[0:1]
+; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, v2, v5, vcc
+; GFX8-NEXT:    v_addc_u32_e32 v4, vcc, 0, v4, vcc
+; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v4
+; GFX8-NEXT:    v_add_u32_e32 v15, vcc, v15, v2
+; GFX8-NEXT:    v_mad_u64_u32 v[4:5], s[0:1], s2, v15, 0
+; GFX8-NEXT:    v_addc_u32_e32 v14, vcc, v14, v3, vcc
 ; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v16
-; GFX8-NEXT:    v_cndmask_b32_e32 v2, v12, v17, vcc
-; GFX8-NEXT:    v_mov_b32_e32 v1, v4
-; GFX8-NEXT:    v_mad_u64_u32 v[4:5], s[0:1], s2, v14, v[1:2]
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v12, v17, vcc
+; GFX8-NEXT:    v_mov_b32_e32 v2, v5
+; GFX8-NEXT:    v_mad_u64_u32 v[5:6], s[0:1], s2, v14, v[2:3]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v12, v13, v18, vcc
 ; GFX8-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v9
-; GFX8-NEXT:    v_mad_u64_u32 v[4:5], s[2:3], s3, v15, v[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v1, v6, v2, s[0:1]
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v7, v12, s[0:1]
-; GFX8-NEXT:    v_cndmask_b32_e32 v5, v10, v19, vcc
-; GFX8-NEXT:    v_mul_lo_u32 v7, v14, v3
-; GFX8-NEXT:    v_mul_lo_u32 v9, v15, v4
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, v8, v5, s[0:1]
-; GFX8-NEXT:    v_mul_hi_u32 v8, v15, v3
-; GFX8-NEXT:    v_cndmask_b32_e32 v6, v11, v20, vcc
-; GFX8-NEXT:    v_add_u32_e32 v7, vcc, v7, v9
-; GFX8-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GFX8-NEXT:    v_mad_u64_u32 v[5:6], s[2:3], s3, v15, v[5:6]
+; GFX8-NEXT:    v_cndmask_b32_e64 v2, v7, v3, s[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e64 v3, v8, v12, s[0:1]
+; GFX8-NEXT:    v_mul_lo_u32 v7, v14, v4
+; GFX8-NEXT:    v_mul_lo_u32 v8, v15, v5
+; GFX8-NEXT:    v_cndmask_b32_e32 v6, v10, v19, vcc
+; GFX8-NEXT:    v_mul_hi_u32 v10, v15, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v9, v11, v20, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v7, vcc, v7, v8
-; GFX8-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX8-NEXT:    v_mul_lo_u32 v8, v14, v4
-; GFX8-NEXT:    v_mul_hi_u32 v3, v14, v3
-; GFX8-NEXT:    v_add_u32_e32 v7, vcc, v9, v7
-; GFX8-NEXT:    v_mul_hi_u32 v9, v15, v4
-; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v8, v3
-; GFX8-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v9
-; GFX8-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v8, vcc, v8, v9
+; GFX8-NEXT:    v_addc_u32_e32 v7, vcc, v7, v10, vcc
+; GFX8-NEXT:    v_mul_lo_u32 v7, v14, v5
 ; GFX8-NEXT:    v_mul_hi_u32 v4, v14, v4
-; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v7
-; GFX8-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v7, vcc, v8, v7
-; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v4, v7
-; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v15, v3
-; GFX8-NEXT:    v_addc_u32_e32 v4, vcc, v14, v4, vcc
-; GFX8-NEXT:    v_mul_lo_u32 v7, s11, v3
-; GFX8-NEXT:    v_mul_lo_u32 v8, s10, v4
-; GFX8-NEXT:    v_cndmask_b32_e64 v6, v0, v6, s[0:1]
-; GFX8-NEXT:    v_mul_hi_u32 v0, s10, v3
-; GFX8-NEXT:    v_mul_hi_u32 v3, s11, v3
-; GFX8-NEXT:    v_add_u32_e32 v7, vcc, v7, v8
-; GFX8-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v7, v0
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX8-NEXT:    v_mul_hi_u32 v10, v15, v5
+; GFX8-NEXT:    v_addc_u32_e64 v8, s[2:3], 0, 0, vcc
+; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v7, v4
+; GFX8-NEXT:    v_mul_hi_u32 v5, v14, v5
+; GFX8-NEXT:    v_addc_u32_e64 v4, s[2:3], v4, v10, vcc
+; GFX8-NEXT:    v_addc_u32_e64 v7, s[2:3], 0, 0, s[2:3]
+; GFX8-NEXT:    v_addc_u32_e32 v4, vcc, v4, v8, vcc
+; GFX8-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
+; GFX8-NEXT:    v_add_u32_e32 v5, vcc, v5, v7
+; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v15, v4
+; GFX8-NEXT:    v_addc_u32_e32 v5, vcc, v14, v5, vcc
 ; GFX8-NEXT:    v_mul_lo_u32 v7, s11, v4
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v8, v0
-; GFX8-NEXT:    v_mul_hi_u32 v8, s10, v4
-; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v7, v3
-; GFX8-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v8
-; GFX8-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GFX8-NEXT:    v_mul_lo_u32 v8, s10, v5
+; GFX8-NEXT:    v_cndmask_b32_e64 v6, v1, v6, s[0:1]
+; GFX8-NEXT:    v_mul_hi_u32 v1, s10, v4
+; GFX8-NEXT:    v_mul_hi_u32 v4, s11, v4
 ; GFX8-NEXT:    v_add_u32_e32 v7, vcc, v7, v8
-; GFX8-NEXT:    v_add_u32_e32 v9, vcc, v3, v0
-; GFX8-NEXT:    v_mul_hi_u32 v8, s11, v4
-; GFX8-NEXT:    v_mad_u64_u32 v[3:4], s[0:1], s14, v9, 0
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v7, v0
-; GFX8-NEXT:    v_add_u32_e32 v10, vcc, v8, v0
-; GFX8-NEXT:    v_mov_b32_e32 v0, v4
-; GFX8-NEXT:    v_mad_u64_u32 v[7:8], s[0:1], s14, v10, v[0:1]
-; GFX8-NEXT:    v_mov_b32_e32 v4, s11
-; GFX8-NEXT:    v_mov_b32_e32 v0, s15
-; GFX8-NEXT:    v_mad_u64_u32 v[7:8], s[0:1], s15, v9, v[7:8]
-; GFX8-NEXT:    v_sub_u32_e32 v8, vcc, s10, v3
-; GFX8-NEXT:    v_subb_u32_e64 v11, s[0:1], v4, v7, vcc
-; GFX8-NEXT:    v_sub_u32_e64 v3, s[0:1], s11, v7
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s15, v11
+; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v7, v1, vcc
+; GFX8-NEXT:    v_mul_lo_u32 v1, s11, v5
+; GFX8-NEXT:    v_mul_hi_u32 v8, s10, v5
+; GFX8-NEXT:    v_addc_u32_e64 v7, s[2:3], 0, 0, vcc
+; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v1, v4
+; GFX8-NEXT:    v_addc_u32_e64 v1, s[2:3], v1, v8, vcc
+; GFX8-NEXT:    v_addc_u32_e64 v4, s[2:3], 0, 0, s[2:3]
+; GFX8-NEXT:    v_addc_u32_e32 v8, vcc, v1, v7, vcc
+; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v4, vcc
+; GFX8-NEXT:    v_mul_hi_u32 v10, s11, v5
+; GFX8-NEXT:    v_mad_u64_u32 v[4:5], s[2:3], s14, v8, 0
+; GFX8-NEXT:    v_cndmask_b32_e64 v7, v0, v9, s[0:1]
+; GFX8-NEXT:    v_add_u32_e32 v9, vcc, v10, v1
+; GFX8-NEXT:    v_mov_b32_e32 v0, v5
+; GFX8-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s14, v9, v[0:1]
+; GFX8-NEXT:    v_mov_b32_e32 v10, s11
+; GFX8-NEXT:    v_mov_b32_e32 v5, s15
+; GFX8-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s15, v8, v[0:1]
+; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, s10, v4
+; GFX8-NEXT:    v_subb_u32_e64 v10, s[0:1], v10, v0, vcc
+; GFX8-NEXT:    v_sub_u32_e64 v0, s[0:1], s11, v0
+; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s15, v10
 ; GFX8-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[0:1]
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s14, v8
-; GFX8-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[0:1]
-; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s15, v11
-; GFX8-NEXT:    v_subb_u32_e32 v3, vcc, v3, v0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, v4, v7, s[0:1]
-; GFX8-NEXT:    v_subrev_u32_e32 v7, vcc, s14, v8
-; GFX8-NEXT:    v_subbrev_u32_e64 v12, s[0:1], 0, v3, vcc
-; GFX8-NEXT:    v_add_u32_e64 v13, s[0:1], 1, v9
-; GFX8-NEXT:    v_addc_u32_e64 v14, s[0:1], 0, v10, s[0:1]
+; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s14, v1
+; GFX8-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[0:1]
+; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s15, v10
+; GFX8-NEXT:    v_subb_u32_e32 v0, vcc, v0, v5, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v4, v4, v11, s[0:1]
+; GFX8-NEXT:    v_subrev_u32_e32 v11, vcc, s14, v1
+; GFX8-NEXT:    v_subbrev_u32_e64 v12, s[0:1], 0, v0, vcc
+; GFX8-NEXT:    v_add_u32_e64 v13, s[0:1], 1, v8
+; GFX8-NEXT:    v_addc_u32_e64 v14, s[0:1], 0, v9, s[0:1]
 ; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s15, v12
 ; GFX8-NEXT:    v_cndmask_b32_e64 v15, 0, -1, s[0:1]
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s14, v7
-; GFX8-NEXT:    v_subb_u32_e32 v0, vcc, v3, v0, vcc
+; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s14, v11
+; GFX8-NEXT:    v_subb_u32_e32 v0, vcc, v0, v5, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v16, 0, -1, s[0:1]
 ; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s15, v12
-; GFX8-NEXT:    v_subrev_u32_e32 v18, vcc, s14, v7
+; GFX8-NEXT:    v_subrev_u32_e32 v18, vcc, s14, v11
 ; GFX8-NEXT:    v_cndmask_b32_e64 v15, v15, v16, s[0:1]
 ; GFX8-NEXT:    v_add_u32_e64 v16, s[0:1], 1, v13
 ; GFX8-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
 ; GFX8-NEXT:    v_addc_u32_e64 v17, s[0:1], 0, v14, s[0:1]
 ; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v15
-; GFX8-NEXT:    v_cndmask_b32_e32 v3, v13, v16, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v13, v14, v17, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v5, v13, v16, vcc
 ; GFX8-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v4
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v9, v3, s[0:1]
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, v10, v13, s[0:1]
-; GFX8-NEXT:    v_mov_b32_e32 v10, s5
-; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v18, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v13, v14, v17, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v4, v8, v5, s[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v11, v18, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v12, v0, vcc
-; GFX8-NEXT:    v_mov_b32_e32 v9, s4
-; GFX8-NEXT:    v_cndmask_b32_e64 v7, v8, v7, s[0:1]
-; GFX8-NEXT:    v_cndmask_b32_e64 v8, v11, v0, s[0:1]
-; GFX8-NEXT:    flat_store_dwordx4 v[9:10], v[1:4]
+; GFX8-NEXT:    v_cndmask_b32_e64 v5, v9, v13, s[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e64 v8, v1, v8, s[0:1]
+; GFX8-NEXT:    v_cndmask_b32_e64 v9, v10, v0, s[0:1]
+; GFX8-NEXT:    v_mov_b32_e32 v0, s4
+; GFX8-NEXT:    v_mov_b32_e32 v1, s5
+; GFX8-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s7
-; GFX8-NEXT:    flat_store_dwordx4 v[0:1], v[5:8]
+; GFX8-NEXT:    flat_store_dwordx4 v[0:1], v[6:9]
 ; GFX8-NEXT:    s_endpgm
 ;
 ; GFX9-LABEL: udivrem_v2i64:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[12:15], s[4:5], 0x20
+; GFX9-NEXT:    s_load_dwordx4 s[16:19], s[4:5], 0x20
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, s13
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, s12
-; GFX9-NEXT:    s_sub_u32 s2, 0, s12
-; GFX9-NEXT:    s_subb_u32 s3, 0, s13
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, s17
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, s16
+; GFX9-NEXT:    s_sub_u32 s8, 0, s16
+; GFX9-NEXT:    s_subb_u32 s9, 0, s17
 ; GFX9-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v0
 ; GFX9-NEXT:    v_add_f32_e32 v0, v0, v1
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v0, v0
-; GFX9-NEXT:    s_load_dwordx8 s[4:11], s[4:5], 0x0
 ; GFX9-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GFX9-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GFX9-NEXT:    v_trunc_f32_e32 v2, v1
@@ -1265,10 +1196,10 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX9-NEXT:    v_add_f32_e32 v0, v1, v0
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v0
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v4, v2
-; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0
-; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2]
+; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s8, v3, 0
+; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s8, v4, v[1:2]
 ; GFX9-NEXT:    v_mul_hi_u32 v5, v3, v0
-; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2]
+; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s9, v3, v[1:2]
 ; GFX9-NEXT:    v_mul_lo_u32 v2, v4, v0
 ; GFX9-NEXT:    v_mul_hi_u32 v0, v4, v0
 ; GFX9-NEXT:    v_mul_lo_u32 v6, v3, v1
@@ -1276,220 +1207,195 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX9-NEXT:    v_mul_hi_u32 v8, v3, v1
 ; GFX9-NEXT:    v_mul_hi_u32 v1, v4, v1
 ; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v6
-; GFX9-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v7, v0
-; GFX9-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v5
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v8
-; GFX9-NEXT:    v_add_u32_e32 v2, v6, v2
-; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT:    v_add_u32_e32 v5, v7, v5
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT:    v_add3_u32 v1, v5, v2, v1
+; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, v2, v5, vcc
+; GFX9-NEXT:    v_add_co_u32_e64 v0, s[0:1], v7, v0
+; GFX9-NEXT:    v_addc_co_u32_e64 v0, s[2:3], v0, v8, s[0:1]
+; GFX9-NEXT:    v_addc_co_u32_e64 v2, s[6:7], 0, 0, vcc
+; GFX9-NEXT:    v_addc_co_u32_e64 v5, s[2:3], 0, 0, s[2:3]
+; GFX9-NEXT:    v_addc_co_u32_e64 v0, vcc, v0, v2, s[0:1]
+; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v5, vcc
+; GFX9-NEXT:    v_add_u32_e32 v1, v1, v2
 ; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, v3, v0
 ; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v4, v1, vcc
-; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s2, v3, 0
-; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s2, v4, v[1:2]
+; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s8, v3, 0
+; GFX9-NEXT:    s_sub_u32 s2, 0, s18
+; GFX9-NEXT:    s_subb_u32 s3, 0, s19
+; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s8, v4, v[1:2]
 ; GFX9-NEXT:    v_mul_hi_u32 v6, v3, v0
-; GFX9-NEXT:    s_sub_u32 s2, 0, s14
-; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s3, v3, v[1:2]
+; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s9, v3, v[1:2]
 ; GFX9-NEXT:    v_mul_lo_u32 v2, v4, v0
 ; GFX9-NEXT:    v_mul_hi_u32 v0, v4, v0
 ; GFX9-NEXT:    v_mul_lo_u32 v5, v3, v1
-; GFX9-NEXT:    s_subb_u32 s3, 0, s15
+; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x0
 ; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v5
-; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v6
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT:    v_mul_lo_u32 v6, v4, v1
-; GFX9-NEXT:    v_add_u32_e32 v2, v5, v2
-; GFX9-NEXT:    v_mul_hi_u32 v5, v3, v1
+; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, v2, v6, vcc
+; GFX9-NEXT:    v_mul_lo_u32 v2, v4, v1
+; GFX9-NEXT:    v_mul_hi_u32 v6, v3, v1
+; GFX9-NEXT:    v_addc_co_u32_e64 v5, s[0:1], 0, 0, vcc
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
 ; GFX9-NEXT:    v_mul_hi_u32 v1, v4, v1
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v6, v0
-; GFX9-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v5
-; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT:    v_add_u32_e32 v5, v6, v5
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT:    v_add3_u32 v1, v5, v2, v1
+; GFX9-NEXT:    v_addc_co_u32_e64 v0, s[0:1], v0, v6, vcc
+; GFX9-NEXT:    v_addc_co_u32_e64 v2, s[0:1], 0, 0, s[0:1]
+; GFX9-NEXT:    v_addc_co_u32_e32 v0, vcc, v0, v5, vcc
+; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
+; GFX9-NEXT:    v_add_u32_e32 v1, v1, v2
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v3, v0
 ; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v4, v1, vcc
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_mul_lo_u32 v2, s9, v0
-; GFX9-NEXT:    v_mul_lo_u32 v3, s8, v1
-; GFX9-NEXT:    v_mul_hi_u32 v4, s8, v0
-; GFX9-NEXT:    v_mul_hi_u32 v0, s9, v0
-; GFX9-NEXT:    v_mul_hi_u32 v5, s9, v1
+; GFX9-NEXT:    v_mul_lo_u32 v2, s13, v0
+; GFX9-NEXT:    v_mul_lo_u32 v3, s12, v1
+; GFX9-NEXT:    v_mul_hi_u32 v4, s12, v0
+; GFX9-NEXT:    v_mul_hi_u32 v0, s13, v0
+; GFX9-NEXT:    v_mov_b32_e32 v5, s13
 ; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v3
-; GFX9-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v4
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT:    v_mul_lo_u32 v4, s9, v1
-; GFX9-NEXT:    v_add_u32_e32 v2, v3, v2
-; GFX9-NEXT:    v_mul_hi_u32 v3, s8, v1
-; GFX9-NEXT:    v_mov_b32_e32 v6, s13
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v4, v0
-; GFX9-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v3
-; GFX9-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v8, vcc, v0, v2
-; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s12, v8, 0
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX9-NEXT:    v_add_u32_e32 v3, v4, v3
-; GFX9-NEXT:    v_add3_u32 v9, v3, v0, v5
-; GFX9-NEXT:    v_mov_b32_e32 v0, v2
-; GFX9-NEXT:    v_mad_u64_u32 v[2:3], s[0:1], s12, v9, v[0:1]
-; GFX9-NEXT:    v_mov_b32_e32 v5, s9
-; GFX9-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9-NEXT:    v_mad_u64_u32 v[3:4], s[0:1], s13, v8, v[2:3]
-; GFX9-NEXT:    v_sub_co_u32_e32 v2, vcc, s8, v1
-; GFX9-NEXT:    v_subb_co_u32_e64 v1, s[0:1], v5, v3, vcc
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s13, v1
+; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, v2, v4, vcc
+; GFX9-NEXT:    v_mul_lo_u32 v2, s13, v1
+; GFX9-NEXT:    v_mul_hi_u32 v4, s12, v1
+; GFX9-NEXT:    v_addc_co_u32_e64 v3, s[0:1], 0, 0, vcc
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
+; GFX9-NEXT:    v_addc_co_u32_e64 v0, s[0:1], v0, v4, vcc
+; GFX9-NEXT:    v_addc_co_u32_e32 v8, vcc, v0, v3, vcc
+; GFX9-NEXT:    v_mul_hi_u32 v3, s13, v1
+; GFX9-NEXT:    v_addc_co_u32_e64 v2, s[0:1], 0, 0, s[0:1]
+; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s16, v8, 0
+; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
+; GFX9-NEXT:    v_add_u32_e32 v9, v3, v2
+; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s16, v9, v[1:2]
+; GFX9-NEXT:    v_mov_b32_e32 v6, s17
+; GFX9-NEXT:    v_mad_u64_u32 v[3:4], s[0:1], s17, v8, v[1:2]
+; GFX9-NEXT:    v_sub_co_u32_e32 v1, vcc, s12, v0
+; GFX9-NEXT:    v_subb_co_u32_e64 v2, s[0:1], v5, v3, vcc
+; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s17, v2
 ; GFX9-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s12, v2
+; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s16, v1
 ; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s13, v1
-; GFX9-NEXT:    v_sub_u32_e32 v3, s9, v3
+; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s17, v2
+; GFX9-NEXT:    v_sub_u32_e32 v3, s13, v3
 ; GFX9-NEXT:    v_cndmask_b32_e64 v10, v4, v5, s[0:1]
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v4, s15
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v4, s19
 ; GFX9-NEXT:    v_subb_co_u32_e32 v7, vcc, v3, v6, vcc
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v3, s14
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v3, s18
 ; GFX9-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v4
-; GFX9-NEXT:    v_subrev_co_u32_e32 v11, vcc, s12, v2
+; GFX9-NEXT:    v_subrev_co_u32_e32 v11, vcc, s16, v1
 ; GFX9-NEXT:    v_add_f32_e32 v3, v4, v3
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v3, v3
 ; GFX9-NEXT:    v_subbrev_co_u32_e64 v12, s[0:1], 0, v7, vcc
 ; GFX9-NEXT:    v_add_co_u32_e64 v13, s[0:1], 1, v8
 ; GFX9-NEXT:    v_mul_f32_e32 v3, 0x5f7ffffc, v3
 ; GFX9-NEXT:    v_mul_f32_e32 v4, 0x2f800000, v3
-; GFX9-NEXT:    v_trunc_f32_e32 v15, v4
-; GFX9-NEXT:    v_mul_f32_e32 v4, 0xcf800000, v15
+; GFX9-NEXT:    v_trunc_f32_e32 v5, v4
+; GFX9-NEXT:    v_mul_f32_e32 v4, 0xcf800000, v5
 ; GFX9-NEXT:    v_add_f32_e32 v3, v4, v3
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v16, v3
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v15, v3
 ; GFX9-NEXT:    v_addc_co_u32_e64 v14, s[0:1], 0, v9, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s13, v12
-; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s12, v11
-; GFX9-NEXT:    v_cndmask_b32_e64 v17, 0, -1, s[0:1]
-; GFX9-NEXT:    v_mad_u64_u32 v[3:4], s[0:1], s2, v16, 0
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v15, v15
-; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s13, v12
-; GFX9-NEXT:    v_cndmask_b32_e64 v17, v5, v17, s[0:1]
-; GFX9-NEXT:    v_mad_u64_u32 v[4:5], s[0:1], s2, v15, v[4:5]
+; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s17, v12
+; GFX9-NEXT:    v_cndmask_b32_e64 v16, 0, -1, s[0:1]
+; GFX9-NEXT:    v_mad_u64_u32 v[3:4], s[0:1], s2, v15, 0
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v17, v5
+; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s16, v11
+; GFX9-NEXT:    v_cndmask_b32_e64 v18, 0, -1, s[0:1]
+; GFX9-NEXT:    v_subb_co_u32_e32 v7, vcc, v7, v6, vcc
+; GFX9-NEXT:    v_mad_u64_u32 v[4:5], s[0:1], s2, v17, v[4:5]
+; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s17, v12
+; GFX9-NEXT:    v_cndmask_b32_e64 v16, v16, v18, s[0:1]
+; GFX9-NEXT:    v_mad_u64_u32 v[4:5], s[0:1], s3, v15, v[4:5]
+; GFX9-NEXT:    v_mul_lo_u32 v5, v17, v3
+; GFX9-NEXT:    v_mul_hi_u32 v6, v15, v3
+; GFX9-NEXT:    v_mul_lo_u32 v20, v15, v4
+; GFX9-NEXT:    v_mul_hi_u32 v3, v17, v3
 ; GFX9-NEXT:    v_add_co_u32_e64 v18, s[0:1], 1, v13
+; GFX9-NEXT:    v_add_co_u32_e32 v5, vcc, v5, v20
+; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, v5, v6, vcc
+; GFX9-NEXT:    v_mul_lo_u32 v5, v17, v4
+; GFX9-NEXT:    v_mul_hi_u32 v20, v15, v4
 ; GFX9-NEXT:    v_addc_co_u32_e64 v19, s[0:1], 0, v14, s[0:1]
-; GFX9-NEXT:    v_mad_u64_u32 v[4:5], s[0:1], s3, v16, v[4:5]
-; GFX9-NEXT:    v_subb_co_u32_e32 v5, vcc, v7, v6, vcc
-; GFX9-NEXT:    v_mul_lo_u32 v6, v15, v3
-; GFX9-NEXT:    v_mul_lo_u32 v7, v16, v4
-; GFX9-NEXT:    v_subrev_co_u32_e32 v20, vcc, s12, v11
-; GFX9-NEXT:    v_subbrev_co_u32_e32 v21, vcc, 0, v5, vcc
-; GFX9-NEXT:    v_mul_hi_u32 v5, v16, v3
-; GFX9-NEXT:    v_add_co_u32_e32 v6, vcc, v6, v7
-; GFX9-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v5, vcc, v6, v5
-; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX9-NEXT:    v_mul_lo_u32 v6, v15, v4
-; GFX9-NEXT:    v_mul_hi_u32 v3, v15, v3
-; GFX9-NEXT:    v_add_u32_e32 v5, v7, v5
-; GFX9-NEXT:    v_mul_hi_u32 v7, v16, v4
-; GFX9-NEXT:    v_mul_hi_u32 v4, v15, v4
-; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, v6, v3
-; GFX9-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, v3, v7
-; GFX9-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, v3, v5
-; GFX9-NEXT:    v_add_u32_e32 v6, v6, v7
-; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v16, vcc, v16, v3
-; GFX9-NEXT:    v_add3_u32 v4, v6, v5, v4
-; GFX9-NEXT:    v_mad_u64_u32 v[5:6], s[0:1], s2, v16, 0
-; GFX9-NEXT:    v_addc_co_u32_e32 v15, vcc, v15, v4, vcc
-; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v17
-; GFX9-NEXT:    v_cndmask_b32_e32 v4, v13, v18, vcc
+; GFX9-NEXT:    v_addc_co_u32_e64 v6, s[0:1], 0, 0, vcc
+; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, v5, v3
+; GFX9-NEXT:    v_addc_co_u32_e64 v3, s[0:1], v3, v20, vcc
+; GFX9-NEXT:    v_mul_hi_u32 v4, v17, v4
+; GFX9-NEXT:    v_addc_co_u32_e64 v5, s[0:1], 0, 0, s[0:1]
+; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v6, vcc
+; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, 0, v5, vcc
+; GFX9-NEXT:    v_add_co_u32_e32 v15, vcc, v15, v3
+; GFX9-NEXT:    v_add_u32_e32 v4, v4, v5
+; GFX9-NEXT:    v_mad_u64_u32 v[5:6], s[0:1], s2, v15, 0
+; GFX9-NEXT:    v_addc_co_u32_e32 v17, vcc, v17, v4, vcc
 ; GFX9-NEXT:    v_mov_b32_e32 v3, v6
-; GFX9-NEXT:    v_mad_u64_u32 v[6:7], s[0:1], s2, v15, v[3:4]
-; GFX9-NEXT:    v_cndmask_b32_e32 v13, v14, v19, vcc
+; GFX9-NEXT:    v_mad_u64_u32 v[3:4], s[0:1], s2, v17, v[3:4]
+; GFX9-NEXT:    v_subrev_co_u32_e32 v20, vcc, s16, v11
+; GFX9-NEXT:    v_subbrev_co_u32_e32 v21, vcc, 0, v7, vcc
+; GFX9-NEXT:    v_mad_u64_u32 v[6:7], s[0:1], s3, v15, v[3:4]
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v16
+; GFX9-NEXT:    v_cndmask_b32_e32 v13, v13, v18, vcc
 ; GFX9-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v10
-; GFX9-NEXT:    v_mad_u64_u32 v[6:7], s[2:3], s3, v16, v[6:7]
-; GFX9-NEXT:    v_cndmask_b32_e64 v3, v8, v4, s[0:1]
-; GFX9-NEXT:    v_cndmask_b32_e64 v4, v9, v13, s[0:1]
-; GFX9-NEXT:    v_mul_lo_u32 v8, v15, v5
-; GFX9-NEXT:    v_mul_lo_u32 v9, v16, v6
-; GFX9-NEXT:    v_cndmask_b32_e32 v7, v11, v20, vcc
-; GFX9-NEXT:    v_mul_hi_u32 v11, v16, v5
+; GFX9-NEXT:    v_cndmask_b32_e64 v3, v8, v13, s[0:1]
+; GFX9-NEXT:    v_mul_lo_u32 v7, v17, v5
+; GFX9-NEXT:    v_mul_lo_u32 v8, v15, v6
+; GFX9-NEXT:    v_mul_hi_u32 v10, v15, v5
+; GFX9-NEXT:    v_mul_hi_u32 v5, v17, v5
+; GFX9-NEXT:    v_cndmask_b32_e32 v14, v14, v19, vcc
+; GFX9-NEXT:    v_add_co_u32_e64 v7, s[2:3], v7, v8
+; GFX9-NEXT:    v_addc_co_u32_e64 v7, s[2:3], v7, v10, s[2:3]
+; GFX9-NEXT:    v_mul_lo_u32 v7, v17, v6
+; GFX9-NEXT:    v_mul_hi_u32 v10, v15, v6
+; GFX9-NEXT:    v_addc_co_u32_e64 v8, s[2:3], 0, 0, s[2:3]
+; GFX9-NEXT:    v_add_co_u32_e64 v5, s[2:3], v7, v5
+; GFX9-NEXT:    v_mul_hi_u32 v6, v17, v6
+; GFX9-NEXT:    v_addc_co_u32_e64 v5, s[4:5], v5, v10, s[2:3]
+; GFX9-NEXT:    v_addc_co_u32_e64 v7, s[4:5], 0, 0, s[4:5]
+; GFX9-NEXT:    v_addc_co_u32_e64 v5, s[2:3], v5, v8, s[2:3]
+; GFX9-NEXT:    v_addc_co_u32_e64 v7, s[2:3], 0, v7, s[2:3]
+; GFX9-NEXT:    v_add_u32_e32 v6, v6, v7
+; GFX9-NEXT:    v_add_co_u32_e64 v5, s[2:3], v15, v5
+; GFX9-NEXT:    v_addc_co_u32_e64 v6, s[2:3], v17, v6, s[2:3]
+; GFX9-NEXT:    v_mul_lo_u32 v7, s15, v5
+; GFX9-NEXT:    v_mul_lo_u32 v8, s14, v6
+; GFX9-NEXT:    v_cndmask_b32_e64 v4, v9, v14, s[0:1]
+; GFX9-NEXT:    v_cndmask_b32_e32 v9, v11, v20, vcc
+; GFX9-NEXT:    v_mul_hi_u32 v11, s14, v5
 ; GFX9-NEXT:    v_cndmask_b32_e32 v10, v12, v21, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v8, vcc, v8, v9
-; GFX9-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v8, vcc, v8, v11
-; GFX9-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GFX9-NEXT:    v_mul_lo_u32 v11, v15, v6
-; GFX9-NEXT:    v_mul_hi_u32 v5, v15, v5
-; GFX9-NEXT:    v_add_u32_e32 v8, v9, v8
-; GFX9-NEXT:    v_mul_hi_u32 v9, v16, v6
-; GFX9-NEXT:    v_mul_hi_u32 v6, v15, v6
-; GFX9-NEXT:    v_add_co_u32_e32 v5, vcc, v11, v5
-; GFX9-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v5, vcc, v5, v9
-; GFX9-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v5, vcc, v5, v8
-; GFX9-NEXT:    v_add_u32_e32 v9, v11, v9
-; GFX9-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GFX9-NEXT:    v_add3_u32 v6, v9, v8, v6
-; GFX9-NEXT:    v_add_co_u32_e32 v5, vcc, v16, v5
-; GFX9-NEXT:    v_addc_co_u32_e32 v6, vcc, v15, v6, vcc
-; GFX9-NEXT:    v_mul_lo_u32 v8, s11, v5
-; GFX9-NEXT:    v_mul_lo_u32 v9, s10, v6
-; GFX9-NEXT:    v_cndmask_b32_e64 v7, v2, v7, s[0:1]
-; GFX9-NEXT:    v_mul_hi_u32 v2, s10, v5
-; GFX9-NEXT:    v_mul_hi_u32 v5, s11, v5
-; GFX9-NEXT:    v_add_co_u32_e32 v8, vcc, v8, v9
-; GFX9-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v8, v2
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT:    v_mul_lo_u32 v8, s11, v6
-; GFX9-NEXT:    v_add_u32_e32 v2, v9, v2
-; GFX9-NEXT:    v_mul_hi_u32 v9, s10, v6
-; GFX9-NEXT:    v_mul_hi_u32 v13, s11, v6
-; GFX9-NEXT:    v_add_co_u32_e32 v5, vcc, v8, v5
-; GFX9-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v5, vcc, v5, v9
-; GFX9-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v12, vcc, v5, v2
-; GFX9-NEXT:    v_mad_u64_u32 v[5:6], s[2:3], s14, v12, 0
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v8, v1, v10, s[0:1]
-; GFX9-NEXT:    v_add_u32_e32 v1, v11, v9
-; GFX9-NEXT:    v_add3_u32 v9, v1, v2, v13
+; GFX9-NEXT:    v_add_co_u32_e32 v7, vcc, v7, v8
+; GFX9-NEXT:    v_addc_co_u32_e32 v7, vcc, v7, v11, vcc
+; GFX9-NEXT:    v_mul_lo_u32 v7, s15, v6
+; GFX9-NEXT:    v_mul_hi_u32 v5, s15, v5
+; GFX9-NEXT:    v_mul_hi_u32 v11, s14, v6
+; GFX9-NEXT:    v_addc_co_u32_e64 v8, s[2:3], 0, 0, vcc
+; GFX9-NEXT:    v_add_co_u32_e32 v5, vcc, v7, v5
+; GFX9-NEXT:    v_addc_co_u32_e64 v5, s[2:3], v5, v11, vcc
+; GFX9-NEXT:    v_addc_co_u32_e32 v11, vcc, v5, v8, vcc
+; GFX9-NEXT:    v_addc_co_u32_e64 v7, s[2:3], 0, 0, s[2:3]
+; GFX9-NEXT:    v_mul_hi_u32 v13, s15, v6
+; GFX9-NEXT:    v_mad_u64_u32 v[5:6], s[2:3], s18, v11, 0
+; GFX9-NEXT:    v_addc_co_u32_e32 v12, vcc, 0, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e64 v7, v1, v9, s[0:1]
+; GFX9-NEXT:    v_add_u32_e32 v9, v13, v12
 ; GFX9-NEXT:    v_mov_b32_e32 v1, v6
-; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s14, v9, v[1:2]
-; GFX9-NEXT:    v_mov_b32_e32 v10, s11
-; GFX9-NEXT:    v_mov_b32_e32 v6, s15
-; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s15, v12, v[1:2]
-; GFX9-NEXT:    v_sub_co_u32_e32 v2, vcc, s10, v5
+; GFX9-NEXT:    v_cndmask_b32_e64 v8, v2, v10, s[0:1]
+; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s18, v9, v[1:2]
+; GFX9-NEXT:    v_mov_b32_e32 v10, s15
+; GFX9-NEXT:    v_mov_b32_e32 v6, s19
+; GFX9-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s19, v11, v[1:2]
+; GFX9-NEXT:    v_sub_co_u32_e32 v2, vcc, s14, v5
 ; GFX9-NEXT:    v_subb_co_u32_e64 v10, s[0:1], v10, v1, vcc
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s15, v10
-; GFX9-NEXT:    v_sub_u32_e32 v1, s11, v1
+; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s19, v10
+; GFX9-NEXT:    v_sub_u32_e32 v1, s15, v1
 ; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s14, v2
-; GFX9-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s15, v10
+; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s18, v2
+; GFX9-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[0:1]
+; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s19, v10
 ; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v6, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[0:1]
-; GFX9-NEXT:    v_subrev_co_u32_e32 v11, vcc, s14, v2
+; GFX9-NEXT:    v_cndmask_b32_e64 v5, v5, v12, s[0:1]
+; GFX9-NEXT:    v_subrev_co_u32_e32 v12, vcc, s18, v2
 ; GFX9-NEXT:    v_subbrev_co_u32_e64 v13, s[0:1], 0, v1, vcc
-; GFX9-NEXT:    v_add_co_u32_e64 v14, s[0:1], 1, v12
+; GFX9-NEXT:    v_add_co_u32_e64 v14, s[0:1], 1, v11
 ; GFX9-NEXT:    v_addc_co_u32_e64 v15, s[0:1], 0, v9, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s15, v13
+; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s19, v13
 ; GFX9-NEXT:    v_cndmask_b32_e64 v16, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s14, v11
+; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s18, v12
 ; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v6, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v17, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s15, v13
-; GFX9-NEXT:    v_subrev_co_u32_e32 v19, vcc, s14, v11
+; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s19, v13
+; GFX9-NEXT:    v_subrev_co_u32_e32 v19, vcc, s18, v12
 ; GFX9-NEXT:    v_cndmask_b32_e64 v16, v16, v17, s[0:1]
 ; GFX9-NEXT:    v_add_co_u32_e64 v17, s[0:1], 1, v14
 ; GFX9-NEXT:    v_subbrev_co_u32_e32 v1, vcc, 0, v1, vcc
@@ -1498,28 +1404,31 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v14, v17, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v14, v15, v18, vcc
 ; GFX9-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v5
-; GFX9-NEXT:    v_cndmask_b32_e64 v5, v12, v6, s[0:1]
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-NEXT:    v_cndmask_b32_e64 v5, v11, v6, s[0:1]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v6, v9, v14, s[0:1]
-; GFX9-NEXT:    v_cndmask_b32_e32 v9, v11, v19, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v9, v12, v19, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v13, v1, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v9, v2, v9, s[0:1]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v10, v10, v1, s[0:1]
-; GFX9-NEXT:    global_store_dwordx4 v0, v[3:6], s[4:5]
-; GFX9-NEXT:    global_store_dwordx4 v0, v[7:10], s[6:7]
+; GFX9-NEXT:    global_store_dwordx4 v0, v[3:6], s[8:9]
+; GFX9-NEXT:    global_store_dwordx4 v0, v[7:10], s[10:11]
 ; GFX9-NEXT:    s_endpgm
 ;
 ; GFX10-LABEL: udivrem_v2i64:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_load_dwordx4 s[12:15], s[4:5], 0x20
+; GFX10-NEXT:    s_load_dwordx4 s[16:19], s[4:5], 0x20
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    v_cvt_f32_u32_e32 v0, s13
-; GFX10-NEXT:    v_cvt_f32_u32_e32 v1, s15
-; GFX10-NEXT:    v_cvt_f32_u32_e32 v2, s12
-; GFX10-NEXT:    v_cvt_f32_u32_e32 v3, s14
-; GFX10-NEXT:    s_sub_u32 s0, 0, s12
+; GFX10-NEXT:    v_cvt_f32_u32_e32 v0, s17
+; GFX10-NEXT:    v_cvt_f32_u32_e32 v1, s19
+; GFX10-NEXT:    v_cvt_f32_u32_e32 v2, s16
+; GFX10-NEXT:    v_cvt_f32_u32_e32 v3, s18
+; GFX10-NEXT:    s_sub_u32 s7, 0, s16
 ; GFX10-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v0
 ; GFX10-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v1
-; GFX10-NEXT:    s_subb_u32 s1, 0, s13
+; GFX10-NEXT:    s_subb_u32 s20, 0, s17
+; GFX10-NEXT:    s_sub_u32 s8, 0, s18
+; GFX10-NEXT:    s_subb_u32 s21, 0, s19
 ; GFX10-NEXT:    v_add_f32_e32 v0, v0, v2
 ; GFX10-NEXT:    v_add_f32_e32 v1, v1, v3
 ; GFX10-NEXT:    v_rcp_iflag_f32_e32 v0, v0
@@ -1538,17 +1447,15 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX10-NEXT:    v_add_f32_e32 v1, v3, v1
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v7, v0
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v8, v1
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s2, s0, v7, 0
-; GFX10-NEXT:    s_sub_u32 s2, 0, s14
-; GFX10-NEXT:    v_mad_u64_u32 v[2:3], s3, s2, v8, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, s7, v7, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[2:3], s0, s8, v8, 0
 ; GFX10-NEXT:    v_mul_hi_u32 v11, v9, v0
-; GFX10-NEXT:    v_mad_u64_u32 v[4:5], s3, s0, v9, v[1:2]
-; GFX10-NEXT:    v_mad_u64_u32 v[5:6], s3, s2, v10, v[3:4]
+; GFX10-NEXT:    v_mad_u64_u32 v[4:5], s0, s7, v9, v[1:2]
+; GFX10-NEXT:    v_mad_u64_u32 v[5:6], s0, s8, v10, v[3:4]
 ; GFX10-NEXT:    v_mul_lo_u32 v6, v9, v0
-; GFX10-NEXT:    s_subb_u32 s3, 0, s15
-; GFX10-NEXT:    v_mad_u64_u32 v[3:4], s6, s1, v7, v[4:5]
+; GFX10-NEXT:    v_mad_u64_u32 v[3:4], s0, s20, v7, v[4:5]
 ; GFX10-NEXT:    v_mul_hi_u32 v4, v7, v0
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s6, s3, v8, v[5:6]
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, s21, v8, v[5:6]
 ; GFX10-NEXT:    v_mul_lo_u32 v1, v10, v2
 ; GFX10-NEXT:    v_mul_hi_u32 v5, v8, v2
 ; GFX10-NEXT:    v_mul_hi_u32 v2, v10, v2
@@ -1560,46 +1467,38 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX10-NEXT:    v_mul_hi_u32 v17, v8, v0
 ; GFX10-NEXT:    v_mul_hi_u32 v3, v9, v3
 ; GFX10-NEXT:    v_mul_hi_u32 v0, v10, v0
-; GFX10-NEXT:    v_add_co_u32 v6, s6, v6, v12
-; GFX10-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s6
-; GFX10-NEXT:    v_add_co_u32 v11, s6, v13, v11
-; GFX10-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s6
-; GFX10-NEXT:    v_add_co_u32 v1, s6, v1, v15
-; GFX10-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s6
-; GFX10-NEXT:    v_add_co_u32 v2, s6, v16, v2
-; GFX10-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s6
-; GFX10-NEXT:    v_add_co_u32 v4, s6, v6, v4
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s6
-; GFX10-NEXT:    v_add_co_u32 v6, s6, v11, v14
-; GFX10-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s6
-; GFX10-NEXT:    v_add_co_u32 v1, s6, v1, v5
-; GFX10-NEXT:    v_add_nc_u32_e32 v4, v12, v4
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s6
-; GFX10-NEXT:    v_add_co_u32 v2, s6, v2, v17
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s6
-; GFX10-NEXT:    v_add_co_u32 v4, s6, v6, v4
-; GFX10-NEXT:    v_add_nc_u32_e32 v1, v15, v1
-; GFX10-NEXT:    v_add_nc_u32_e32 v11, v13, v11
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s6
-; GFX10-NEXT:    v_add_nc_u32_e32 v5, v16, v5
-; GFX10-NEXT:    v_add_co_u32 v7, vcc_lo, v7, v4
-; GFX10-NEXT:    v_add_co_u32 v1, s6, v2, v1
-; GFX10-NEXT:    v_add3_u32 v3, v11, v6, v3
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s6
+; GFX10-NEXT:    v_add_co_u32 v6, vcc_lo, v6, v12
+; GFX10-NEXT:    v_add_co_u32 v1, s2, v1, v15
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v4, vcc_lo, v6, v4, vcc_lo
+; GFX10-NEXT:    v_add_co_u32 v4, s0, v13, v11
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s2, v1, v5, s2
+; GFX10-NEXT:    v_add_co_u32 v1, s3, v16, v2
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v4, s1, v4, v14, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v2, s9, 0, 0, vcc_lo
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v5, s1, 0, 0, s1
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s6, v1, v17, s3
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v2, vcc_lo, v4, v2, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v6, s1, 0, 0, s2
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v4, vcc_lo, 0, v5, vcc_lo
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v11, s1, 0, 0, s6
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, vcc_lo, v1, v6, s3
+; GFX10-NEXT:    v_add_nc_u32_e32 v3, v3, v4
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v5, vcc_lo, 0, v11, vcc_lo
+; GFX10-NEXT:    v_add_co_u32 v7, vcc_lo, v7, v2
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v9, vcc_lo, v9, v3, vcc_lo
-; GFX10-NEXT:    v_add3_u32 v2, v5, v2, v0
 ; GFX10-NEXT:    v_add_co_u32 v8, vcc_lo, v8, v1
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s6, s0, v7, 0
-; GFX10-NEXT:    v_add_co_ci_u32_e32 v10, vcc_lo, v10, v2, vcc_lo
-; GFX10-NEXT:    v_mad_u64_u32 v[2:3], s6, s2, v8, 0
-; GFX10-NEXT:    s_load_dwordx8 s[4:11], s[4:5], 0x0
+; GFX10-NEXT:    v_add_nc_u32_e32 v4, v0, v5
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, s7, v7, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[2:3], s0, s8, v8, 0
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v10, vcc_lo, v10, v4, vcc_lo
 ; GFX10-NEXT:    v_mul_hi_u32 v11, v9, v0
-; GFX10-NEXT:    v_mad_u64_u32 v[4:5], s0, s0, v9, v[1:2]
-; GFX10-NEXT:    v_mad_u64_u32 v[5:6], s0, s2, v10, v[3:4]
+; GFX10-NEXT:    v_mad_u64_u32 v[4:5], s0, s7, v9, v[1:2]
+; GFX10-NEXT:    v_mad_u64_u32 v[5:6], s0, s8, v10, v[3:4]
 ; GFX10-NEXT:    v_mul_lo_u32 v6, v9, v0
-; GFX10-NEXT:    v_mad_u64_u32 v[3:4], s0, s1, v7, v[4:5]
+; GFX10-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x0
+; GFX10-NEXT:    v_mad_u64_u32 v[3:4], s0, s20, v7, v[4:5]
 ; GFX10-NEXT:    v_mul_hi_u32 v4, v7, v0
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, s3, v8, v[5:6]
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, s21, v8, v[5:6]
 ; GFX10-NEXT:    v_mul_lo_u32 v1, v10, v2
 ; GFX10-NEXT:    v_mul_hi_u32 v5, v8, v2
 ; GFX10-NEXT:    v_mul_hi_u32 v2, v10, v2
@@ -1611,156 +1510,142 @@ define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
 ; GFX10-NEXT:    v_mul_hi_u32 v17, v8, v0
 ; GFX10-NEXT:    v_mul_hi_u32 v3, v9, v3
 ; GFX10-NEXT:    v_mul_hi_u32 v0, v10, v0
-; GFX10-NEXT:    v_add_co_u32 v6, s0, v6, v12
-; GFX10-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s0
-; GFX10-NEXT:    v_add_co_u32 v11, s0, v13, v11
-; GFX10-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s0
-; GFX10-NEXT:    v_add_co_u32 v1, s0, v1, v15
-; GFX10-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s0
-; GFX10-NEXT:    v_add_co_u32 v2, s0, v16, v2
-; GFX10-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s0
-; GFX10-NEXT:    v_add_co_u32 v4, s0, v6, v4
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s0
-; GFX10-NEXT:    v_add_co_u32 v6, s0, v11, v14
-; GFX10-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s0
-; GFX10-NEXT:    v_add_co_u32 v1, s0, v1, v5
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s0
-; GFX10-NEXT:    v_add_nc_u32_e32 v4, v12, v4
-; GFX10-NEXT:    v_add_co_u32 v2, s0, v2, v17
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s0
-; GFX10-NEXT:    v_add_nc_u32_e32 v1, v15, v1
-; GFX10-NEXT:    v_add_co_u32 v4, s0, v6, v4
-; GFX10-NEXT:    v_add_nc_u32_e32 v11, v13, v11
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s0
-; GFX10-NEXT:    v_add_co_u32 v1, s0, v2, v1
-; GFX10-NEXT:    v_add_nc_u32_e32 v5, v16, v5
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s0
-; GFX10-NEXT:    v_add3_u32 v3, v11, v6, v3
-; GFX10-NEXT:    v_add_co_u32 v4, vcc_lo, v7, v4
-; GFX10-NEXT:    v_add3_u32 v0, v5, v2, v0
-; GFX10-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, v9, v3, vcc_lo
+; GFX10-NEXT:    v_add_co_u32 v6, vcc_lo, v6, v12
+; GFX10-NEXT:    v_add_co_u32 v1, s2, v1, v15
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v4, vcc_lo, v6, v4, vcc_lo
+; GFX10-NEXT:    v_add_co_u32 v4, s0, v13, v11
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s2, v1, v5, s2
+; GFX10-NEXT:    v_add_co_u32 v1, s3, v16, v2
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v4, s1, v4, v14, s0
+; GFX10-NEXT:    s_mov_b32 null, 0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v2, s5, 0, 0, vcc_lo
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v5, s1, 0, 0, s1
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s4, v1, v17, s3
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v6, s1, 0, 0, s2
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v2, vcc_lo, v4, v2, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v11, s1, 0, 0, s4
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v4, vcc_lo, 0, v5, vcc_lo
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, vcc_lo, v1, v6, s3
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v5, vcc_lo, 0, v11, vcc_lo
+; GFX10-NEXT:    v_add_nc_u32_e32 v3, v3, v4
+; GFX10-NEXT:    v_add_co_u32 v2, vcc_lo, v7, v2
+; GFX10-NEXT:    v_add_nc_u32_e32 v0, v0, v5
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, v9, v3, vcc_lo
 ; GFX10-NEXT:    v_add_co_u32 v1, vcc_lo, v8, v1
-; GFX10-NEXT:    v_add_co_ci_u32_e32 v0, vcc_lo, v10, v0, vcc_lo
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    v_mul_lo_u32 v3, s9, v4
-; GFX10-NEXT:    v_mul_lo_u32 v8, s8, v2
-; GFX10-NEXT:    v_mul_hi_u32 v5, s8, v4
-; GFX10-NEXT:    v_mul_hi_u32 v4, s9, v4
-; GFX10-NEXT:    v_mul_lo_u32 v9, s9, v2
-; GFX10-NEXT:    v_mul_lo_u32 v6, s11, v1
-; GFX10-NEXT:    v_mul_hi_u32 v10, s8, v2
-; GFX10-NEXT:    v_mul_hi_u32 v11, s9, v2
-; GFX10-NEXT:    v_mul_lo_u32 v2, s10, v0
-; GFX10-NEXT:    v_mul_hi_u32 v7, s10, v1
-; GFX10-NEXT:    v_mul_hi_u32 v1, s11, v1
-; GFX10-NEXT:    v_mul_lo_u32 v12, s11, v0
-; GFX10-NEXT:    v_mul_hi_u32 v13, s10, v0
-; GFX10-NEXT:    v_mul_hi_u32 v14, s11, v0
-; GFX10-NEXT:    v_add_co_u32 v0, s0, v3, v8
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s0
-; GFX10-NEXT:    v_add_co_u32 v4, s0, v9, v4
-; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s0
-; GFX10-NEXT:    v_add_co_u32 v2, s0, v6, v2
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s0
-; GFX10-NEXT:    v_add_co_u32 v1, s0, v12, v1
-; GFX10-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s0
-; GFX10-NEXT:    v_add_co_u32 v0, s0, v0, v5
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX10-NEXT:    v_add_co_u32 v4, s0, v4, v10
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s0
-; GFX10-NEXT:    v_add_co_u32 v2, s0, v2, v7
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s0
-; GFX10-NEXT:    v_add_nc_u32_e32 v0, v3, v0
-; GFX10-NEXT:    v_add_co_u32 v1, s0, v1, v13
-; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s0
-; GFX10-NEXT:    v_add_nc_u32_e32 v2, v6, v2
-; GFX10-NEXT:    v_add_nc_u32_e32 v5, v8, v5
-; GFX10-NEXT:    v_add_co_u32 v8, s0, v4, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s0
-; GFX10-NEXT:    v_add_co_u32 v10, s0, v1, v2
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s1, s12, v8, 0
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s0
-; GFX10-NEXT:    v_mad_u64_u32 v[2:3], s0, s14, v10, 0
-; GFX10-NEXT:    v_add_nc_u32_e32 v7, v9, v7
-; GFX10-NEXT:    v_add3_u32 v9, v5, v4, v11
-; GFX10-NEXT:    v_add_co_u32 v12, vcc_lo, v8, 1
+; GFX10-NEXT:    v_mul_lo_u32 v4, s13, v2
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v6, vcc_lo, v10, v0, vcc_lo
+; GFX10-NEXT:    v_mul_lo_u32 v0, s12, v3
+; GFX10-NEXT:    v_mul_hi_u32 v5, s12, v2
+; GFX10-NEXT:    v_mul_hi_u32 v2, s13, v2
+; GFX10-NEXT:    v_mul_lo_u32 v7, s15, v1
+; GFX10-NEXT:    v_mul_hi_u32 v8, s14, v1
+; GFX10-NEXT:    v_mul_hi_u32 v9, s15, v1
+; GFX10-NEXT:    v_mul_lo_u32 v1, s13, v3
+; GFX10-NEXT:    v_mul_hi_u32 v10, s12, v3
+; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v4, v0
+; GFX10-NEXT:    v_mul_lo_u32 v11, s14, v6
+; GFX10-NEXT:    v_mul_hi_u32 v3, s13, v3
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v0, vcc_lo, v0, v5, vcc_lo
+; GFX10-NEXT:    v_add_co_u32 v0, s0, v1, v2
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s2, 0, 0, vcc_lo
+; GFX10-NEXT:    v_mul_lo_u32 v2, s15, v6
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v0, s1, v0, v10, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v4, s1, 0, 0, s1
+; GFX10-NEXT:    v_mul_hi_u32 v5, s14, v6
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v10, vcc_lo, v0, v1, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v4, vcc_lo, 0, v4, vcc_lo
+; GFX10-NEXT:    v_add_co_u32 v7, vcc_lo, v7, v11
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, s16, v10, 0
+; GFX10-NEXT:    v_add_co_u32 v2, s0, v2, v9
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v7, vcc_lo, v7, v8, vcc_lo
+; GFX10-NEXT:    v_add_nc_u32_e32 v7, v3, v4
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v5, s1, v2, v5, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v3, s2, 0, 0, vcc_lo
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v4, s1, 0, 0, s1
+; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s1, s16, v7, v[1:2]
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v8, vcc_lo, v5, v3, s0
+; GFX10-NEXT:    v_mul_hi_u32 v6, s15, v6
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v9, vcc_lo, 0, v4, vcc_lo
+; GFX10-NEXT:    v_mad_u64_u32 v[2:3], s0, s18, v8, 0
 ; GFX10-NEXT:    v_mov_b32_e32 v11, 0
-; GFX10-NEXT:    v_add3_u32 v7, v7, v6, v14
-; GFX10-NEXT:    v_mad_u64_u32 v[4:5], s0, s12, v9, v[1:2]
-; GFX10-NEXT:    v_add_co_ci_u32_e32 v13, vcc_lo, 0, v9, vcc_lo
-; GFX10-NEXT:    v_mad_u64_u32 v[5:6], s0, s14, v7, v[3:4]
-; GFX10-NEXT:    v_mad_u64_u32 v[3:4], s0, s13, v8, v[4:5]
-; GFX10-NEXT:    v_add_co_u32 v4, vcc_lo, v12, 1
-; GFX10-NEXT:    v_add_co_ci_u32_e32 v6, vcc_lo, 0, v13, vcc_lo
-; GFX10-NEXT:    v_sub_co_u32 v14, vcc_lo, s8, v0
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, s15, v10, v[5:6]
-; GFX10-NEXT:    v_sub_co_ci_u32_e64 v5, s0, s9, v3, vcc_lo
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s12, v14
-; GFX10-NEXT:    v_sub_nc_u32_e32 v1, s9, v3
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0, -1, s0
-; GFX10-NEXT:    v_sub_co_u32 v15, s0, s10, v2
-; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v1, vcc_lo, s13, v1, vcc_lo
-; GFX10-NEXT:    v_sub_co_ci_u32_e64 v16, s1, s11, v0, s0
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s14, v15
-; GFX10-NEXT:    v_sub_nc_u32_e32 v0, s11, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc_lo
-; GFX10-NEXT:    v_sub_co_u32 v17, vcc_lo, v14, s12
-; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v18, s1, 0, v1, vcc_lo
-; GFX10-NEXT:    v_cmp_le_u32_e64 s1, s13, v5
-; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v23, s0, s15, v0, s0
-; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s13, v18
-; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v1, vcc_lo, s13, v1, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v19, 0, -1, s1
-; GFX10-NEXT:    v_cmp_le_u32_e64 s1, s12, v17
-; GFX10-NEXT:    v_cndmask_b32_e64 v20, 0, -1, s1
-; GFX10-NEXT:    v_cmp_le_u32_e64 s1, s13, v18
-; GFX10-NEXT:    v_cndmask_b32_e64 v21, 0, -1, s1
-; GFX10-NEXT:    v_cmp_le_u32_e64 s1, s15, v16
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v21, v20, s0
-; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s13, v5
-; GFX10-NEXT:    v_cndmask_b32_e64 v22, 0, -1, s1
-; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v19, v3, s0
-; GFX10-NEXT:    v_sub_co_u32 v0, s0, v17, s12
-; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v19, s0, 0, v1, s0
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, v12, v4, vcc_lo
-; GFX10-NEXT:    v_cmp_ne_u32_e64 s0, 0, v3
-; GFX10-NEXT:    v_cndmask_b32_e32 v3, v13, v6, vcc_lo
-; GFX10-NEXT:    v_sub_co_u32 v6, s1, v15, s14
-; GFX10-NEXT:    v_cndmask_b32_e32 v4, v17, v0, vcc_lo
-; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v12, s2, 0, v23, s1
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v8, v1, s0
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v9, v3, s0
-; GFX10-NEXT:    v_cndmask_b32_e32 v3, v18, v19, vcc_lo
-; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, s15, v16
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, v14, v4, s0
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, v5, v3, s0
-; GFX10-NEXT:    v_cndmask_b32_e32 v2, v22, v2, vcc_lo
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s15, v12
-; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc_lo
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s14, v6
-; GFX10-NEXT:    v_cndmask_b32_e64 v9, 0, -1, vcc_lo
-; GFX10-NEXT:    v_add_co_u32 v13, vcc_lo, v10, 1
-; GFX10-NEXT:    v_add_co_ci_u32_e32 v14, vcc_lo, 0, v7, vcc_lo
-; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, s15, v12
-; GFX10-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc_lo
-; GFX10-NEXT:    v_add_co_u32 v9, vcc_lo, v13, 1
-; GFX10-NEXT:    v_add_co_ci_u32_e32 v17, vcc_lo, 0, v14, vcc_lo
-; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v18, vcc_lo, s15, v23, s1
-; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v8
-; GFX10-NEXT:    v_sub_co_u32 v8, s1, v6, s14
-; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v18, s1, 0, v18, s1
-; GFX10-NEXT:    v_cndmask_b32_e32 v9, v13, v9, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e32 v13, v14, v17, vcc_lo
-; GFX10-NEXT:    v_cmp_ne_u32_e64 s1, 0, v2
-; GFX10-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e32 v8, v12, v18, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v10, v9, s1
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v7, v13, s1
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, v15, v6, s1
-; GFX10-NEXT:    v_cndmask_b32_e64 v7, v16, v8, s1
-; GFX10-NEXT:    global_store_dwordx4 v11, v[0:3], s[4:5]
-; GFX10-NEXT:    global_store_dwordx4 v11, v[4:7], s[6:7]
+; GFX10-NEXT:    v_add_nc_u32_e32 v6, v6, v9
+; GFX10-NEXT:    v_mad_u64_u32 v[4:5], s0, s17, v10, v[1:2]
+; GFX10-NEXT:    v_add_co_u32 v5, vcc_lo, v10, 1
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v9, vcc_lo, 0, v7, vcc_lo
+; GFX10-NEXT:    v_mov_b32_e32 v1, v3
+; GFX10-NEXT:    v_sub_co_u32 v12, vcc_lo, s12, v0
+; GFX10-NEXT:    v_sub_nc_u32_e32 v3, s13, v4
+; GFX10-NEXT:    v_sub_co_ci_u32_e64 v13, s0, s13, v4, vcc_lo
+; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v3, vcc_lo, s17, v3, vcc_lo
+; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s16, v12
+; GFX10-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc_lo
+; GFX10-NEXT:    v_sub_co_u32 v14, vcc_lo, v12, s16
+; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v15, s0, 0, v3, vcc_lo
+; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s17, v13
+; GFX10-NEXT:    v_cndmask_b32_e64 v16, 0, -1, s0
+; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s16, v14
+; GFX10-NEXT:    v_cndmask_b32_e64 v17, 0, -1, s0
+; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s17, v15
+; GFX10-NEXT:    v_cndmask_b32_e64 v18, 0, -1, s0
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, s18, v6, v[1:2]
+; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s17, v13
+; GFX10-NEXT:    v_cndmask_b32_e64 v1, v16, v4, s0
+; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s17, v15
+; GFX10-NEXT:    v_cndmask_b32_e64 v4, v18, v17, s0
+; GFX10-NEXT:    v_add_co_u32 v16, s0, v5, 1
+; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v18, vcc_lo, s17, v3, vcc_lo
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v17, s0, 0, v9, s0
+; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v4
+; GFX10-NEXT:    v_mad_u64_u32 v[3:4], s0, s19, v8, v[0:1]
+; GFX10-NEXT:    v_sub_co_u32 v4, s0, v14, s16
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v5, v16, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v9, v17, vcc_lo
+; GFX10-NEXT:    v_sub_co_u32 v9, s1, s14, v2
+; GFX10-NEXT:    v_sub_nc_u32_e32 v2, s15, v3
+; GFX10-NEXT:    v_sub_co_ci_u32_e64 v16, s2, s15, v3, s1
+; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v18, s0, 0, v18, s0
+; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v2, s1, s19, v2, s1
+; GFX10-NEXT:    v_cmp_ne_u32_e64 s0, 0, v1
+; GFX10-NEXT:    v_cmp_le_u32_e64 s1, s18, v9
+; GFX10-NEXT:    v_cmp_le_u32_e64 s2, s19, v16
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v14, v4, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v15, v15, v18, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v0, v10, v0, s0
+; GFX10-NEXT:    v_cndmask_b32_e64 v1, v7, v5, s0
+; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s1
+; GFX10-NEXT:    v_sub_co_u32 v10, s1, v9, s18
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s2
+; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v14, s2, 0, v2, s1
+; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, s19, v16
+; GFX10-NEXT:    v_cndmask_b32_e64 v4, v12, v3, s0
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v5, v7, vcc_lo
+; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s19, v14
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc_lo
+; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s18, v10
+; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0, -1, vcc_lo
+; GFX10-NEXT:    v_add_co_u32 v12, vcc_lo, v8, 1
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v17, vcc_lo, 0, v6, vcc_lo
+; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, s19, v14
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc_lo
+; GFX10-NEXT:    v_add_co_u32 v7, vcc_lo, v12, 1
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v18, vcc_lo, 0, v17, vcc_lo
+; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v2, vcc_lo, s19, v2, s1
+; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v5
+; GFX10-NEXT:    v_sub_co_u32 v5, s1, v10, s18
+; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v2, s1, 0, v2, s1
+; GFX10-NEXT:    v_cndmask_b32_e32 v7, v12, v7, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v12, v17, v18, vcc_lo
+; GFX10-NEXT:    v_cmp_ne_u32_e64 s1, 0, v3
+; GFX10-NEXT:    v_cndmask_b32_e32 v10, v10, v5, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v14, v14, v2, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, v13, v15, s0
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, v8, v7, s1
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, v6, v12, s1
+; GFX10-NEXT:    v_cndmask_b32_e64 v6, v9, v10, s1
+; GFX10-NEXT:    v_cndmask_b32_e64 v7, v16, v14, s1
+; GFX10-NEXT:    global_store_dwordx4 v11, v[0:3], s[8:9]
+; GFX10-NEXT:    global_store_dwordx4 v11, v[4:7], s[10:11]
 ; GFX10-NEXT:    s_endpgm
   %div = udiv <2 x i64> %x, %y
   store <2 x i64> %div, ptr addrspace(1) %out0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
index cc0f7e2ca5a54..4810540d4453b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -16,10 +16,10 @@ define i64 @v_urem_i64(i64 %num, i64 %den) {
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v6, v2
 ; CHECK-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
+; CHECK-NEXT:    s_xor_b64 s[8:9], exec, s[4:5]
 ; CHECK-NEXT:    s_cbranch_execnz .LBB0_3
 ; CHECK-NEXT:  ; %bb.1: ; %Flow
-; CHECK-NEXT:    s_andn2_saveexec_b64 s[4:5], s[6:7]
+; CHECK-NEXT:    s_andn2_saveexec_b64 s[4:5], s[8:9]
 ; CHECK-NEXT:    s_cbranch_execnz .LBB0_4
 ; CHECK-NEXT:  .LBB0_2:
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
@@ -50,18 +50,13 @@ define i64 @v_urem_i64(i64 %num, i64 %den) {
 ; CHECK-NEXT:    v_mul_hi_u32 v14, v0, v8
 ; CHECK-NEXT:    v_mul_hi_u32 v8, v6, v8
 ; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
-; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
-; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
-; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v14
-; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v13, v12
-; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
-; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CHECK-NEXT:    v_addc_u32_e32 v10, vcc, v10, v12, vcc
+; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v13, v9
+; CHECK-NEXT:    v_addc_u32_e64 v9, s[6:7], v9, v14, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e64 v10, s[10:11], 0, 0, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v11, s[6:7], 0, 0, s[6:7]
+; CHECK-NEXT:    v_addc_u32_e64 v9, vcc, v9, v10, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v10, vcc, 0, v11, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
 ; CHECK-NEXT:    v_addc_u32_e32 v6, vcc, v6, v8, vcc
@@ -79,18 +74,13 @@ define i64 @v_urem_i64(i64 %num, i64 %den) {
 ; CHECK-NEXT:    v_mul_hi_u32 v12, v0, v1
 ; CHECK-NEXT:    v_mul_hi_u32 v1, v6, v1
 ; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v10, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v11
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
-; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v10, v7
-; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; CHECK-NEXT:    v_addc_u32_e32 v7, vcc, v7, v11, vcc
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v9, v8
+; CHECK-NEXT:    v_addc_u32_e64 v7, s[6:7], v7, v12, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e64 v8, s[10:11], 0, 0, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v9, s[6:7], 0, 0, s[6:7]
+; CHECK-NEXT:    v_addc_u32_e64 v7, vcc, v7, v8, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v8, vcc, 0, v9, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v8
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
 ; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v6, v1, vcc
@@ -102,26 +92,21 @@ define i64 @v_urem_i64(i64 %num, i64 %den) {
 ; CHECK-NEXT:    v_mul_hi_u32 v10, v4, v1
 ; CHECK-NEXT:    v_mul_hi_u32 v1, v5, v1
 ; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
-; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v10
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
-; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; CHECK-NEXT:    v_mul_lo_u32 v7, v2, v0
-; CHECK-NEXT:    v_mul_lo_u32 v8, v3, v0
-; CHECK-NEXT:    v_mul_hi_u32 v0, v2, v0
+; CHECK-NEXT:    v_addc_u32_e32 v6, vcc, v6, v7, vcc
+; CHECK-NEXT:    v_add_i32_e64 v0, s[4:5], v9, v0
+; CHECK-NEXT:    v_addc_u32_e64 v0, s[6:7], v0, v10, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e64 v6, s[10:11], 0, 0, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v7, s[6:7], 0, 0, s[6:7]
+; CHECK-NEXT:    v_addc_u32_e64 v0, vcc, v0, v6, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v6, vcc, 0, v7, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
+; CHECK-NEXT:    v_mul_lo_u32 v6, v2, v0
+; CHECK-NEXT:    v_mul_lo_u32 v7, v3, v0
+; CHECK-NEXT:    v_mul_hi_u32 v0, v2, v0
 ; CHECK-NEXT:    v_mul_lo_u32 v1, v2, v1
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v7, v1
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
-; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v4, v7
+; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v4, v6
 ; CHECK-NEXT:    v_subb_u32_e64 v4, s[4:5], v5, v0, vcc
 ; CHECK-NEXT:    v_sub_i32_e64 v0, s[4:5], v5, v0
 ; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v2
@@ -151,7 +136,7 @@ define i64 @v_urem_i64(i64 %num, i64 %den) {
 ; CHECK-NEXT:    ; implicit-def: $vgpr6
 ; CHECK-NEXT:    ; implicit-def: $vgpr2
 ; CHECK-NEXT:    ; implicit-def: $vgpr4
-; CHECK-NEXT:    s_andn2_saveexec_b64 s[4:5], s[6:7]
+; CHECK-NEXT:    s_andn2_saveexec_b64 s[4:5], s[8:9]
 ; CHECK-NEXT:    s_cbranch_execz .LBB0_2
 ; CHECK-NEXT:  .LBB0_4:
 ; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v6
@@ -194,10 +179,10 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:  ; %bb.1:
 ; CHECK-NEXT:    v_mov_b32_e32 v0, s3
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v1, s3
-; CHECK-NEXT:    s_sub_u32 s4, 0, s2
+; CHECK-NEXT:    s_sub_u32 s10, 0, s2
 ; CHECK-NEXT:    v_mov_b32_e32 v3, s1
 ; CHECK-NEXT:    v_madmk_f32 v1, v1, 0x4f800000, v2
-; CHECK-NEXT:    s_subb_u32 s5, 0, s3
+; CHECK-NEXT:    s_subb_u32 s11, 0, s3
 ; CHECK-NEXT:    v_rcp_iflag_f32_e32 v1, v1
 ; CHECK-NEXT:    v_mul_f32_e32 v1, 0x5f7ffffc, v1
 ; CHECK-NEXT:    v_mul_f32_e32 v4, 0x2f800000, v1
@@ -205,10 +190,10 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:    v_mac_f32_e32 v1, 0xcf800000, v4
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v4, v4
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; CHECK-NEXT:    v_mul_lo_u32 v5, s4, v4
-; CHECK-NEXT:    v_mul_lo_u32 v6, s4, v1
-; CHECK-NEXT:    v_mul_lo_u32 v7, s5, v1
-; CHECK-NEXT:    v_mul_hi_u32 v8, s4, v1
+; CHECK-NEXT:    v_mul_lo_u32 v5, s10, v4
+; CHECK-NEXT:    v_mul_lo_u32 v6, s10, v1
+; CHECK-NEXT:    v_mul_lo_u32 v7, s11, v1
+; CHECK-NEXT:    v_mul_hi_u32 v8, s10, v1
 ; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
 ; CHECK-NEXT:    v_mul_lo_u32 v7, v4, v6
 ; CHECK-NEXT:    v_mul_hi_u32 v9, v1, v6
@@ -219,25 +204,20 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:    v_mul_hi_u32 v11, v1, v5
 ; CHECK-NEXT:    v_mul_hi_u32 v5, v4, v5
 ; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v10, v6
-; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v11
-; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v10, v9
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
+; CHECK-NEXT:    v_addc_u32_e32 v7, vcc, v7, v9, vcc
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v10, v6
+; CHECK-NEXT:    v_addc_u32_e64 v6, s[6:7], v6, v11, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e64 v7, s[8:9], 0, 0, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v8, s[6:7], 0, 0, s[6:7]
+; CHECK-NEXT:    v_addc_u32_e64 v6, vcc, v6, v7, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v7, vcc, 0, v8, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
 ; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
 ; CHECK-NEXT:    v_addc_u32_e32 v4, vcc, v4, v5, vcc
-; CHECK-NEXT:    v_mul_lo_u32 v5, s4, v1
-; CHECK-NEXT:    v_mul_lo_u32 v6, s5, v1
-; CHECK-NEXT:    v_mul_hi_u32 v7, s4, v1
-; CHECK-NEXT:    v_mul_lo_u32 v8, s4, v4
+; CHECK-NEXT:    v_mul_lo_u32 v5, s10, v1
+; CHECK-NEXT:    v_mul_lo_u32 v6, s11, v1
+; CHECK-NEXT:    v_mul_hi_u32 v7, s10, v1
+; CHECK-NEXT:    v_mul_lo_u32 v8, s10, v4
 ; CHECK-NEXT:    v_mul_lo_u32 v9, v4, v5
 ; CHECK-NEXT:    v_mul_hi_u32 v10, v1, v5
 ; CHECK-NEXT:    v_mul_hi_u32 v5, v4, v5
@@ -248,18 +228,13 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:    v_mul_hi_u32 v11, v1, v6
 ; CHECK-NEXT:    v_mul_hi_u32 v6, v4, v6
 ; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v11
-; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
+; CHECK-NEXT:    v_addc_u32_e32 v7, vcc, v7, v10, vcc
+; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v8, v5
+; CHECK-NEXT:    v_addc_u32_e64 v5, s[6:7], v5, v11, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e64 v7, s[8:9], 0, 0, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v8, s[6:7], 0, 0, s[6:7]
+; CHECK-NEXT:    v_addc_u32_e64 v5, vcc, v5, v7, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v7, vcc, 0, v8, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
 ; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
 ; CHECK-NEXT:    v_addc_u32_e32 v4, vcc, v4, v6, vcc
@@ -271,26 +246,21 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:    v_mul_hi_u32 v9, s0, v4
 ; CHECK-NEXT:    v_mul_hi_u32 v4, s1, v4
 ; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v9
-; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; CHECK-NEXT:    v_mul_lo_u32 v6, s2, v1
-; CHECK-NEXT:    v_mul_lo_u32 v7, s3, v1
-; CHECK-NEXT:    v_mul_hi_u32 v1, s2, v1
+; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, v5, v6, vcc
+; CHECK-NEXT:    v_add_i32_e64 v1, s[4:5], v8, v1
+; CHECK-NEXT:    v_addc_u32_e64 v1, s[6:7], v1, v9, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e64 v5, s[8:9], 0, 0, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v6, s[6:7], 0, 0, s[6:7]
+; CHECK-NEXT:    v_addc_u32_e64 v1, vcc, v1, v5, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, 0, v6, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
+; CHECK-NEXT:    v_mul_lo_u32 v5, s2, v1
+; CHECK-NEXT:    v_mul_lo_u32 v6, s3, v1
+; CHECK-NEXT:    v_mul_hi_u32 v1, s2, v1
 ; CHECK-NEXT:    v_mul_lo_u32 v4, s2, v4
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
 ; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v4, v1
-; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, s0, v6
+; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, s0, v5
 ; CHECK-NEXT:    v_subb_u32_e64 v3, s[4:5], v3, v1, vcc
 ; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], s1, v1
 ; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s2, v4
@@ -362,229 +332,199 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_cvt_f32_u32_e32 v10, v4
 ; GISEL-NEXT:    v_cvt_f32_u32_e32 v11, v5
 ; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, 0, v4
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v13, v6
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v14, v7
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v12, v6
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v13, v7
 ; GISEL-NEXT:    v_sub_i32_e64 v9, s[4:5], 0, v6
-; GISEL-NEXT:    v_subb_u32_e32 v15, vcc, 0, v5, vcc
-; GISEL-NEXT:    v_subb_u32_e64 v12, vcc, 0, v7, s[4:5]
+; GISEL-NEXT:    v_subb_u32_e32 v14, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_subb_u32_e64 v15, vcc, 0, v7, s[4:5]
 ; GISEL-NEXT:    v_mac_f32_e32 v10, 0x4f800000, v11
-; GISEL-NEXT:    v_mac_f32_e32 v13, 0x4f800000, v14
+; GISEL-NEXT:    v_mac_f32_e32 v12, 0x4f800000, v13
 ; GISEL-NEXT:    v_rcp_iflag_f32_e32 v10, v10
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v11, v13
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v11, v12
 ; GISEL-NEXT:    v_mul_f32_e32 v10, 0x5f7ffffc, v10
 ; GISEL-NEXT:    v_mul_f32_e32 v11, 0x5f7ffffc, v11
-; GISEL-NEXT:    v_mul_f32_e32 v13, 0x2f800000, v10
-; GISEL-NEXT:    v_mul_f32_e32 v14, 0x2f800000, v11
+; GISEL-NEXT:    v_mul_f32_e32 v12, 0x2f800000, v10
+; GISEL-NEXT:    v_mul_f32_e32 v13, 0x2f800000, v11
+; GISEL-NEXT:    v_trunc_f32_e32 v12, v12
 ; GISEL-NEXT:    v_trunc_f32_e32 v13, v13
-; GISEL-NEXT:    v_trunc_f32_e32 v14, v14
-; GISEL-NEXT:    v_mac_f32_e32 v10, 0xcf800000, v13
+; GISEL-NEXT:    v_mac_f32_e32 v10, 0xcf800000, v12
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v12, v12
+; GISEL-NEXT:    v_mac_f32_e32 v11, 0xcf800000, v13
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v13, v13
-; GISEL-NEXT:    v_mac_f32_e32 v11, 0xcf800000, v14
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v14, v14
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v10, v10
-; GISEL-NEXT:    v_mul_lo_u32 v16, v8, v13
+; GISEL-NEXT:    v_mul_lo_u32 v16, v8, v12
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v11, v11
-; GISEL-NEXT:    v_mul_lo_u32 v17, v9, v14
-; GISEL-NEXT:    v_mul_lo_u32 v18, v9, v11
-; GISEL-NEXT:    v_mul_lo_u32 v19, v12, v11
-; GISEL-NEXT:    v_mul_hi_u32 v20, v9, v11
-; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v19, v17
-; GISEL-NEXT:    v_mul_lo_u32 v19, v14, v18
-; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v20
-; GISEL-NEXT:    v_mul_lo_u32 v20, v11, v17
-; GISEL-NEXT:    v_add_i32_e32 v19, vcc, v19, v20
-; GISEL-NEXT:    v_mul_hi_u32 v20, v11, v18
-; GISEL-NEXT:    v_add_i32_e64 v19, s[4:5], v19, v20
-; GISEL-NEXT:    v_mul_lo_u32 v19, v8, v10
-; GISEL-NEXT:    v_mul_lo_u32 v20, v15, v10
-; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v20, v16
+; GISEL-NEXT:    v_mul_lo_u32 v17, v9, v13
+; GISEL-NEXT:    v_mul_lo_u32 v18, v8, v10
+; GISEL-NEXT:    v_mul_lo_u32 v19, v14, v10
 ; GISEL-NEXT:    v_mul_hi_u32 v20, v8, v10
-; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v20
+; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v19, v16
+; GISEL-NEXT:    v_mul_lo_u32 v19, v12, v18
+; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v16, v20
+; GISEL-NEXT:    v_mul_lo_u32 v20, v10, v16
+; GISEL-NEXT:    v_add_i32_e32 v19, vcc, v19, v20
+; GISEL-NEXT:    v_mul_hi_u32 v20, v10, v18
+; GISEL-NEXT:    v_addc_u32_e32 v19, vcc, v19, v20, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v19, v9, v11
+; GISEL-NEXT:    v_mul_lo_u32 v20, v15, v11
+; GISEL-NEXT:    v_add_i32_e64 v17, s[4:5], v20, v17
+; GISEL-NEXT:    v_mul_hi_u32 v20, v9, v11
+; GISEL-NEXT:    v_add_i32_e64 v17, s[4:5], v17, v20
 ; GISEL-NEXT:    v_mul_lo_u32 v20, v13, v19
-; GISEL-NEXT:    v_mul_lo_u32 v21, v10, v16
-; GISEL-NEXT:    v_add_i32_e64 v20, s[6:7], v20, v21
-; GISEL-NEXT:    v_mul_hi_u32 v21, v10, v19
-; GISEL-NEXT:    v_add_i32_e64 v20, s[8:9], v20, v21
+; GISEL-NEXT:    v_mul_lo_u32 v21, v11, v17
+; GISEL-NEXT:    v_add_i32_e64 v20, s[4:5], v20, v21
+; GISEL-NEXT:    v_mul_hi_u32 v21, v11, v19
+; GISEL-NEXT:    v_addc_u32_e64 v20, s[4:5], v20, v21, s[4:5]
+; GISEL-NEXT:    v_mul_hi_u32 v18, v12, v18
 ; GISEL-NEXT:    v_mul_hi_u32 v19, v13, v19
-; GISEL-NEXT:    v_mul_hi_u32 v18, v14, v18
-; GISEL-NEXT:    v_mul_lo_u32 v20, v13, v16
-; GISEL-NEXT:    v_add_i32_e64 v19, s[10:11], v20, v19
-; GISEL-NEXT:    v_mul_lo_u32 v20, v14, v17
-; GISEL-NEXT:    v_add_i32_e64 v18, s[12:13], v20, v18
+; GISEL-NEXT:    v_mul_lo_u32 v20, v12, v16
+; GISEL-NEXT:    v_add_i32_e64 v18, s[6:7], v20, v18
+; GISEL-NEXT:    v_mul_lo_u32 v20, v13, v17
+; GISEL-NEXT:    v_add_i32_e64 v19, s[8:9], v20, v19
 ; GISEL-NEXT:    v_mul_hi_u32 v20, v10, v16
-; GISEL-NEXT:    v_add_i32_e64 v19, s[14:15], v19, v20
+; GISEL-NEXT:    v_mul_hi_u32 v16, v12, v16
+; GISEL-NEXT:    v_addc_u32_e64 v18, s[10:11], v18, v20, s[6:7]
 ; GISEL-NEXT:    v_mul_hi_u32 v20, v11, v17
-; GISEL-NEXT:    v_add_i32_e64 v18, s[16:17], v18, v20
-; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v21, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e64 v20, s[6:7], v20, v21
-; GISEL-NEXT:    v_cndmask_b32_e64 v21, 0, 1, s[10:11]
-; GISEL-NEXT:    v_cndmask_b32_e64 v22, 0, 1, s[14:15]
-; GISEL-NEXT:    v_add_i32_e64 v21, s[6:7], v21, v22
-; GISEL-NEXT:    v_cndmask_b32_e64 v22, 0, 1, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v23, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v22, vcc, v22, v23
-; GISEL-NEXT:    v_cndmask_b32_e64 v23, 0, 1, s[12:13]
-; GISEL-NEXT:    v_cndmask_b32_e64 v24, 0, 1, s[16:17]
-; GISEL-NEXT:    v_add_i32_e32 v23, vcc, v23, v24
-; GISEL-NEXT:    v_add_i32_e32 v19, vcc, v19, v20
-; GISEL-NEXT:    v_add_i32_e64 v18, s[4:5], v18, v22
-; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v19
-; GISEL-NEXT:    v_mul_hi_u32 v16, v13, v16
-; GISEL-NEXT:    v_mul_hi_u32 v17, v14, v17
-; GISEL-NEXT:    v_add_i32_e64 v11, s[8:9], v11, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v18, vcc, v21, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v19, vcc, v23, v19
-; GISEL-NEXT:    v_mul_lo_u32 v20, v8, v10
-; GISEL-NEXT:    v_mul_lo_u32 v15, v15, v10
-; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v16, v18
-; GISEL-NEXT:    v_mul_hi_u32 v18, v8, v10
-; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v19
-; GISEL-NEXT:    v_mul_lo_u32 v19, v9, v11
-; GISEL-NEXT:    v_mul_lo_u32 v12, v12, v11
-; GISEL-NEXT:    v_addc_u32_e64 v13, vcc, v13, v16, s[6:7]
+; GISEL-NEXT:    v_mul_hi_u32 v17, v13, v17
+; GISEL-NEXT:    v_addc_u32_e64 v19, s[12:13], v19, v20, s[8:9]
+; GISEL-NEXT:    v_addc_u32_e64 v20, s[14:15], 0, 0, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v18, vcc, v18, v20, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v20, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v19, s[4:5], v19, v20, s[8:9]
+; GISEL-NEXT:    v_addc_u32_e64 v20, s[6:7], 0, 0, s[10:11]
+; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v18
+; GISEL-NEXT:    v_addc_u32_e64 v18, s[8:9], 0, 0, s[12:13]
+; GISEL-NEXT:    v_addc_u32_e32 v20, vcc, 0, v20, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v18, vcc, 0, v18, s[4:5]
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v19
+; GISEL-NEXT:    v_add_i32_e64 v16, s[4:5], v16, v20
+; GISEL-NEXT:    v_add_i32_e64 v17, s[4:5], v17, v18
+; GISEL-NEXT:    v_mul_lo_u32 v18, v8, v10
+; GISEL-NEXT:    v_mul_lo_u32 v14, v14, v10
+; GISEL-NEXT:    v_mul_hi_u32 v19, v8, v10
+; GISEL-NEXT:    v_mul_lo_u32 v20, v9, v11
+; GISEL-NEXT:    v_mul_lo_u32 v15, v15, v11
+; GISEL-NEXT:    v_addc_u32_e64 v12, s[4:5], v12, v16, s[6:7]
 ; GISEL-NEXT:    v_mul_hi_u32 v16, v9, v11
-; GISEL-NEXT:    v_addc_u32_e64 v14, vcc, v14, v17, s[8:9]
-; GISEL-NEXT:    v_mul_hi_u32 v17, v10, v20
-; GISEL-NEXT:    v_mul_lo_u32 v8, v8, v13
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v15, v8
-; GISEL-NEXT:    v_mul_hi_u32 v15, v11, v19
-; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v14
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v12, v9
-; GISEL-NEXT:    v_mul_lo_u32 v12, v13, v20
+; GISEL-NEXT:    v_addc_u32_e32 v13, vcc, v13, v17, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v17, v10, v18
+; GISEL-NEXT:    v_mul_lo_u32 v8, v8, v12
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v14, v8
+; GISEL-NEXT:    v_mul_hi_u32 v14, v11, v20
+; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v13
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v15, v9
+; GISEL-NEXT:    v_mul_lo_u32 v15, v12, v18
+; GISEL-NEXT:    v_mul_hi_u32 v18, v12, v18
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v19
+; GISEL-NEXT:    v_mul_lo_u32 v19, v13, v20
 ; GISEL-NEXT:    v_mul_hi_u32 v20, v13, v20
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v18
-; GISEL-NEXT:    v_mul_lo_u32 v18, v14, v19
-; GISEL-NEXT:    v_mul_hi_u32 v19, v14, v19
 ; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v16
 ; GISEL-NEXT:    v_mul_lo_u32 v16, v10, v8
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v16
-; GISEL-NEXT:    v_mul_lo_u32 v16, v13, v8
-; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v17
-; GISEL-NEXT:    v_mul_hi_u32 v12, v10, v8
-; GISEL-NEXT:    v_mul_hi_u32 v8, v13, v8
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
+; GISEL-NEXT:    v_mul_lo_u32 v16, v12, v8
+; GISEL-NEXT:    v_addc_u32_e32 v15, vcc, v15, v17, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v15, v10, v8
+; GISEL-NEXT:    v_mul_hi_u32 v8, v12, v8
 ; GISEL-NEXT:    v_mul_lo_u32 v17, v11, v9
-; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v20
-; GISEL-NEXT:    v_mul_lo_u32 v20, v14, v9
-; GISEL-NEXT:    v_add_i32_e64 v17, s[8:9], v18, v17
-; GISEL-NEXT:    v_mul_hi_u32 v18, v11, v9
-; GISEL-NEXT:    v_mul_hi_u32 v9, v14, v9
-; GISEL-NEXT:    v_add_i32_e64 v19, s[10:11], v20, v19
-; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v17, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v16, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[10:11]
-; GISEL-NEXT:    v_add_i32_e64 v18, s[8:9], v19, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v19, s[4:5], v20, v19
-; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, s[6:7]
-; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v20
-; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v20
-; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v16, v20
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v19
-; GISEL-NEXT:    v_add_i32_e64 v17, s[4:5], v18, v17
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
-; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v17
-; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v15, v18
-; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v16, v19
-; GISEL-NEXT:    v_mul_lo_u32 v16, v1, v10
-; GISEL-NEXT:    v_mul_hi_u32 v17, v0, v10
+; GISEL-NEXT:    v_add_i32_e64 v16, s[4:5], v16, v18
+; GISEL-NEXT:    v_mul_lo_u32 v18, v13, v9
+; GISEL-NEXT:    v_add_i32_e64 v17, s[6:7], v19, v17
+; GISEL-NEXT:    v_mul_hi_u32 v19, v11, v9
+; GISEL-NEXT:    v_mul_hi_u32 v9, v13, v9
+; GISEL-NEXT:    v_add_i32_e64 v18, s[8:9], v18, v20
+; GISEL-NEXT:    v_addc_u32_e64 v15, s[10:11], v16, v15, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v14, s[6:7], v17, v14, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v14, s[12:13], v18, v19, s[8:9]
+; GISEL-NEXT:    v_addc_u32_e64 v16, s[14:15], 0, 0, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v17, s[10:11], 0, 0, s[10:11]
+; GISEL-NEXT:    v_addc_u32_e64 v18, s[6:7], 0, 0, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v19, s[6:7], 0, 0, s[12:13]
+; GISEL-NEXT:    v_addc_u32_e64 v15, vcc, v15, v16, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v14, s[4:5], v14, v18, s[8:9]
+; GISEL-NEXT:    v_addc_u32_e32 v16, vcc, 0, v17, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v17, vcc, 0, v19, s[4:5]
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v15
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
+; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v8, v16
+; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v17
+; GISEL-NEXT:    v_mul_lo_u32 v14, v1, v10
+; GISEL-NEXT:    v_mul_hi_u32 v15, v0, v10
 ; GISEL-NEXT:    v_mul_hi_u32 v10, v1, v10
-; GISEL-NEXT:    v_mul_lo_u32 v18, v3, v11
-; GISEL-NEXT:    v_mul_hi_u32 v19, v2, v11
+; GISEL-NEXT:    v_mul_lo_u32 v16, v3, v11
+; GISEL-NEXT:    v_mul_hi_u32 v17, v2, v11
 ; GISEL-NEXT:    v_mul_hi_u32 v11, v3, v11
-; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v8, v12
-; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v15
-; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, v13, v8, vcc
-; GISEL-NEXT:    v_addc_u32_e64 v9, vcc, v14, v9, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, v12, v8, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v9, vcc, v13, v9, s[4:5]
 ; GISEL-NEXT:    v_mul_lo_u32 v12, v0, v8
 ; GISEL-NEXT:    v_mul_lo_u32 v13, v1, v8
-; GISEL-NEXT:    v_mul_hi_u32 v14, v0, v8
+; GISEL-NEXT:    v_mul_hi_u32 v18, v0, v8
 ; GISEL-NEXT:    v_mul_hi_u32 v8, v1, v8
-; GISEL-NEXT:    v_mul_lo_u32 v15, v2, v9
+; GISEL-NEXT:    v_mul_lo_u32 v19, v2, v9
 ; GISEL-NEXT:    v_mul_lo_u32 v20, v3, v9
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v16, v12
-; GISEL-NEXT:    v_mul_hi_u32 v16, v2, v9
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v14, v12
+; GISEL-NEXT:    v_mul_hi_u32 v14, v2, v9
 ; GISEL-NEXT:    v_mul_hi_u32 v9, v3, v9
 ; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v13, v10
-; GISEL-NEXT:    v_add_i32_e64 v13, s[6:7], v18, v15
+; GISEL-NEXT:    v_add_i32_e64 v13, s[6:7], v16, v19
 ; GISEL-NEXT:    v_add_i32_e64 v11, s[8:9], v20, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, s[6:7]
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v17
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v14
-; GISEL-NEXT:    v_add_i32_e64 v13, s[6:7], v13, v19
-; GISEL-NEXT:    v_add_i32_e64 v11, s[8:9], v11, v16
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v15, v13
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v18, v14
-; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v20, v16
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v17
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
-; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
-; GISEL-NEXT:    v_mul_lo_u32 v16, v4, v10
-; GISEL-NEXT:    v_mul_lo_u32 v17, v5, v10
+; GISEL-NEXT:    v_addc_u32_e32 v12, vcc, v12, v15, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v10, s[10:11], v10, v18, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v12, s[6:7], v13, v17, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v11, s[12:13], v11, v14, s[8:9]
+; GISEL-NEXT:    v_addc_u32_e64 v12, s[14:15], 0, 0, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v13, s[10:11], 0, 0, s[10:11]
+; GISEL-NEXT:    v_addc_u32_e64 v14, s[6:7], 0, 0, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v15, s[6:7], 0, 0, s[12:13]
+; GISEL-NEXT:    v_addc_u32_e64 v10, vcc, v10, v12, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v11, s[4:5], v11, v14, s[8:9]
+; GISEL-NEXT:    v_addc_u32_e32 v12, vcc, 0, v13, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v13, vcc, 0, v15, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v14, v4, v10
+; GISEL-NEXT:    v_mul_lo_u32 v15, v5, v10
 ; GISEL-NEXT:    v_mul_hi_u32 v10, v4, v10
-; GISEL-NEXT:    v_mul_lo_u32 v18, v6, v11
-; GISEL-NEXT:    v_mul_lo_u32 v19, v7, v11
+; GISEL-NEXT:    v_mul_lo_u32 v16, v6, v11
+; GISEL-NEXT:    v_mul_lo_u32 v17, v7, v11
 ; GISEL-NEXT:    v_mul_hi_u32 v11, v6, v11
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v16
-; GISEL-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v18
-; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v8, v13
-; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v12
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v13
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v14
+; GISEL-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v16
+; GISEL-NEXT:    v_mul_lo_u32 v8, v4, v8
+; GISEL-NEXT:    v_mul_lo_u32 v9, v6, v9
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v0, v4
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[8:9], v2, v6
 ; GISEL-NEXT:    v_sub_i32_e64 v12, s[10:11], v0, v4
 ; GISEL-NEXT:    v_sub_i32_e64 v13, s[12:13], v2, v6
-; GISEL-NEXT:    v_mul_lo_u32 v8, v4, v8
-; GISEL-NEXT:    v_mul_lo_u32 v9, v6, v9
+; GISEL-NEXT:    v_add_i32_e64 v8, s[14:15], v15, v8
+; GISEL-NEXT:    v_add_i32_e64 v9, s[14:15], v17, v9
 ; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, -1, s[6:7]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, -1, s[8:9]
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v12, v4
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[8:9], v13, v6
 ; GISEL-NEXT:    v_sub_i32_e64 v4, s[14:15], v12, v4
 ; GISEL-NEXT:    v_sub_i32_e64 v6, s[16:17], v13, v6
-; GISEL-NEXT:    v_add_i32_e64 v8, s[18:19], v17, v8
-; GISEL-NEXT:    v_add_i32_e64 v9, s[18:19], v19, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, -1, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, -1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v8, v10
-; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v11
-; GISEL-NEXT:    v_subb_u32_e64 v10, s[6:7], v1, v8, vcc
+; GISEL-NEXT:    v_add_i32_e64 v8, s[18:19], v8, v10
+; GISEL-NEXT:    v_add_i32_e64 v9, s[18:19], v9, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[8:9]
+; GISEL-NEXT:    v_subb_u32_e64 v16, s[6:7], v1, v8, vcc
 ; GISEL-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v8
 ; GISEL-NEXT:    v_subb_u32_e64 v8, s[6:7], v3, v9, s[4:5]
 ; GISEL-NEXT:    v_sub_i32_e64 v3, s[6:7], v3, v9
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v10, v5
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v16, v5
 ; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v5, vcc
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v7
 ; GISEL-NEXT:    v_subb_u32_e64 v3, s[4:5], v3, v7, s[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v10, v5
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v16, v5
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[8:9], v8, v7
 ; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, -1, vcc
 ; GISEL-NEXT:    v_subbrev_u32_e64 v18, vcc, 0, v1, s[10:11]
 ; GISEL-NEXT:    v_subb_u32_e64 v1, vcc, v1, v5, s[10:11]
 ; GISEL-NEXT:    v_subbrev_u32_e64 v19, vcc, 0, v3, s[12:13]
 ; GISEL-NEXT:    v_subb_u32_e64 v3, vcc, v3, v7, s[12:13]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v9, v9, v14, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, v11, v15, s[8:9]
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, v17, v15, s[8:9]
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v18, v5
 ; GISEL-NEXT:    v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[14:15]
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v19, v7
@@ -594,9 +534,9 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v9
-; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v16, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, v7, v17, s[8:9]
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v10, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, v7, v11, s[8:9]
 ; GISEL-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v5
 ; GISEL-NEXT:    v_cmp_ne_u32_e64 s[8:9], 0, v7
 ; GISEL-NEXT:    v_cndmask_b32_e64 v4, v12, v4, s[6:7]
@@ -605,7 +545,7 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_cndmask_b32_e64 v3, v19, v3, s[8:9]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, v5, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v10, v1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v16, v1, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v3, v8, v3, s[4:5]
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -622,7 +562,7 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v4
 ; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; CGP-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
+; CGP-NEXT:    s_xor_b64 s[8:9], exec, s[4:5]
 ; CGP-NEXT:    s_cbranch_execz .LBB2_2
 ; CGP-NEXT:  ; %bb.1:
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v5
@@ -650,18 +590,13 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_mul_hi_u32 v18, v0, v12
 ; CGP-NEXT:    v_mul_hi_u32 v12, v2, v12
 ; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v17, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v18
-; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
-; CGP-NEXT:    v_add_i32_e32 v15, vcc, v17, v16
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; CGP-NEXT:    v_addc_u32_e32 v14, vcc, v14, v16, vcc
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v17, v13
+; CGP-NEXT:    v_addc_u32_e64 v13, s[6:7], v13, v18, s[4:5]
+; CGP-NEXT:    v_addc_u32_e64 v14, s[10:11], 0, 0, vcc
+; CGP-NEXT:    v_addc_u32_e64 v15, s[6:7], 0, 0, s[6:7]
+; CGP-NEXT:    v_addc_u32_e64 v13, vcc, v13, v14, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v14, vcc, 0, v15, vcc
 ; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
 ; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
 ; CGP-NEXT:    v_addc_u32_e32 v2, vcc, v2, v12, vcc
@@ -679,18 +614,13 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_mul_hi_u32 v16, v0, v1
 ; CGP-NEXT:    v_mul_hi_u32 v1, v2, v1
 ; CGP-NEXT:    v_add_i32_e32 v3, vcc, v14, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v15
-; CGP-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v16
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v14, v3
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v12, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v15, vcc
+; CGP-NEXT:    v_add_i32_e64 v3, s[4:5], v13, v12
+; CGP-NEXT:    v_addc_u32_e64 v3, s[6:7], v3, v16, s[4:5]
+; CGP-NEXT:    v_addc_u32_e64 v12, s[10:11], 0, 0, vcc
+; CGP-NEXT:    v_addc_u32_e64 v13, s[6:7], 0, 0, s[6:7]
+; CGP-NEXT:    v_addc_u32_e64 v3, vcc, v3, v12, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v12, vcc, 0, v13, vcc
 ; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v12
 ; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v3
 ; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v2, v1, vcc
@@ -702,26 +632,21 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_mul_hi_u32 v14, v10, v1
 ; CGP-NEXT:    v_mul_hi_u32 v1, v11, v1
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v13, v0
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v12, v2
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v13, v3
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; CGP-NEXT:    v_mul_lo_u32 v3, v4, v0
-; CGP-NEXT:    v_mul_lo_u32 v12, v5, v0
-; CGP-NEXT:    v_mul_hi_u32 v0, v4, v0
+; CGP-NEXT:    v_addc_u32_e32 v2, vcc, v2, v3, vcc
+; CGP-NEXT:    v_add_i32_e64 v0, s[4:5], v13, v0
+; CGP-NEXT:    v_addc_u32_e64 v0, s[6:7], v0, v14, s[4:5]
+; CGP-NEXT:    v_addc_u32_e64 v2, s[10:11], 0, 0, vcc
+; CGP-NEXT:    v_addc_u32_e64 v3, s[6:7], 0, 0, s[6:7]
+; CGP-NEXT:    v_addc_u32_e64 v0, vcc, v0, v2, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v2, vcc, 0, v3, vcc
 ; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
+; CGP-NEXT:    v_mul_lo_u32 v2, v4, v0
+; CGP-NEXT:    v_mul_lo_u32 v3, v5, v0
+; CGP-NEXT:    v_mul_hi_u32 v0, v4, v0
 ; CGP-NEXT:    v_mul_lo_u32 v1, v4, v1
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v12, v1
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v3, v1
 ; CGP-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v10, v3
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v10, v2
 ; CGP-NEXT:    v_subb_u32_e64 v2, s[4:5], v11, v0, vcc
 ; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v11, v0
 ; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v4
@@ -752,7 +677,7 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    ; implicit-def: $vgpr4
 ; CGP-NEXT:    ; implicit-def: $vgpr10
 ; CGP-NEXT:  .LBB2_2: ; %Flow1
-; CGP-NEXT:    s_andn2_saveexec_b64 s[4:5], s[6:7]
+; CGP-NEXT:    s_andn2_saveexec_b64 s[4:5], s[8:9]
 ; CGP-NEXT:    s_cbranch_execz .LBB2_4
 ; CGP-NEXT:  ; %bb.3:
 ; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v2
@@ -780,10 +705,10 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v6
 ; CGP-NEXT:    ; implicit-def: $vgpr2_vgpr3
 ; CGP-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
+; CGP-NEXT:    s_xor_b64 s[8:9], exec, s[4:5]
 ; CGP-NEXT:    s_cbranch_execnz .LBB2_7
 ; CGP-NEXT:  ; %bb.5: ; %Flow
-; CGP-NEXT:    s_andn2_saveexec_b64 s[4:5], s[6:7]
+; CGP-NEXT:    s_andn2_saveexec_b64 s[4:5], s[8:9]
 ; CGP-NEXT:    s_cbranch_execnz .LBB2_8
 ; CGP-NEXT:  .LBB2_6:
 ; CGP-NEXT:    s_or_b64 exec, exec, s[4:5]
@@ -814,18 +739,13 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_mul_hi_u32 v16, v2, v10
 ; CGP-NEXT:    v_mul_hi_u32 v10, v4, v10
 ; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v15, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v16
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v15, v14
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; CGP-NEXT:    v_addc_u32_e32 v12, vcc, v12, v14, vcc
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v15, v11
+; CGP-NEXT:    v_addc_u32_e64 v11, s[6:7], v11, v16, s[4:5]
+; CGP-NEXT:    v_addc_u32_e64 v12, s[10:11], 0, 0, vcc
+; CGP-NEXT:    v_addc_u32_e64 v13, s[6:7], 0, 0, s[6:7]
+; CGP-NEXT:    v_addc_u32_e64 v11, vcc, v11, v12, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v12, vcc, 0, v13, vcc
 ; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v11
 ; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v4, v10, vcc
@@ -843,18 +763,13 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_mul_hi_u32 v14, v2, v3
 ; CGP-NEXT:    v_mul_hi_u32 v3, v4, v3
 ; CGP-NEXT:    v_add_i32_e32 v5, vcc, v12, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v12, v5
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v10, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CGP-NEXT:    v_addc_u32_e32 v5, vcc, v5, v13, vcc
+; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v11, v10
+; CGP-NEXT:    v_addc_u32_e64 v5, s[6:7], v5, v14, s[4:5]
+; CGP-NEXT:    v_addc_u32_e64 v10, s[10:11], 0, 0, vcc
+; CGP-NEXT:    v_addc_u32_e64 v11, s[6:7], 0, 0, s[6:7]
+; CGP-NEXT:    v_addc_u32_e64 v5, vcc, v5, v10, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v10, vcc, 0, v11, vcc
 ; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v10
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
 ; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v4, v3, vcc
@@ -866,26 +781,21 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_mul_hi_u32 v12, v8, v3
 ; CGP-NEXT:    v_mul_hi_u32 v3, v9, v3
 ; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v11, v2
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v10, v4
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v11, v5
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; CGP-NEXT:    v_mul_lo_u32 v5, v6, v2
-; CGP-NEXT:    v_mul_lo_u32 v10, v7, v2
-; CGP-NEXT:    v_mul_hi_u32 v2, v6, v2
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v4, v5, vcc
+; CGP-NEXT:    v_add_i32_e64 v2, s[4:5], v11, v2
+; CGP-NEXT:    v_addc_u32_e64 v2, s[6:7], v2, v12, s[4:5]
+; CGP-NEXT:    v_addc_u32_e64 v4, s[10:11], 0, 0, vcc
+; CGP-NEXT:    v_addc_u32_e64 v5, s[6:7], 0, 0, s[6:7]
+; CGP-NEXT:    v_addc_u32_e64 v2, vcc, v2, v4, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, 0, v5, vcc
 ; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
+; CGP-NEXT:    v_mul_lo_u32 v4, v6, v2
+; CGP-NEXT:    v_mul_lo_u32 v5, v7, v2
+; CGP-NEXT:    v_mul_hi_u32 v2, v6, v2
 ; CGP-NEXT:    v_mul_lo_u32 v3, v6, v3
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v10, v3
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v8, v5
+; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v8, v4
 ; CGP-NEXT:    v_subb_u32_e64 v4, s[4:5], v9, v2, vcc
 ; CGP-NEXT:    v_sub_i32_e64 v2, s[4:5], v9, v2
 ; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v3, v6
@@ -915,7 +825,7 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    ; implicit-def: $vgpr4
 ; CGP-NEXT:    ; implicit-def: $vgpr6
 ; CGP-NEXT:    ; implicit-def: $vgpr8
-; CGP-NEXT:    s_andn2_saveexec_b64 s[4:5], s[6:7]
+; CGP-NEXT:    s_andn2_saveexec_b64 s[4:5], s[8:9]
 ; CGP-NEXT:    s_cbranch_execz .LBB2_6
 ; CGP-NEXT:  .LBB2_8:
 ; CGP-NEXT:    v_rcp_iflag_f32_e32 v2, v4
@@ -994,18 +904,13 @@ define i64 @v_urem_i64_oddk_denom(i64 %num) {
 ; CHECK-NEXT:    v_mul_hi_u32 v12, v3, v6
 ; CHECK-NEXT:    v_mul_hi_u32 v6, v4, v6
 ; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
-; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v11, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v12
-; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
-; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; CHECK-NEXT:    v_addc_u32_e32 v8, vcc, v8, v9, vcc
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v11, v7
+; CHECK-NEXT:    v_addc_u32_e64 v7, s[6:7], v7, v12, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e64 v8, s[8:9], 0, 0, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v9, s[6:7], 0, 0, s[6:7]
+; CHECK-NEXT:    v_addc_u32_e64 v7, vcc, v7, v8, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v8, vcc, 0, v9, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
 ; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
 ; CHECK-NEXT:    v_addc_u32_e32 v4, vcc, v4, v6, vcc
@@ -1022,18 +927,13 @@ define i64 @v_urem_i64_oddk_denom(i64 %num) {
 ; CHECK-NEXT:    v_mul_hi_u32 v11, v3, v5
 ; CHECK-NEXT:    v_mul_hi_u32 v5, v4, v5
 ; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v10, v6
-; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v11
-; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v10, v9
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
+; CHECK-NEXT:    v_addc_u32_e32 v7, vcc, v7, v9, vcc
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v10, v6
+; CHECK-NEXT:    v_addc_u32_e64 v6, s[6:7], v6, v11, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e64 v7, s[8:9], 0, 0, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v8, s[6:7], 0, 0, s[6:7]
+; CHECK-NEXT:    v_addc_u32_e64 v6, vcc, v6, v7, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v7, vcc, 0, v8, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
 ; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
 ; CHECK-NEXT:    v_addc_u32_e32 v4, vcc, v4, v5, vcc
@@ -1045,24 +945,19 @@ define i64 @v_urem_i64_oddk_denom(i64 %num) {
 ; CHECK-NEXT:    v_mul_hi_u32 v9, v0, v4
 ; CHECK-NEXT:    v_mul_hi_u32 v4, v1, v4
 ; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v8, v3
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v9
-; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; CHECK-NEXT:    v_mul_lo_u32 v6, v3, v2
-; CHECK-NEXT:    v_mul_hi_u32 v3, v3, v2
+; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, v5, v6, vcc
+; CHECK-NEXT:    v_add_i32_e64 v3, s[4:5], v8, v3
+; CHECK-NEXT:    v_addc_u32_e64 v3, s[6:7], v3, v9, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e64 v5, s[8:9], 0, 0, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v6, s[6:7], 0, 0, s[6:7]
+; CHECK-NEXT:    v_addc_u32_e64 v3, vcc, v3, v5, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, 0, v6, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
+; CHECK-NEXT:    v_mul_lo_u32 v5, v3, v2
+; CHECK-NEXT:    v_mul_hi_u32 v3, v3, v2
 ; CHECK-NEXT:    v_mul_lo_u32 v4, v4, v2
 ; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
-; CHECK-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v6
+; CHECK-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v5
 ; CHECK-NEXT:    v_subb_u32_e64 v4, vcc, v1, v3, s[4:5]
 ; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
 ; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
@@ -1097,26 +992,26 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_mov_b32_e32 v4, 0x12d8fb
 ; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, 0x12d8fb
 ; GISEL-NEXT:    v_cvt_f32_ubyte0_e32 v6, 0
-; GISEL-NEXT:    s_sub_u32 s4, 0, 0x12d8fb
+; GISEL-NEXT:    s_sub_u32 s16, 0, 0x12d8fb
 ; GISEL-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v6
-; GISEL-NEXT:    s_subb_u32 s5, 0, 0
+; GISEL-NEXT:    s_subb_u32 s17, 0, 0
 ; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v5
-; GISEL-NEXT:    s_sub_u32 s6, 0, 0x12d8fb
+; GISEL-NEXT:    s_sub_u32 s18, 0, 0x12d8fb
 ; GISEL-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
-; GISEL-NEXT:    s_subb_u32 s7, 0, 0
+; GISEL-NEXT:    s_subb_u32 s19, 0, 0
 ; GISEL-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v5
 ; GISEL-NEXT:    v_trunc_f32_e32 v6, v6
 ; GISEL-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v6
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
-; GISEL-NEXT:    v_mul_lo_u32 v7, s4, v6
-; GISEL-NEXT:    v_mul_lo_u32 v8, s6, v6
-; GISEL-NEXT:    v_mul_lo_u32 v9, s4, v5
-; GISEL-NEXT:    v_mul_lo_u32 v10, s5, v5
-; GISEL-NEXT:    v_mul_hi_u32 v11, s4, v5
-; GISEL-NEXT:    v_mul_lo_u32 v12, s6, v5
-; GISEL-NEXT:    v_mul_lo_u32 v13, s7, v5
-; GISEL-NEXT:    v_mul_hi_u32 v14, s6, v5
+; GISEL-NEXT:    v_mul_lo_u32 v7, s16, v6
+; GISEL-NEXT:    v_mul_lo_u32 v8, s18, v6
+; GISEL-NEXT:    v_mul_lo_u32 v9, s16, v5
+; GISEL-NEXT:    v_mul_lo_u32 v10, s17, v5
+; GISEL-NEXT:    v_mul_hi_u32 v11, s16, v5
+; GISEL-NEXT:    v_mul_lo_u32 v12, s18, v5
+; GISEL-NEXT:    v_mul_lo_u32 v13, s19, v5
+; GISEL-NEXT:    v_mul_hi_u32 v14, s18, v5
 ; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v10, v7
 ; GISEL-NEXT:    v_mul_lo_u32 v10, v6, v9
 ; GISEL-NEXT:    v_mul_hi_u32 v15, v5, v9
@@ -1136,100 +1031,80 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_mul_hi_u32 v20, v5, v8
 ; GISEL-NEXT:    v_mul_hi_u32 v8, v6, v8
 ; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v19, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v17
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v16
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v20
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v14, v15
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v18, v13
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v19, v16
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v14, v13
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
+; GISEL-NEXT:    v_addc_u32_e32 v10, vcc, v10, v15, vcc
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v14, v9
+; GISEL-NEXT:    v_addc_u32_e64 v9, s[6:7], v9, v17, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v10, s[8:9], v13, v18
+; GISEL-NEXT:    v_addc_u32_e64 v10, s[8:9], v10, v16, s[8:9]
+; GISEL-NEXT:    v_add_i32_e64 v10, s[10:11], v19, v12
+; GISEL-NEXT:    v_addc_u32_e64 v10, s[12:13], v10, v20, s[10:11]
+; GISEL-NEXT:    v_addc_u32_e64 v11, s[14:15], 0, 0, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v12, s[6:7], 0, 0, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v13, s[6:7], 0, 0, s[8:9]
+; GISEL-NEXT:    v_addc_u32_e64 v14, s[6:7], 0, 0, s[12:13]
+; GISEL-NEXT:    v_addc_u32_e64 v9, vcc, v9, v11, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v11, vcc, 0, v12, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v10, vcc, v10, v13, s[10:11]
+; GISEL-NEXT:    v_addc_u32_e32 v12, vcc, 0, v14, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v11
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
 ; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v5, v9
 ; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, v6, v7, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v10, s4, v9
-; GISEL-NEXT:    v_mul_lo_u32 v11, s5, v9
-; GISEL-NEXT:    v_mul_hi_u32 v13, s4, v9
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v12
+; GISEL-NEXT:    v_mul_lo_u32 v11, s16, v9
+; GISEL-NEXT:    v_mul_lo_u32 v12, s17, v9
+; GISEL-NEXT:    v_mul_hi_u32 v13, s16, v9
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
 ; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v6, v8, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v8, s6, v5
-; GISEL-NEXT:    v_mul_lo_u32 v12, s7, v5
-; GISEL-NEXT:    v_mul_hi_u32 v14, s6, v5
-; GISEL-NEXT:    v_mul_lo_u32 v15, s4, v7
-; GISEL-NEXT:    v_mul_lo_u32 v16, v7, v10
-; GISEL-NEXT:    v_mul_hi_u32 v17, v9, v10
-; GISEL-NEXT:    v_mul_hi_u32 v10, v7, v10
-; GISEL-NEXT:    v_mul_lo_u32 v18, s6, v6
+; GISEL-NEXT:    v_mul_lo_u32 v8, s18, v5
+; GISEL-NEXT:    v_mul_lo_u32 v10, s19, v5
+; GISEL-NEXT:    v_mul_hi_u32 v14, s18, v5
+; GISEL-NEXT:    v_mul_lo_u32 v15, s16, v7
+; GISEL-NEXT:    v_mul_lo_u32 v16, v7, v11
+; GISEL-NEXT:    v_mul_hi_u32 v17, v9, v11
+; GISEL-NEXT:    v_mul_hi_u32 v11, v7, v11
+; GISEL-NEXT:    v_mul_lo_u32 v18, s18, v6
 ; GISEL-NEXT:    v_mul_lo_u32 v19, v6, v8
 ; GISEL-NEXT:    v_mul_hi_u32 v20, v5, v8
 ; GISEL-NEXT:    v_mul_hi_u32 v8, v6, v8
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v15
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v18
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
-; GISEL-NEXT:    v_mul_lo_u32 v13, v9, v11
-; GISEL-NEXT:    v_mul_lo_u32 v14, v7, v11
-; GISEL-NEXT:    v_mul_hi_u32 v15, v9, v11
-; GISEL-NEXT:    v_mul_hi_u32 v11, v7, v11
-; GISEL-NEXT:    v_mul_lo_u32 v18, v5, v12
-; GISEL-NEXT:    v_mul_lo_u32 v21, v6, v12
-; GISEL-NEXT:    v_mul_hi_u32 v22, v5, v12
-; GISEL-NEXT:    v_mul_hi_u32 v12, v6, v12
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v16, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v14, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v18, vcc, v19, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v21, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v21, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v18, v20
-; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v22
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v18
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v14
+; GISEL-NEXT:    v_mul_lo_u32 v13, v9, v12
+; GISEL-NEXT:    v_mul_lo_u32 v14, v7, v12
+; GISEL-NEXT:    v_mul_hi_u32 v15, v9, v12
+; GISEL-NEXT:    v_mul_hi_u32 v12, v7, v12
+; GISEL-NEXT:    v_mul_lo_u32 v18, v5, v10
+; GISEL-NEXT:    v_mul_lo_u32 v21, v6, v10
+; GISEL-NEXT:    v_mul_hi_u32 v22, v5, v10
+; GISEL-NEXT:    v_mul_hi_u32 v10, v6, v10
 ; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v16, v13
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
-; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v19, v17
-; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v21, v18
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v16, v15
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
-; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, v7, v11, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v10, v1, v9
-; GISEL-NEXT:    v_mul_hi_u32 v11, v0, v9
+; GISEL-NEXT:    v_addc_u32_e32 v13, vcc, v13, v17, vcc
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v14, v11
+; GISEL-NEXT:    v_addc_u32_e64 v11, s[6:7], v11, v15, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v13, s[8:9], v19, v18
+; GISEL-NEXT:    v_addc_u32_e64 v13, s[8:9], v13, v20, s[8:9]
+; GISEL-NEXT:    v_add_i32_e64 v8, s[10:11], v21, v8
+; GISEL-NEXT:    v_addc_u32_e64 v8, s[12:13], v8, v22, s[10:11]
+; GISEL-NEXT:    v_addc_u32_e64 v13, s[14:15], 0, 0, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v14, s[6:7], 0, 0, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v15, s[6:7], 0, 0, s[8:9]
+; GISEL-NEXT:    v_addc_u32_e64 v16, s[6:7], 0, 0, s[12:13]
+; GISEL-NEXT:    v_addc_u32_e64 v11, vcc, v11, v13, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v13, vcc, 0, v14, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v8, vcc, v8, v15, s[10:11]
+; GISEL-NEXT:    v_addc_u32_e32 v14, vcc, 0, v16, vcc
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v14
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, v7, v12, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v11, v1, v9
+; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v9
 ; GISEL-NEXT:    v_mul_hi_u32 v9, v1, v9
 ; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
-; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v6, v12, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v6, v10, vcc
 ; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v5
-; GISEL-NEXT:    v_mul_hi_u32 v12, v2, v5
+; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v5
 ; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v5
 ; GISEL-NEXT:    v_mul_lo_u32 v13, v0, v7
 ; GISEL-NEXT:    v_mul_lo_u32 v14, v1, v7
@@ -1239,48 +1114,38 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_mul_lo_u32 v17, v3, v6
 ; GISEL-NEXT:    v_mul_hi_u32 v18, v2, v6
 ; GISEL-NEXT:    v_mul_hi_u32 v6, v3, v6
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v16
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v17, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v14, v11
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v16, v8
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v17, v12
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; GISEL-NEXT:    v_mul_lo_u32 v11, v9, v4
-; GISEL-NEXT:    v_mul_hi_u32 v9, v9, v4
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
-; GISEL-NEXT:    v_mul_lo_u32 v12, v5, v4
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
+; GISEL-NEXT:    v_addc_u32_e32 v11, vcc, v11, v12, vcc
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v14, v9
+; GISEL-NEXT:    v_addc_u32_e64 v9, s[6:7], v9, v15, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v8, s[8:9], v8, v16
+; GISEL-NEXT:    v_addc_u32_e64 v8, s[8:9], v8, v10, s[8:9]
+; GISEL-NEXT:    v_add_i32_e64 v5, s[10:11], v17, v5
+; GISEL-NEXT:    v_addc_u32_e64 v5, s[12:13], v5, v18, s[10:11]
+; GISEL-NEXT:    v_addc_u32_e64 v8, s[14:15], 0, 0, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v10, s[6:7], 0, 0, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v11, s[6:7], 0, 0, s[8:9]
+; GISEL-NEXT:    v_addc_u32_e64 v12, s[6:7], 0, 0, s[12:13]
+; GISEL-NEXT:    v_addc_u32_e64 v8, vcc, v9, v8, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, 0, v10, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v5, vcc, v5, v11, s[10:11]
+; GISEL-NEXT:    v_addc_u32_e32 v10, vcc, 0, v12, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; GISEL-NEXT:    v_mul_lo_u32 v9, v8, v4
+; GISEL-NEXT:    v_mul_hi_u32 v8, v8, v4
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v10
+; GISEL-NEXT:    v_mul_lo_u32 v10, v5, v4
 ; GISEL-NEXT:    v_mul_hi_u32 v5, v5, v4
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
 ; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v4
 ; GISEL-NEXT:    v_mul_lo_u32 v6, v6, v4
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
 ; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v11
+; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v9
 ; GISEL-NEXT:    v_subb_u32_e64 v6, vcc, v1, v7, s[4:5]
 ; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
 ; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, -1, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v2, s[6:7], v2, v12
+; GISEL-NEXT:    v_sub_i32_e64 v2, s[6:7], v2, v10
 ; GISEL-NEXT:    v_subb_u32_e64 v8, vcc, v3, v5, s[6:7]
 ; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v3, v5
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v4
@@ -1350,18 +1215,13 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_mul_hi_u32 v14, v5, v8
 ; CGP-NEXT:    v_mul_hi_u32 v8, v6, v8
 ; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v13, v11
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CGP-NEXT:    v_addc_u32_e32 v10, vcc, v10, v11, vcc
+; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v13, v9
+; CGP-NEXT:    v_addc_u32_e64 v9, s[6:7], v9, v14, s[4:5]
+; CGP-NEXT:    v_addc_u32_e64 v10, s[8:9], 0, 0, vcc
+; CGP-NEXT:    v_addc_u32_e64 v11, s[6:7], 0, 0, s[6:7]
+; CGP-NEXT:    v_addc_u32_e64 v9, vcc, v9, v10, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v10, vcc, 0, v11, vcc
 ; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
 ; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
 ; CGP-NEXT:    v_addc_u32_e32 v6, vcc, v6, v8, vcc
@@ -1378,18 +1238,13 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_mul_hi_u32 v13, v5, v7
 ; CGP-NEXT:    v_mul_hi_u32 v7, v6, v7
 ; CGP-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v11
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
-; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
+; CGP-NEXT:    v_addc_u32_e32 v9, vcc, v9, v11, vcc
+; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v12, v8
+; CGP-NEXT:    v_addc_u32_e64 v8, s[6:7], v8, v13, s[4:5]
+; CGP-NEXT:    v_addc_u32_e64 v9, s[8:9], 0, 0, vcc
+; CGP-NEXT:    v_addc_u32_e64 v10, s[6:7], 0, 0, s[6:7]
+; CGP-NEXT:    v_addc_u32_e64 v8, vcc, v8, v9, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v9, vcc, 0, v10, vcc
 ; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
 ; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
 ; CGP-NEXT:    v_addc_u32_e32 v6, vcc, v6, v7, vcc
@@ -1408,47 +1263,37 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_mul_hi_u32 v18, v2, v6
 ; CGP-NEXT:    v_mul_hi_u32 v6, v3, v6
 ; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v16
-; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v17, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
-; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v9, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v18
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v12, v7
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v16, v10
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v17, v11
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
-; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; CGP-NEXT:    v_mul_lo_u32 v9, v7, v4
+; CGP-NEXT:    v_addc_u32_e32 v7, vcc, v7, v8, vcc
+; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v13, v9
+; CGP-NEXT:    v_addc_u32_e64 v7, s[6:7], v7, v14, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v8, s[8:9], v10, v16
+; CGP-NEXT:    v_addc_u32_e64 v8, s[8:9], v8, v11, s[8:9]
+; CGP-NEXT:    v_add_i32_e64 v5, s[10:11], v17, v5
+; CGP-NEXT:    v_addc_u32_e64 v5, s[12:13], v5, v18, s[10:11]
+; CGP-NEXT:    v_addc_u32_e64 v8, s[14:15], 0, 0, vcc
+; CGP-NEXT:    v_addc_u32_e64 v9, s[6:7], 0, 0, s[6:7]
+; CGP-NEXT:    v_addc_u32_e64 v10, s[6:7], 0, 0, s[8:9]
+; CGP-NEXT:    v_addc_u32_e64 v11, s[6:7], 0, 0, s[12:13]
+; CGP-NEXT:    v_addc_u32_e64 v7, vcc, v7, v8, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v8, vcc, 0, v9, vcc
+; CGP-NEXT:    v_addc_u32_e64 v5, vcc, v5, v10, s[10:11]
+; CGP-NEXT:    v_addc_u32_e32 v9, vcc, 0, v11, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v15, v8
+; CGP-NEXT:    v_mul_lo_u32 v10, v7, v4
 ; CGP-NEXT:    v_mul_hi_u32 v7, v7, v4
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT:    v_mul_lo_u32 v11, v5, v4
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
+; CGP-NEXT:    v_mul_lo_u32 v9, v5, v4
 ; CGP-NEXT:    v_mul_hi_u32 v5, v5, v4
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v15, v8
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v10
 ; CGP-NEXT:    v_mul_lo_u32 v8, v8, v4
 ; CGP-NEXT:    v_mul_lo_u32 v6, v6, v4
 ; CGP-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
 ; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v9
+; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v10
 ; CGP-NEXT:    v_subb_u32_e64 v6, vcc, v1, v7, s[4:5]
 ; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
 ; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, vcc
-; CGP-NEXT:    v_sub_i32_e64 v2, s[6:7], v2, v11
+; CGP-NEXT:    v_sub_i32_e64 v2, s[6:7], v2, v9
 ; CGP-NEXT:    v_subb_u32_e64 v8, vcc, v3, v5, s[6:7]
 ; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v3, v5
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v4
@@ -1508,10 +1353,10 @@ define i64 @v_urem_i64_pow2_shl_denom(i64 %x, i64 %y) {
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, v5
 ; CHECK-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
+; CHECK-NEXT:    s_xor_b64 s[8:9], exec, s[4:5]
 ; CHECK-NEXT:    s_cbranch_execnz .LBB7_3
 ; CHECK-NEXT:  ; %bb.1: ; %Flow
-; CHECK-NEXT:    s_andn2_saveexec_b64 s[4:5], s[6:7]
+; CHECK-NEXT:    s_andn2_saveexec_b64 s[4:5], s[8:9]
 ; CHECK-NEXT:    s_cbranch_execnz .LBB7_4
 ; CHECK-NEXT:  .LBB7_2:
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
@@ -1542,18 +1387,13 @@ define i64 @v_urem_i64_pow2_shl_denom(i64 %x, i64 %y) {
 ; CHECK-NEXT:    v_mul_hi_u32 v14, v0, v8
 ; CHECK-NEXT:    v_mul_hi_u32 v8, v2, v8
 ; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
-; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
-; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
-; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v14
-; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v13, v12
-; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
-; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CHECK-NEXT:    v_addc_u32_e32 v10, vcc, v10, v12, vcc
+; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v13, v9
+; CHECK-NEXT:    v_addc_u32_e64 v9, s[6:7], v9, v14, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e64 v10, s[10:11], 0, 0, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v11, s[6:7], 0, 0, s[6:7]
+; CHECK-NEXT:    v_addc_u32_e64 v9, vcc, v9, v10, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v10, vcc, 0, v11, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
 ; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v2, v8, vcc
@@ -1571,18 +1411,13 @@ define i64 @v_urem_i64_pow2_shl_denom(i64 %x, i64 %y) {
 ; CHECK-NEXT:    v_mul_hi_u32 v12, v0, v1
 ; CHECK-NEXT:    v_mul_hi_u32 v1, v2, v1
 ; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v10, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v11
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
-; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v10, v7
-; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; CHECK-NEXT:    v_addc_u32_e32 v7, vcc, v7, v11, vcc
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v9, v8
+; CHECK-NEXT:    v_addc_u32_e64 v7, s[6:7], v7, v12, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e64 v8, s[10:11], 0, 0, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v9, s[6:7], 0, 0, s[6:7]
+; CHECK-NEXT:    v_addc_u32_e64 v7, vcc, v7, v8, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v8, vcc, 0, v9, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v8
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
 ; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v2, v1, vcc
@@ -1594,26 +1429,21 @@ define i64 @v_urem_i64_pow2_shl_denom(i64 %x, i64 %y) {
 ; CHECK-NEXT:    v_mul_hi_u32 v10, v3, v1
 ; CHECK-NEXT:    v_mul_hi_u32 v1, v4, v1
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v8
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
-; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v10
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v8, v2
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
-; CHECK-NEXT:    v_mul_lo_u32 v7, v5, v0
-; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v0
-; CHECK-NEXT:    v_mul_hi_u32 v0, v5, v0
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v2, v7, vcc
+; CHECK-NEXT:    v_add_i32_e64 v0, s[4:5], v9, v0
+; CHECK-NEXT:    v_addc_u32_e64 v0, s[6:7], v0, v10, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e64 v2, s[10:11], 0, 0, vcc
+; CHECK-NEXT:    v_addc_u32_e64 v7, s[6:7], 0, 0, s[6:7]
+; CHECK-NEXT:    v_addc_u32_e64 v0, vcc, v0, v2, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, 0, v7, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
+; CHECK-NEXT:    v_mul_lo_u32 v2, v5, v0
+; CHECK-NEXT:    v_mul_lo_u32 v7, v6, v0
+; CHECK-NEXT:    v_mul_hi_u32 v0, v5, v0
 ; CHECK-NEXT:    v_mul_lo_u32 v1, v5, v1
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v7, v1
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
-; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v3, v7
+; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v3, v2
 ; CHECK-NEXT:    v_subb_u32_e64 v2, s[4:5], v4, v0, vcc
 ; CHECK-NEXT:    v_sub_i32_e64 v0, s[4:5], v4, v0
 ; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v5
@@ -1643,7 +1473,7 @@ define i64 @v_urem_i64_pow2_shl_denom(i64 %x, i64 %y) {
 ; CHECK-NEXT:    ; implicit-def: $vgpr2
 ; CHECK-NEXT:    ; implicit-def: $vgpr5_vgpr6
 ; CHECK-NEXT:    ; implicit-def: $vgpr3
-; CHECK-NEXT:    s_andn2_saveexec_b64 s[4:5], s[6:7]
+; CHECK-NEXT:    s_andn2_saveexec_b64 s[4:5], s[8:9]
 ; CHECK-NEXT:    s_cbranch_execz .LBB7_2
 ; CHECK-NEXT:  .LBB7_4:
 ; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v2
@@ -1681,229 +1511,199 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; GISEL-NEXT:    v_cvt_f32_u32_e32 v10, v7
 ; GISEL-NEXT:    v_cvt_f32_u32_e32 v11, v8
 ; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, 0, v7
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v13, v4
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v14, v5
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v12, v4
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v13, v5
 ; GISEL-NEXT:    v_sub_i32_e64 v9, s[4:5], 0, v4
-; GISEL-NEXT:    v_subb_u32_e32 v15, vcc, 0, v8, vcc
-; GISEL-NEXT:    v_subb_u32_e64 v12, vcc, 0, v5, s[4:5]
+; GISEL-NEXT:    v_subb_u32_e32 v14, vcc, 0, v8, vcc
+; GISEL-NEXT:    v_subb_u32_e64 v15, vcc, 0, v5, s[4:5]
 ; GISEL-NEXT:    v_mac_f32_e32 v10, 0x4f800000, v11
-; GISEL-NEXT:    v_mac_f32_e32 v13, 0x4f800000, v14
+; GISEL-NEXT:    v_mac_f32_e32 v12, 0x4f800000, v13
 ; GISEL-NEXT:    v_rcp_iflag_f32_e32 v10, v10
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v11, v13
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v11, v12
 ; GISEL-NEXT:    v_mul_f32_e32 v10, 0x5f7ffffc, v10
 ; GISEL-NEXT:    v_mul_f32_e32 v11, 0x5f7ffffc, v11
-; GISEL-NEXT:    v_mul_f32_e32 v13, 0x2f800000, v10
-; GISEL-NEXT:    v_mul_f32_e32 v14, 0x2f800000, v11
+; GISEL-NEXT:    v_mul_f32_e32 v12, 0x2f800000, v10
+; GISEL-NEXT:    v_mul_f32_e32 v13, 0x2f800000, v11
+; GISEL-NEXT:    v_trunc_f32_e32 v12, v12
 ; GISEL-NEXT:    v_trunc_f32_e32 v13, v13
-; GISEL-NEXT:    v_trunc_f32_e32 v14, v14
-; GISEL-NEXT:    v_mac_f32_e32 v10, 0xcf800000, v13
+; GISEL-NEXT:    v_mac_f32_e32 v10, 0xcf800000, v12
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v12, v12
+; GISEL-NEXT:    v_mac_f32_e32 v11, 0xcf800000, v13
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v13, v13
-; GISEL-NEXT:    v_mac_f32_e32 v11, 0xcf800000, v14
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v14, v14
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v10, v10
-; GISEL-NEXT:    v_mul_lo_u32 v16, v6, v13
+; GISEL-NEXT:    v_mul_lo_u32 v16, v6, v12
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v11, v11
-; GISEL-NEXT:    v_mul_lo_u32 v17, v9, v14
-; GISEL-NEXT:    v_mul_lo_u32 v18, v9, v11
-; GISEL-NEXT:    v_mul_lo_u32 v19, v12, v11
-; GISEL-NEXT:    v_mul_hi_u32 v20, v9, v11
-; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v19, v17
-; GISEL-NEXT:    v_mul_lo_u32 v19, v14, v18
-; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v20
-; GISEL-NEXT:    v_mul_lo_u32 v20, v11, v17
-; GISEL-NEXT:    v_add_i32_e32 v19, vcc, v19, v20
-; GISEL-NEXT:    v_mul_hi_u32 v20, v11, v18
-; GISEL-NEXT:    v_add_i32_e64 v19, s[4:5], v19, v20
-; GISEL-NEXT:    v_mul_lo_u32 v19, v6, v10
-; GISEL-NEXT:    v_mul_lo_u32 v20, v15, v10
-; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v20, v16
+; GISEL-NEXT:    v_mul_lo_u32 v17, v9, v13
+; GISEL-NEXT:    v_mul_lo_u32 v18, v6, v10
+; GISEL-NEXT:    v_mul_lo_u32 v19, v14, v10
 ; GISEL-NEXT:    v_mul_hi_u32 v20, v6, v10
-; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v20
+; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v19, v16
+; GISEL-NEXT:    v_mul_lo_u32 v19, v12, v18
+; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v16, v20
+; GISEL-NEXT:    v_mul_lo_u32 v20, v10, v16
+; GISEL-NEXT:    v_add_i32_e32 v19, vcc, v19, v20
+; GISEL-NEXT:    v_mul_hi_u32 v20, v10, v18
+; GISEL-NEXT:    v_addc_u32_e32 v19, vcc, v19, v20, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v19, v9, v11
+; GISEL-NEXT:    v_mul_lo_u32 v20, v15, v11
+; GISEL-NEXT:    v_add_i32_e64 v17, s[4:5], v20, v17
+; GISEL-NEXT:    v_mul_hi_u32 v20, v9, v11
+; GISEL-NEXT:    v_add_i32_e64 v17, s[4:5], v17, v20
 ; GISEL-NEXT:    v_mul_lo_u32 v20, v13, v19
-; GISEL-NEXT:    v_mul_lo_u32 v21, v10, v16
-; GISEL-NEXT:    v_add_i32_e64 v20, s[6:7], v20, v21
-; GISEL-NEXT:    v_mul_hi_u32 v21, v10, v19
-; GISEL-NEXT:    v_add_i32_e64 v20, s[8:9], v20, v21
+; GISEL-NEXT:    v_mul_lo_u32 v21, v11, v17
+; GISEL-NEXT:    v_add_i32_e64 v20, s[4:5], v20, v21
+; GISEL-NEXT:    v_mul_hi_u32 v21, v11, v19
+; GISEL-NEXT:    v_addc_u32_e64 v20, s[4:5], v20, v21, s[4:5]
+; GISEL-NEXT:    v_mul_hi_u32 v18, v12, v18
 ; GISEL-NEXT:    v_mul_hi_u32 v19, v13, v19
-; GISEL-NEXT:    v_mul_hi_u32 v18, v14, v18
-; GISEL-NEXT:    v_mul_lo_u32 v20, v13, v16
-; GISEL-NEXT:    v_add_i32_e64 v19, s[10:11], v20, v19
-; GISEL-NEXT:    v_mul_lo_u32 v20, v14, v17
-; GISEL-NEXT:    v_add_i32_e64 v18, s[12:13], v20, v18
+; GISEL-NEXT:    v_mul_lo_u32 v20, v12, v16
+; GISEL-NEXT:    v_add_i32_e64 v18, s[6:7], v20, v18
+; GISEL-NEXT:    v_mul_lo_u32 v20, v13, v17
+; GISEL-NEXT:    v_add_i32_e64 v19, s[8:9], v20, v19
 ; GISEL-NEXT:    v_mul_hi_u32 v20, v10, v16
-; GISEL-NEXT:    v_add_i32_e64 v19, s[14:15], v19, v20
+; GISEL-NEXT:    v_mul_hi_u32 v16, v12, v16
+; GISEL-NEXT:    v_addc_u32_e64 v18, s[10:11], v18, v20, s[6:7]
 ; GISEL-NEXT:    v_mul_hi_u32 v20, v11, v17
-; GISEL-NEXT:    v_add_i32_e64 v18, s[16:17], v18, v20
-; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v21, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e64 v20, s[6:7], v20, v21
-; GISEL-NEXT:    v_cndmask_b32_e64 v21, 0, 1, s[10:11]
-; GISEL-NEXT:    v_cndmask_b32_e64 v22, 0, 1, s[14:15]
-; GISEL-NEXT:    v_add_i32_e64 v21, s[6:7], v21, v22
-; GISEL-NEXT:    v_cndmask_b32_e64 v22, 0, 1, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v23, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v22, vcc, v22, v23
-; GISEL-NEXT:    v_cndmask_b32_e64 v23, 0, 1, s[12:13]
-; GISEL-NEXT:    v_cndmask_b32_e64 v24, 0, 1, s[16:17]
-; GISEL-NEXT:    v_add_i32_e32 v23, vcc, v23, v24
-; GISEL-NEXT:    v_add_i32_e32 v19, vcc, v19, v20
-; GISEL-NEXT:    v_add_i32_e64 v18, s[4:5], v18, v22
-; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v19
-; GISEL-NEXT:    v_mul_hi_u32 v16, v13, v16
-; GISEL-NEXT:    v_mul_hi_u32 v17, v14, v17
-; GISEL-NEXT:    v_add_i32_e64 v11, s[8:9], v11, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v18, vcc, v21, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v19, vcc, v23, v19
-; GISEL-NEXT:    v_mul_lo_u32 v20, v6, v10
-; GISEL-NEXT:    v_mul_lo_u32 v15, v15, v10
-; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v16, v18
-; GISEL-NEXT:    v_mul_hi_u32 v18, v6, v10
-; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v19
-; GISEL-NEXT:    v_mul_lo_u32 v19, v9, v11
-; GISEL-NEXT:    v_mul_lo_u32 v12, v12, v11
-; GISEL-NEXT:    v_addc_u32_e64 v13, vcc, v13, v16, s[6:7]
+; GISEL-NEXT:    v_mul_hi_u32 v17, v13, v17
+; GISEL-NEXT:    v_addc_u32_e64 v19, s[12:13], v19, v20, s[8:9]
+; GISEL-NEXT:    v_addc_u32_e64 v20, s[14:15], 0, 0, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v18, vcc, v18, v20, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v20, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v19, s[4:5], v19, v20, s[8:9]
+; GISEL-NEXT:    v_addc_u32_e64 v20, s[6:7], 0, 0, s[10:11]
+; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v18
+; GISEL-NEXT:    v_addc_u32_e64 v18, s[8:9], 0, 0, s[12:13]
+; GISEL-NEXT:    v_addc_u32_e32 v20, vcc, 0, v20, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v18, vcc, 0, v18, s[4:5]
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v19
+; GISEL-NEXT:    v_add_i32_e64 v16, s[4:5], v16, v20
+; GISEL-NEXT:    v_add_i32_e64 v17, s[4:5], v17, v18
+; GISEL-NEXT:    v_mul_lo_u32 v18, v6, v10
+; GISEL-NEXT:    v_mul_lo_u32 v14, v14, v10
+; GISEL-NEXT:    v_mul_hi_u32 v19, v6, v10
+; GISEL-NEXT:    v_mul_lo_u32 v20, v9, v11
+; GISEL-NEXT:    v_mul_lo_u32 v15, v15, v11
+; GISEL-NEXT:    v_addc_u32_e64 v12, s[4:5], v12, v16, s[6:7]
 ; GISEL-NEXT:    v_mul_hi_u32 v16, v9, v11
-; GISEL-NEXT:    v_addc_u32_e64 v14, vcc, v14, v17, s[8:9]
-; GISEL-NEXT:    v_mul_hi_u32 v17, v10, v20
-; GISEL-NEXT:    v_mul_lo_u32 v6, v6, v13
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v15, v6
-; GISEL-NEXT:    v_mul_hi_u32 v15, v11, v19
-; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v14
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v12, v9
-; GISEL-NEXT:    v_mul_lo_u32 v12, v13, v20
+; GISEL-NEXT:    v_addc_u32_e32 v13, vcc, v13, v17, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v17, v10, v18
+; GISEL-NEXT:    v_mul_lo_u32 v6, v6, v12
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v14, v6
+; GISEL-NEXT:    v_mul_hi_u32 v14, v11, v20
+; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v13
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v15, v9
+; GISEL-NEXT:    v_mul_lo_u32 v15, v12, v18
+; GISEL-NEXT:    v_mul_hi_u32 v18, v12, v18
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v19
+; GISEL-NEXT:    v_mul_lo_u32 v19, v13, v20
 ; GISEL-NEXT:    v_mul_hi_u32 v20, v13, v20
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v18
-; GISEL-NEXT:    v_mul_lo_u32 v18, v14, v19
-; GISEL-NEXT:    v_mul_hi_u32 v19, v14, v19
 ; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v16
 ; GISEL-NEXT:    v_mul_lo_u32 v16, v10, v6
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v16
-; GISEL-NEXT:    v_mul_lo_u32 v16, v13, v6
-; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v17
-; GISEL-NEXT:    v_mul_hi_u32 v12, v10, v6
-; GISEL-NEXT:    v_mul_hi_u32 v6, v13, v6
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
+; GISEL-NEXT:    v_mul_lo_u32 v16, v12, v6
+; GISEL-NEXT:    v_addc_u32_e32 v15, vcc, v15, v17, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v15, v10, v6
+; GISEL-NEXT:    v_mul_hi_u32 v6, v12, v6
 ; GISEL-NEXT:    v_mul_lo_u32 v17, v11, v9
-; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v20
-; GISEL-NEXT:    v_mul_lo_u32 v20, v14, v9
-; GISEL-NEXT:    v_add_i32_e64 v17, s[8:9], v18, v17
-; GISEL-NEXT:    v_mul_hi_u32 v18, v11, v9
-; GISEL-NEXT:    v_mul_hi_u32 v9, v14, v9
-; GISEL-NEXT:    v_add_i32_e64 v19, s[10:11], v20, v19
-; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v17, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v16, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[10:11]
-; GISEL-NEXT:    v_add_i32_e64 v18, s[8:9], v19, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v19, s[4:5], v20, v19
-; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, s[6:7]
-; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v20
-; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v20
-; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v16, v20
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v19
-; GISEL-NEXT:    v_add_i32_e64 v17, s[4:5], v18, v17
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
-; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v17
-; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v15, v18
-; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v16, v19
-; GISEL-NEXT:    v_mul_lo_u32 v16, v1, v10
-; GISEL-NEXT:    v_mul_hi_u32 v17, v0, v10
+; GISEL-NEXT:    v_add_i32_e64 v16, s[4:5], v16, v18
+; GISEL-NEXT:    v_mul_lo_u32 v18, v13, v9
+; GISEL-NEXT:    v_add_i32_e64 v17, s[6:7], v19, v17
+; GISEL-NEXT:    v_mul_hi_u32 v19, v11, v9
+; GISEL-NEXT:    v_mul_hi_u32 v9, v13, v9
+; GISEL-NEXT:    v_add_i32_e64 v18, s[8:9], v18, v20
+; GISEL-NEXT:    v_addc_u32_e64 v15, s[10:11], v16, v15, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v14, s[6:7], v17, v14, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v14, s[12:13], v18, v19, s[8:9]
+; GISEL-NEXT:    v_addc_u32_e64 v16, s[14:15], 0, 0, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v17, s[10:11], 0, 0, s[10:11]
+; GISEL-NEXT:    v_addc_u32_e64 v18, s[6:7], 0, 0, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v19, s[6:7], 0, 0, s[12:13]
+; GISEL-NEXT:    v_addc_u32_e64 v15, vcc, v15, v16, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v14, s[4:5], v14, v18, s[8:9]
+; GISEL-NEXT:    v_addc_u32_e32 v16, vcc, 0, v17, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v17, vcc, 0, v19, s[4:5]
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v15
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
+; GISEL-NEXT:    v_add_i32_e64 v6, s[6:7], v6, v16
+; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v17
+; GISEL-NEXT:    v_mul_lo_u32 v14, v1, v10
+; GISEL-NEXT:    v_mul_hi_u32 v15, v0, v10
 ; GISEL-NEXT:    v_mul_hi_u32 v10, v1, v10
-; GISEL-NEXT:    v_mul_lo_u32 v18, v3, v11
-; GISEL-NEXT:    v_mul_hi_u32 v19, v2, v11
+; GISEL-NEXT:    v_mul_lo_u32 v16, v3, v11
+; GISEL-NEXT:    v_mul_hi_u32 v17, v2, v11
 ; GISEL-NEXT:    v_mul_hi_u32 v11, v3, v11
-; GISEL-NEXT:    v_add_i32_e64 v6, s[6:7], v6, v12
-; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v15
-; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v13, v6, vcc
-; GISEL-NEXT:    v_addc_u32_e64 v9, vcc, v14, v9, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v12, v6, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v9, vcc, v13, v9, s[4:5]
 ; GISEL-NEXT:    v_mul_lo_u32 v12, v0, v6
 ; GISEL-NEXT:    v_mul_lo_u32 v13, v1, v6
-; GISEL-NEXT:    v_mul_hi_u32 v14, v0, v6
+; GISEL-NEXT:    v_mul_hi_u32 v18, v0, v6
 ; GISEL-NEXT:    v_mul_hi_u32 v6, v1, v6
-; GISEL-NEXT:    v_mul_lo_u32 v15, v2, v9
+; GISEL-NEXT:    v_mul_lo_u32 v19, v2, v9
 ; GISEL-NEXT:    v_mul_lo_u32 v20, v3, v9
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v16, v12
-; GISEL-NEXT:    v_mul_hi_u32 v16, v2, v9
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v14, v12
+; GISEL-NEXT:    v_mul_hi_u32 v14, v2, v9
 ; GISEL-NEXT:    v_mul_hi_u32 v9, v3, v9
 ; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v13, v10
-; GISEL-NEXT:    v_add_i32_e64 v13, s[6:7], v18, v15
+; GISEL-NEXT:    v_add_i32_e64 v13, s[6:7], v16, v19
 ; GISEL-NEXT:    v_add_i32_e64 v11, s[8:9], v20, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, s[6:7]
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v17
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v14
-; GISEL-NEXT:    v_add_i32_e64 v13, s[6:7], v13, v19
-; GISEL-NEXT:    v_add_i32_e64 v11, s[8:9], v11, v16
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v15, v13
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v18, v14
-; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v20, v16
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v17
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
-; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
-; GISEL-NEXT:    v_mul_lo_u32 v16, v7, v10
-; GISEL-NEXT:    v_mul_lo_u32 v17, v8, v10
+; GISEL-NEXT:    v_addc_u32_e32 v12, vcc, v12, v15, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v10, s[10:11], v10, v18, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v12, s[6:7], v13, v17, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v11, s[12:13], v11, v14, s[8:9]
+; GISEL-NEXT:    v_addc_u32_e64 v12, s[14:15], 0, 0, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v13, s[10:11], 0, 0, s[10:11]
+; GISEL-NEXT:    v_addc_u32_e64 v14, s[6:7], 0, 0, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v15, s[6:7], 0, 0, s[12:13]
+; GISEL-NEXT:    v_addc_u32_e64 v10, vcc, v10, v12, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v11, s[4:5], v11, v14, s[8:9]
+; GISEL-NEXT:    v_addc_u32_e32 v12, vcc, 0, v13, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v13, vcc, 0, v15, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v14, v7, v10
+; GISEL-NEXT:    v_mul_lo_u32 v15, v8, v10
 ; GISEL-NEXT:    v_mul_hi_u32 v10, v7, v10
-; GISEL-NEXT:    v_mul_lo_u32 v18, v4, v11
-; GISEL-NEXT:    v_mul_lo_u32 v19, v5, v11
+; GISEL-NEXT:    v_mul_lo_u32 v16, v4, v11
+; GISEL-NEXT:    v_mul_lo_u32 v17, v5, v11
 ; GISEL-NEXT:    v_mul_hi_u32 v11, v4, v11
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v16
-; GISEL-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v18
-; GISEL-NEXT:    v_add_i32_e64 v6, s[6:7], v6, v13
-; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v12
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v12
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v13
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v14
+; GISEL-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v16
+; GISEL-NEXT:    v_mul_lo_u32 v6, v7, v6
+; GISEL-NEXT:    v_mul_lo_u32 v9, v4, v9
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v0, v7
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[8:9], v2, v4
 ; GISEL-NEXT:    v_sub_i32_e64 v12, s[10:11], v0, v7
 ; GISEL-NEXT:    v_sub_i32_e64 v13, s[12:13], v2, v4
-; GISEL-NEXT:    v_mul_lo_u32 v6, v7, v6
-; GISEL-NEXT:    v_mul_lo_u32 v9, v4, v9
+; GISEL-NEXT:    v_add_i32_e64 v6, s[14:15], v15, v6
+; GISEL-NEXT:    v_add_i32_e64 v9, s[14:15], v17, v9
 ; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, -1, s[6:7]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, -1, s[8:9]
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v12, v7
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[8:9], v13, v4
 ; GISEL-NEXT:    v_sub_i32_e64 v7, s[14:15], v12, v7
 ; GISEL-NEXT:    v_sub_i32_e64 v4, s[16:17], v13, v4
-; GISEL-NEXT:    v_add_i32_e64 v6, s[18:19], v17, v6
-; GISEL-NEXT:    v_add_i32_e64 v9, s[18:19], v19, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, -1, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, -1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e64 v6, s[6:7], v6, v10
-; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v11
-; GISEL-NEXT:    v_subb_u32_e64 v10, s[6:7], v1, v6, vcc
+; GISEL-NEXT:    v_add_i32_e64 v6, s[18:19], v6, v10
+; GISEL-NEXT:    v_add_i32_e64 v9, s[18:19], v9, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[8:9]
+; GISEL-NEXT:    v_subb_u32_e64 v16, s[6:7], v1, v6, vcc
 ; GISEL-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v6
 ; GISEL-NEXT:    v_subb_u32_e64 v6, s[6:7], v3, v9, s[4:5]
 ; GISEL-NEXT:    v_sub_i32_e64 v3, s[6:7], v3, v9
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v10, v8
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v16, v8
 ; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v8, vcc
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v6, v5
 ; GISEL-NEXT:    v_subb_u32_e64 v3, s[4:5], v3, v5, s[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v10, v8
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v16, v8
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[8:9], v6, v5
 ; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, -1, vcc
 ; GISEL-NEXT:    v_subbrev_u32_e64 v18, vcc, 0, v1, s[10:11]
 ; GISEL-NEXT:    v_subb_u32_e64 v1, vcc, v1, v8, s[10:11]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v9, v9, v14, s[4:5]
 ; GISEL-NEXT:    v_subbrev_u32_e64 v14, vcc, 0, v3, s[12:13]
 ; GISEL-NEXT:    v_subb_u32_e64 v3, vcc, v3, v5, s[12:13]
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, v11, v15, s[8:9]
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, v17, v15, s[8:9]
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v18, v8
 ; GISEL-NEXT:    v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[14:15]
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v14, v5
@@ -1913,9 +1713,9 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v9
-; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v16, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, v8, v17, s[8:9]
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v10, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, v8, v11, s[8:9]
 ; GISEL-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v5
 ; GISEL-NEXT:    v_cmp_ne_u32_e64 s[8:9], 0, v8
 ; GISEL-NEXT:    v_cndmask_b32_e64 v5, v12, v7, s[6:7]
@@ -1924,7 +1724,7 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; GISEL-NEXT:    v_cndmask_b32_e64 v3, v14, v3, s[8:9]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, v4, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v10, v1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v16, v1, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v3, v6, v3, s[4:5]
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1944,7 +1744,7 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
 ; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; CGP-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
+; CGP-NEXT:    s_xor_b64 s[8:9], exec, s[4:5]
 ; CGP-NEXT:    s_cbranch_execz .LBB8_2
 ; CGP-NEXT:  ; %bb.1:
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v3
@@ -1972,18 +1772,13 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    v_mul_hi_u32 v19, v0, v13
 ; CGP-NEXT:    v_mul_hi_u32 v13, v4, v13
 ; CGP-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
-; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v18, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v15, vcc, v15, v17
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v19
-; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v15, vcc, v16, v15
-; CGP-NEXT:    v_add_i32_e32 v16, vcc, v18, v17
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v15, vcc, v16, v15
+; CGP-NEXT:    v_addc_u32_e32 v15, vcc, v15, v17, vcc
+; CGP-NEXT:    v_add_i32_e64 v14, s[4:5], v18, v14
+; CGP-NEXT:    v_addc_u32_e64 v14, s[6:7], v14, v19, s[4:5]
+; CGP-NEXT:    v_addc_u32_e64 v15, s[10:11], 0, 0, vcc
+; CGP-NEXT:    v_addc_u32_e64 v16, s[6:7], 0, 0, s[6:7]
+; CGP-NEXT:    v_addc_u32_e64 v14, vcc, v14, v15, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v15, vcc, 0, v16, vcc
 ; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
 ; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v14
 ; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v4, v13, vcc
@@ -2001,18 +1796,13 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    v_mul_hi_u32 v17, v0, v1
 ; CGP-NEXT:    v_mul_hi_u32 v1, v4, v1
 ; CGP-NEXT:    v_add_i32_e32 v12, vcc, v15, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v16
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
-; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v15, v12
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
+; CGP-NEXT:    v_addc_u32_e32 v12, vcc, v12, v16, vcc
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v14, v13
+; CGP-NEXT:    v_addc_u32_e64 v12, s[6:7], v12, v17, s[4:5]
+; CGP-NEXT:    v_addc_u32_e64 v13, s[10:11], 0, 0, vcc
+; CGP-NEXT:    v_addc_u32_e64 v14, s[6:7], 0, 0, s[6:7]
+; CGP-NEXT:    v_addc_u32_e64 v12, vcc, v12, v13, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v13, vcc, 0, v14, vcc
 ; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v13
 ; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v12
 ; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v4, v1, vcc
@@ -2024,26 +1814,21 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    v_mul_hi_u32 v15, v8, v1
 ; CGP-NEXT:    v_mul_hi_u32 v1, v9, v1
 ; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v14, v0
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v15
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v14, v12
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v12, v4
-; CGP-NEXT:    v_mul_lo_u32 v12, v2, v0
-; CGP-NEXT:    v_mul_lo_u32 v13, v3, v0
-; CGP-NEXT:    v_mul_hi_u32 v0, v2, v0
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v4, v12, vcc
+; CGP-NEXT:    v_add_i32_e64 v0, s[4:5], v14, v0
+; CGP-NEXT:    v_addc_u32_e64 v0, s[6:7], v0, v15, s[4:5]
+; CGP-NEXT:    v_addc_u32_e64 v4, s[10:11], 0, 0, vcc
+; CGP-NEXT:    v_addc_u32_e64 v12, s[6:7], 0, 0, s[6:7]
+; CGP-NEXT:    v_addc_u32_e64 v0, vcc, v0, v4, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, 0, v12, vcc
 ; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
+; CGP-NEXT:    v_mul_lo_u32 v4, v2, v0
+; CGP-NEXT:    v_mul_lo_u32 v12, v3, v0
+; CGP-NEXT:    v_mul_hi_u32 v0, v2, v0
 ; CGP-NEXT:    v_mul_lo_u32 v1, v2, v1
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v12, v1
 ; CGP-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v8, v12
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v8, v4
 ; CGP-NEXT:    v_subb_u32_e64 v4, s[4:5], v9, v0, vcc
 ; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v9, v0
 ; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v2
@@ -2074,7 +1859,7 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    ; implicit-def: $vgpr2_vgpr3
 ; CGP-NEXT:    ; implicit-def: $vgpr8
 ; CGP-NEXT:  .LBB8_2: ; %Flow1
-; CGP-NEXT:    s_or_saveexec_b64 s[4:5], s[6:7]
+; CGP-NEXT:    s_or_saveexec_b64 s[4:5], s[8:9]
 ; CGP-NEXT:    v_lshl_b64 v[9:10], v[10:11], v6
 ; CGP-NEXT:    s_xor_b64 exec, exec, s[4:5]
 ; CGP-NEXT:    s_cbranch_execz .LBB8_4
@@ -2104,10 +1889,10 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v9
 ; CGP-NEXT:    ; implicit-def: $vgpr2_vgpr3
 ; CGP-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
+; CGP-NEXT:    s_xor_b64 s[8:9], exec, s[4:5]
 ; CGP-NEXT:    s_cbranch_execnz .LBB8_7
 ; CGP-NEXT:  ; %bb.5: ; %Flow
-; CGP-NEXT:    s_andn2_saveexec_b64 s[4:5], s[6:7]
+; CGP-NEXT:    s_andn2_saveexec_b64 s[4:5], s[8:9]
 ; CGP-NEXT:    s_cbranch_execnz .LBB8_8
 ; CGP-NEXT:  .LBB8_6:
 ; CGP-NEXT:    s_or_b64 exec, exec, s[4:5]
@@ -2138,18 +1923,13 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    v_mul_hi_u32 v16, v2, v8
 ; CGP-NEXT:    v_mul_hi_u32 v8, v4, v8
 ; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v15, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v16
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v15, v14
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; CGP-NEXT:    v_addc_u32_e32 v12, vcc, v12, v14, vcc
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v15, v11
+; CGP-NEXT:    v_addc_u32_e64 v11, s[6:7], v11, v16, s[4:5]
+; CGP-NEXT:    v_addc_u32_e64 v12, s[10:11], 0, 0, vcc
+; CGP-NEXT:    v_addc_u32_e64 v13, s[6:7], 0, 0, s[6:7]
+; CGP-NEXT:    v_addc_u32_e64 v11, vcc, v11, v12, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v12, vcc, 0, v13, vcc
 ; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v11
 ; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v4, v8, vcc
@@ -2167,18 +1947,13 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    v_mul_hi_u32 v14, v2, v3
 ; CGP-NEXT:    v_mul_hi_u32 v3, v4, v3
 ; CGP-NEXT:    v_add_i32_e32 v6, vcc, v12, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v12, v6
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
+; CGP-NEXT:    v_addc_u32_e32 v6, vcc, v6, v13, vcc
+; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v11, v8
+; CGP-NEXT:    v_addc_u32_e64 v6, s[6:7], v6, v14, s[4:5]
+; CGP-NEXT:    v_addc_u32_e64 v8, s[10:11], 0, 0, vcc
+; CGP-NEXT:    v_addc_u32_e64 v11, s[6:7], 0, 0, s[6:7]
+; CGP-NEXT:    v_addc_u32_e64 v6, vcc, v6, v8, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v8, vcc, 0, v11, vcc
 ; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v8
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
 ; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v4, v3, vcc
@@ -2190,26 +1965,21 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    v_mul_hi_u32 v12, v5, v3
 ; CGP-NEXT:    v_mul_hi_u32 v3, v7, v3
 ; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
-; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v11, v2
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v11, v6
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
-; CGP-NEXT:    v_mul_lo_u32 v6, v9, v2
-; CGP-NEXT:    v_mul_lo_u32 v8, v10, v2
-; CGP-NEXT:    v_mul_hi_u32 v2, v9, v2
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v4, v6, vcc
+; CGP-NEXT:    v_add_i32_e64 v2, s[4:5], v11, v2
+; CGP-NEXT:    v_addc_u32_e64 v2, s[6:7], v2, v12, s[4:5]
+; CGP-NEXT:    v_addc_u32_e64 v4, s[10:11], 0, 0, vcc
+; CGP-NEXT:    v_addc_u32_e64 v6, s[6:7], 0, 0, s[6:7]
+; CGP-NEXT:    v_addc_u32_e64 v2, vcc, v2, v4, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, 0, v6, vcc
 ; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
+; CGP-NEXT:    v_mul_lo_u32 v4, v9, v2
+; CGP-NEXT:    v_mul_lo_u32 v6, v10, v2
+; CGP-NEXT:    v_mul_hi_u32 v2, v9, v2
 ; CGP-NEXT:    v_mul_lo_u32 v3, v9, v3
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v8, v3
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v5, v6
+; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v5, v4
 ; CGP-NEXT:    v_subb_u32_e64 v4, s[4:5], v7, v2, vcc
 ; CGP-NEXT:    v_sub_i32_e64 v2, s[4:5], v7, v2
 ; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v3, v9
@@ -2239,7 +2009,7 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    ; implicit-def: $vgpr4
 ; CGP-NEXT:    ; implicit-def: $vgpr9_vgpr10
 ; CGP-NEXT:    ; implicit-def: $vgpr5
-; CGP-NEXT:    s_andn2_saveexec_b64 s[4:5], s[6:7]
+; CGP-NEXT:    s_andn2_saveexec_b64 s[4:5], s[8:9]
 ; CGP-NEXT:    s_cbranch_execz .LBB8_6
 ; CGP-NEXT:  .LBB8_8:
 ; CGP-NEXT:    v_rcp_iflag_f32_e32 v2, v4
@@ -2376,39 +2146,29 @@ define <2 x i64> @v_urem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_mul_hi_u32 v25, v7, v13
 ; GISEL-NEXT:    v_mul_hi_u32 v13, v11, v13
 ; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v19, v14
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v18, vcc, v18, v23
-; GISEL-NEXT:    v_cndmask_b32_e64 v23, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v24, v17
-; GISEL-NEXT:    v_cndmask_b32_e64 v24, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v20
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v22
-; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v18, vcc, v18, v21
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v25
-; GISEL-NEXT:    v_cndmask_b32_e64 v21, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v16, v15
-; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v19, v20
-; GISEL-NEXT:    v_add_i32_e32 v18, vcc, v23, v18
-; GISEL-NEXT:    v_add_i32_e32 v19, vcc, v24, v21
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v16, v15
-; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v19, v18
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v16
+; GISEL-NEXT:    v_addc_u32_e32 v15, vcc, v15, v20, vcc
+; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v19, v14
+; GISEL-NEXT:    v_addc_u32_e64 v14, s[6:7], v14, v22, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v15, s[8:9], v18, v23
+; GISEL-NEXT:    v_addc_u32_e64 v15, s[8:9], v15, v21, s[8:9]
+; GISEL-NEXT:    v_add_i32_e64 v15, s[10:11], v24, v17
+; GISEL-NEXT:    v_addc_u32_e64 v15, s[12:13], v15, v25, s[10:11]
+; GISEL-NEXT:    v_addc_u32_e64 v16, s[14:15], 0, 0, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v17, s[6:7], 0, 0, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v18, s[6:7], 0, 0, s[8:9]
+; GISEL-NEXT:    v_addc_u32_e64 v19, s[6:7], 0, 0, s[12:13]
+; GISEL-NEXT:    v_addc_u32_e64 v14, vcc, v14, v16, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v16, vcc, 0, v17, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v15, vcc, v15, v18, s[10:11]
+; GISEL-NEXT:    v_addc_u32_e32 v17, vcc, 0, v19, vcc
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v16
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
 ; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v14
 ; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, v8, v12, vcc
 ; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v6
 ; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v6
 ; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v6
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v17
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v15
 ; GISEL-NEXT:    v_addc_u32_e32 v11, vcc, v11, v13, vcc
 ; GISEL-NEXT:    v_mul_lo_u32 v13, v9, v7
 ; GISEL-NEXT:    v_mul_lo_u32 v10, v10, v7
@@ -2434,39 +2194,29 @@ define <2 x i64> @v_urem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_mul_hi_u32 v21, v7, v5
 ; GISEL-NEXT:    v_mul_hi_u32 v5, v11, v5
 ; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v16, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v18, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v20, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v17
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v14
-; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v19
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v21
-; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v16, v9
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v18, v15
-; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v20, v17
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
-; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v14
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v12
+; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v9, v17, vcc
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v10, v12
+; GISEL-NEXT:    v_addc_u32_e64 v9, s[6:7], v9, v14, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v10, s[8:9], v18, v15
+; GISEL-NEXT:    v_addc_u32_e64 v10, s[8:9], v10, v19, s[8:9]
+; GISEL-NEXT:    v_add_i32_e64 v10, s[10:11], v20, v13
+; GISEL-NEXT:    v_addc_u32_e64 v10, s[12:13], v10, v21, s[10:11]
+; GISEL-NEXT:    v_addc_u32_e64 v12, s[14:15], 0, 0, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v13, s[6:7], 0, 0, s[6:7]
+; GISEL-NEXT:    v_addc_u32_e64 v14, s[6:7], 0, 0, s[8:9]
+; GISEL-NEXT:    v_addc_u32_e64 v15, s[6:7], 0, 0, s[12:13]
+; GISEL-NEXT:    v_addc_u32_e64 v9, vcc, v9, v12, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v12, vcc, 0, v13, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v10, vcc, v10, v14, s[10:11]
+; GISEL-NEXT:    v_addc_u32_e32 v13, vcc, 0, v15, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v13
 ; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
 ; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, v8, v4, vcc
 ; GISEL-NEXT:    v_mul_lo_u32 v8, 0, v6
 ; GISEL-NEXT:    v_mul_hi_u32 v9, v3, v6
 ; GISEL-NEXT:    v_mul_hi_u32 v6, 0, v6
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v13
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
 ; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v11, v5, vcc
 ; GISEL-NEXT:    v_mul_lo_u32 v10, 0, v7
 ; GISEL-NEXT:    v_mul_hi_u32 v11, v2, v7
@@ -2484,39 +2234,37 @@ define <2 x i64> @v_urem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v15
 ; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v16, v7
 ; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v8, s[4:5], 0, 0, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v14
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v9, s[4:5], 0, 0, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], 0, 0, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v17
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v11, s[4:5], 0, 0, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, 0, v9, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; GISEL-NEXT:    v_mul_lo_u32 v9, v1, v6
-; GISEL-NEXT:    v_mul_lo_u32 v12, 0, v6
+; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, 0, v11, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_mul_lo_u32 v8, v1, v6
+; GISEL-NEXT:    v_mul_lo_u32 v10, 0, v6
 ; GISEL-NEXT:    v_mul_hi_u32 v6, v1, v6
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; GISEL-NEXT:    v_mul_lo_u32 v11, v0, v7
-; GISEL-NEXT:    v_mul_lo_u32 v13, 0, v7
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; GISEL-NEXT:    v_mul_lo_u32 v9, v0, v7
+; GISEL-NEXT:    v_mul_lo_u32 v11, 0, v7
 ; GISEL-NEXT:    v_mul_hi_u32 v7, v0, v7
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
 ; GISEL-NEXT:    v_mul_lo_u32 v4, v1, v4
 ; GISEL-NEXT:    v_mul_lo_u32 v5, v0, v5
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v12, v4
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v13, v5
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v10, v4
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v11, v5
 ; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
 ; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
-; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v3, v9
+; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v3, v8
 ; GISEL-NEXT:    v_subb_u32_e64 v6, s[4:5], 0, v4, vcc
 ; GISEL-NEXT:    v_sub_i32_e64 v4, s[4:5], 0, v4
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v3, v1
 ; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v11
+; GISEL-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v9
 ; GISEL-NEXT:    v_subb_u32_e64 v8, s[6:7], 0, v5, s[4:5]
 ; GISEL-NEXT:    v_sub_i32_e64 v5, s[6:7], 0, v5
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v2, v0
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir
index 674d7b68bfae6..2e61d8771988a 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir
+++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir
@@ -270,6 +270,7 @@ body:             |
     ; MIPS32-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3
     ; MIPS32-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (load (s32) from %fixed-stack.3)
     ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY]]
+    ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; MIPS32-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[LOAD1]], [[COPY]]
     ; MIPS32-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY1]]
     ; MIPS32-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[LOAD]], [[COPY]]
@@ -277,33 +278,67 @@ body:             |
     ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[MUL2]]
     ; MIPS32-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ADD]](s32)
     ; MIPS32-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY4]], [[UMULH]]
-    ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), [[UMULH]]
-    ; MIPS32-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ADD1]](s32)
-    ; MIPS32-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ICMP]], [[ICMP1]]
+    ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), [[COPY4]]
+    ; MIPS32-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ICMP]]
+    ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD2]](s32), [[C]]
+    ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[ICMP]]
+    ; MIPS32-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ICMP1]], [[AND]]
+    ; MIPS32-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ADD2]](s32)
+    ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; MIPS32-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C1]]
+    ; MIPS32-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[C]], [[AND1]]
+    ; MIPS32-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ADD3]](s32)
     ; MIPS32-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[LOAD2]], [[COPY]]
     ; MIPS32-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[LOAD1]], [[COPY1]]
     ; MIPS32-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY2]]
     ; MIPS32-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[LOAD1]], [[COPY]]
     ; MIPS32-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[LOAD]], [[COPY1]]
-    ; MIPS32-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]]
-    ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD3]](s32), [[MUL4]]
-    ; MIPS32-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ADD3]](s32)
-    ; MIPS32-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[COPY6]], [[MUL5]]
-    ; MIPS32-NEXT: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD4]](s32), [[MUL5]]
+    ; MIPS32-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]]
+    ; MIPS32-NEXT: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD4]](s32), [[MUL4]]
     ; MIPS32-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[ADD4]](s32)
-    ; MIPS32-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ICMP2]], [[ICMP3]]
-    ; MIPS32-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[COPY7]], [[UMULH1]]
-    ; MIPS32-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD6]](s32), [[UMULH1]]
+    ; MIPS32-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[COPY7]], [[MUL5]]
+    ; MIPS32-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD5]](s32), [[COPY7]]
+    ; MIPS32-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ICMP3]]
+    ; MIPS32-NEXT: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD6]](s32), [[C]]
+    ; MIPS32-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP5]], [[ICMP3]]
+    ; MIPS32-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ICMP4]], [[AND2]]
     ; MIPS32-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[ADD6]](s32)
-    ; MIPS32-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ICMP4]]
-    ; MIPS32-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[COPY8]], [[UMULH2]]
-    ; MIPS32-NEXT: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD8]](s32), [[UMULH2]]
-    ; MIPS32-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[ADD8]](s32)
-    ; MIPS32-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[ICMP5]]
-    ; MIPS32-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[COPY9]], [[ADD2]]
-    ; MIPS32-NEXT: [[ICMP6:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD10]](s32), [[ADD2]]
-    ; MIPS32-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[ADD10]](s32)
-    ; MIPS32-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ICMP6]]
+    ; MIPS32-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C1]]
+    ; MIPS32-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[C]], [[AND3]]
+    ; MIPS32-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[ADD7]](s32)
+    ; MIPS32-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[COPY8]], [[UMULH1]]
+    ; MIPS32-NEXT: [[ICMP6:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD8]](s32), [[COPY8]]
+    ; MIPS32-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ADD8]], [[ICMP3]]
+    ; MIPS32-NEXT: [[ICMP7:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD9]](s32), [[C]]
+    ; MIPS32-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP7]], [[ICMP3]]
+    ; MIPS32-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ICMP6]], [[AND4]]
+    ; MIPS32-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[ADD9]](s32)
+    ; MIPS32-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[COPY9]], [[C]]
+    ; MIPS32-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C1]]
+    ; MIPS32-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[AND5]]
+    ; MIPS32-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[ADD11]](s32)
+    ; MIPS32-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[COPY10]], [[UMULH2]]
+    ; MIPS32-NEXT: [[ICMP8:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD12]](s32), [[COPY10]]
+    ; MIPS32-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[ICMP3]]
+    ; MIPS32-NEXT: [[ICMP9:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD13]](s32), [[C]]
+    ; MIPS32-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ICMP9]], [[ICMP3]]
+    ; MIPS32-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ICMP8]], [[AND6]]
+    ; MIPS32-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[ADD13]](s32)
+    ; MIPS32-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[COPY11]], [[C]]
+    ; MIPS32-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C1]]
+    ; MIPS32-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[AND7]]
+    ; MIPS32-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[ADD15]](s32)
+    ; MIPS32-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[COPY12]], [[COPY6]]
+    ; MIPS32-NEXT: [[ICMP10:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD16]](s32), [[COPY12]]
+    ; MIPS32-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[ICMP3]]
+    ; MIPS32-NEXT: [[ICMP11:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD17]](s32), [[C]]
+    ; MIPS32-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ICMP11]], [[ICMP3]]
+    ; MIPS32-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ICMP10]], [[AND8]]
+    ; MIPS32-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[ADD17]](s32)
+    ; MIPS32-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[COPY13]], [[C]]
+    ; MIPS32-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C1]]
+    ; MIPS32-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[AND9]]
+    ; MIPS32-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[ADD19]](s32)
     ; MIPS32-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[LOAD3]], [[COPY]]
     ; MIPS32-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[LOAD2]], [[COPY1]]
     ; MIPS32-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[LOAD1]], [[COPY2]]
@@ -311,17 +346,17 @@ body:             |
     ; MIPS32-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[LOAD2]], [[COPY]]
     ; MIPS32-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[LOAD1]], [[COPY1]]
     ; MIPS32-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[LOAD]], [[COPY2]]
-    ; MIPS32-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[MUL6]], [[MUL7]]
-    ; MIPS32-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[MUL8]]
-    ; MIPS32-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[MUL9]]
-    ; MIPS32-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[UMULH3]]
-    ; MIPS32-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ADD15]], [[UMULH4]]
-    ; MIPS32-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH5]]
-    ; MIPS32-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ADD11]]
+    ; MIPS32-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[MUL6]], [[MUL7]]
+    ; MIPS32-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ADD20]], [[MUL8]]
+    ; MIPS32-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[MUL9]]
+    ; MIPS32-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[ADD22]], [[UMULH3]]
+    ; MIPS32-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[ADD23]], [[UMULH4]]
+    ; MIPS32-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH5]]
+    ; MIPS32-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ADD25]], [[COPY15]]
     ; MIPS32-NEXT: $v0 = COPY [[MUL]](s32)
     ; MIPS32-NEXT: $v1 = COPY [[COPY5]](s32)
-    ; MIPS32-NEXT: $a0 = COPY [[COPY10]](s32)
-    ; MIPS32-NEXT: $a1 = COPY [[ADD18]](s32)
+    ; MIPS32-NEXT: $a0 = COPY [[COPY14]](s32)
+    ; MIPS32-NEXT: $a1 = COPY [[ADD26]](s32)
     ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1, implicit $a0, implicit $a1
     %2:_(s32) = COPY $a0
     %3:_(s32) = COPY $a1
@@ -361,6 +396,7 @@ body:             |
     ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
     ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
     ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
+    ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY3]], [[COPY]]
     ; MIPS32-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[COPY2]], [[COPY1]]
     ; MIPS32-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY2]], [[COPY]]
@@ -368,27 +404,47 @@ body:             |
     ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[MUL1]]
     ; MIPS32-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ADD]](s32)
     ; MIPS32-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY4]], [[UMULH]]
-    ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), [[UMULH]]
-    ; MIPS32-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ADD1]](s32)
-    ; MIPS32-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ICMP]], [[ICMP1]]
+    ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), [[COPY4]]
+    ; MIPS32-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ICMP]]
+    ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD2]](s32), [[C]]
+    ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[ICMP]]
+    ; MIPS32-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ICMP1]], [[AND]]
+    ; MIPS32-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ADD2]](s32)
+    ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; MIPS32-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C1]]
+    ; MIPS32-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[C]], [[AND1]]
+    ; MIPS32-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ADD3]](s32)
     ; MIPS32-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[COPY3]], [[COPY1]]
     ; MIPS32-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[COPY3]], [[COPY]]
     ; MIPS32-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[COPY2]], [[COPY1]]
-    ; MIPS32-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL2]], [[UMULH1]]
-    ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD3]](s32), [[UMULH1]]
-    ; MIPS32-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ADD3]](s32)
-    ; MIPS32-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[COPY6]], [[UMULH2]]
-    ; MIPS32-NEXT: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD4]](s32), [[UMULH2]]
+    ; MIPS32-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL2]], [[UMULH1]]
+    ; MIPS32-NEXT: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD4]](s32), [[UMULH1]]
     ; MIPS32-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[ADD4]](s32)
-    ; MIPS32-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ICMP2]], [[ICMP3]]
-    ; MIPS32-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[COPY7]], [[ADD2]]
-    ; MIPS32-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD6]](s32), [[ADD2]]
+    ; MIPS32-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[COPY7]], [[UMULH2]]
+    ; MIPS32-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD5]](s32), [[COPY7]]
+    ; MIPS32-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ICMP3]]
+    ; MIPS32-NEXT: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD6]](s32), [[C]]
+    ; MIPS32-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP5]], [[ICMP3]]
+    ; MIPS32-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ICMP4]], [[AND2]]
     ; MIPS32-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[ADD6]](s32)
-    ; MIPS32-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ICMP4]]
+    ; MIPS32-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C1]]
+    ; MIPS32-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[C]], [[AND3]]
+    ; MIPS32-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[ADD7]](s32)
+    ; MIPS32-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[COPY8]], [[COPY6]]
+    ; MIPS32-NEXT: [[ICMP6:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD8]](s32), [[COPY8]]
+    ; MIPS32-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ADD8]], [[ICMP3]]
+    ; MIPS32-NEXT: [[ICMP7:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD9]](s32), [[C]]
+    ; MIPS32-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP7]], [[ICMP3]]
+    ; MIPS32-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ICMP6]], [[AND4]]
+    ; MIPS32-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[ADD9]](s32)
+    ; MIPS32-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[COPY9]], [[C]]
+    ; MIPS32-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C1]]
+    ; MIPS32-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[AND5]]
+    ; MIPS32-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[ADD11]](s32)
     ; MIPS32-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[COPY3]], [[COPY1]]
-    ; MIPS32-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD7]]
-    ; MIPS32-NEXT: $v0 = COPY [[COPY8]](s32)
-    ; MIPS32-NEXT: $v1 = COPY [[ADD8]](s32)
+    ; MIPS32-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[COPY11]]
+    ; MIPS32-NEXT: $v0 = COPY [[COPY10]](s32)
+    ; MIPS32-NEXT: $v1 = COPY [[ADD12]](s32)
     ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1
     %2:_(s32) = COPY $a0
     %3:_(s32) = COPY $a1
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll
index c8c66fabf202b..9ef2228c2424c 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll
@@ -117,35 +117,64 @@ define i128 @mul_i128(i128 %a, i128 %b) {
 ; MIPS32-NEXT:    addiu $1, $sp, 28
 ; MIPS32-NEXT:    lw $1, 0($1)
 ; MIPS32-NEXT:    mul $2, $6, $14
-; MIPS32-NEXT:    mul $3, $7, $14
-; MIPS32-NEXT:    mul $4, $6, $13
+; MIPS32-NEXT:    ori $5, $zero, 0
+; MIPS32-NEXT:    mul $4, $7, $14
+; MIPS32-NEXT:    mul $10, $6, $13
 ; MIPS32-NEXT:    multu $6, $14
-; MIPS32-NEXT:    mfhi $5
-; MIPS32-NEXT:    addu $3, $3, $4
+; MIPS32-NEXT:    mfhi $3
+; MIPS32-NEXT:    addu $4, $4, $10
+; MIPS32-NEXT:    sltu $11, $4, $10
+; MIPS32-NEXT:    addu $3, $4, $3
 ; MIPS32-NEXT:    sltu $4, $3, $4
-; MIPS32-NEXT:    addu $3, $3, $5
-; MIPS32-NEXT:    sltu $5, $3, $5
-; MIPS32-NEXT:    addu $10, $4, $5
-; MIPS32-NEXT:    mul $4, $8, $14
-; MIPS32-NEXT:    mul $5, $7, $13
+; MIPS32-NEXT:    addu $3, $3, $11
+; MIPS32-NEXT:    sltiu $10, $3, 1
+; MIPS32-NEXT:    and $10, $10, $11
+; MIPS32-NEXT:    or $4, $4, $10
+; MIPS32-NEXT:    andi $4, $4, 1
+; MIPS32-NEXT:    addu $4, $5, $4
+; MIPS32-NEXT:    mul $25, $8, $14
+; MIPS32-NEXT:    mul $15, $7, $13
 ; MIPS32-NEXT:    mul $24, $6, $12
 ; MIPS32-NEXT:    multu $7, $14
-; MIPS32-NEXT:    mfhi $15
-; MIPS32-NEXT:    multu $6, $13
 ; MIPS32-NEXT:    mfhi $11
-; MIPS32-NEXT:    addu $4, $4, $5
-; MIPS32-NEXT:    sltu $5, $4, $5
-; MIPS32-NEXT:    addu $4, $4, $24
-; MIPS32-NEXT:    sltu $24, $4, $24
+; MIPS32-NEXT:    multu $6, $13
+; MIPS32-NEXT:    mfhi $10
+; MIPS32-NEXT:    addu $25, $25, $15
+; MIPS32-NEXT:    sltu $15, $25, $15
+; MIPS32-NEXT:    addu $24, $25, $24
+; MIPS32-NEXT:    sltu $25, $24, $25
+; MIPS32-NEXT:    addu $24, $24, $15
+; MIPS32-NEXT:    sltiu $gp, $24, 1
+; MIPS32-NEXT:    and $gp, $gp, $15
+; MIPS32-NEXT:    or $25, $25, $gp
+; MIPS32-NEXT:    andi $25, $25, 1
+; MIPS32-NEXT:    addu $5, $5, $25
+; MIPS32-NEXT:    addu $11, $24, $11
+; MIPS32-NEXT:    sltu $24, $11, $24
+; MIPS32-NEXT:    addu $11, $11, $15
+; MIPS32-NEXT:    sltiu $25, $11, 1
+; MIPS32-NEXT:    and $25, $25, $15
+; MIPS32-NEXT:    or $24, $24, $25
+; MIPS32-NEXT:    addiu $5, $5, 0
+; MIPS32-NEXT:    andi $24, $24, 1
 ; MIPS32-NEXT:    addu $5, $5, $24
-; MIPS32-NEXT:    addu $4, $4, $15
-; MIPS32-NEXT:    sltu $15, $4, $15
-; MIPS32-NEXT:    addu $5, $5, $15
-; MIPS32-NEXT:    addu $4, $4, $11
-; MIPS32-NEXT:    sltu $11, $4, $11
+; MIPS32-NEXT:    addu $10, $11, $10
+; MIPS32-NEXT:    sltu $11, $10, $11
+; MIPS32-NEXT:    addu $10, $10, $15
+; MIPS32-NEXT:    sltiu $24, $10, 1
+; MIPS32-NEXT:    and $24, $24, $15
+; MIPS32-NEXT:    or $11, $11, $24
+; MIPS32-NEXT:    addiu $5, $5, 0
+; MIPS32-NEXT:    andi $11, $11, 1
 ; MIPS32-NEXT:    addu $5, $5, $11
-; MIPS32-NEXT:    addu $4, $4, $10
+; MIPS32-NEXT:    addu $4, $10, $4
 ; MIPS32-NEXT:    sltu $10, $4, $10
+; MIPS32-NEXT:    addu $4, $4, $15
+; MIPS32-NEXT:    sltiu $11, $4, 1
+; MIPS32-NEXT:    and $11, $11, $15
+; MIPS32-NEXT:    or $10, $10, $11
+; MIPS32-NEXT:    addiu $5, $5, 0
+; MIPS32-NEXT:    andi $10, $10, 1
 ; MIPS32-NEXT:    addu $5, $5, $10
 ; MIPS32-NEXT:    mul $1, $1, $14
 ; MIPS32-NEXT:    mul $11, $8, $13
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-mul-ext-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-mul-ext-rv32.mir
index ec2dc568a5ec3..d63d03e185b82 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-mul-ext-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-mul-ext-rv32.mir
@@ -164,27 +164,39 @@ body:             |
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[MUL2]]
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[ADD]](s32)
     ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[UMULH]]
-    ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), [[UMULH]]
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ADD1]](s32)
-    ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ICMP]], [[ICMP1]]
+    ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), [[COPY]]
+    ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ICMP]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD2]](s32), [[C]]
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[ICMP]]
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ICMP1]], [[AND]]
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ADD2]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[C1]], [[OR]]
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ADD3]](s32)
     ; CHECK-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL %hi1, %lo2
     ; CHECK-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL %mid1, %mid2
     ; CHECK-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL %lo1, %hi2
     ; CHECK-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH %mid1, %lo2
     ; CHECK-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH %lo1, %mid2
-    ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]]
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ADD3]](s32)
-    ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[MUL5]]
+    ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]]
+    ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD4]](s32), [[MUL4]]
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ADD4]](s32)
-    ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[COPY3]], [[UMULH1]]
-    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ADD5]](s32)
-    ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[COPY4]], [[UMULH2]]
-    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ADD6]](s32)
-    ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[COPY5]], [[ADD2]]
-    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ADD7]](s32)
+    ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[COPY3]], [[MUL5]]
+    ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ICMP3]]
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ADD6]](s32)
+    ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[COPY4]], [[UMULH1]]
+    ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[ICMP3]]
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ADD8]](s32)
+    ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[COPY5]], [[UMULH2]]
+    ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ICMP3]]
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ADD10]](s32)
+    ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[COPY6]], [[COPY2]]
+    ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ADD11]], [[ICMP3]]
+    ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[ADD12]](s32)
     ; CHECK-NEXT: $x10 = COPY [[MUL]](s32)
     ; CHECK-NEXT: $x11 = COPY [[COPY1]](s32)
-    ; CHECK-NEXT: $x12 = COPY [[COPY6]](s32)
+    ; CHECK-NEXT: $x12 = COPY [[COPY7]](s32)
     ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11, implicit $x12
     %lo1:_(s32) = COPY $x10
     %mid1:_(s32) = COPY $x11
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-mul-ext-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-mul-ext-rv64.mir
index 39d9c5b7dfd1e..931e3f5c279af 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-mul-ext-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-mul-ext-rv64.mir
@@ -196,27 +196,45 @@ body:             |
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[ADD]](s64), [[MUL2]]
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[ADD]](s64)
     ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[COPY]], [[UMULH]]
-    ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[ADD1]](s64), [[UMULH]]
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[ADD1]](s64)
-    ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s64) = G_ADD [[ICMP]], [[ICMP1]]
+    ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[ADD1]](s64), [[COPY]]
+    ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s64) = G_ADD [[ADD1]], [[ICMP]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[ADD2]](s64), [[C]]
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[ICMP2]](s64)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[ICMP]](s64)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[TRUNC1]]
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[ICMP1]](s64)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[TRUNC2]], [[AND]]
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[ADD2]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C2]]
+    ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s64) = G_ADD [[C1]], [[AND1]]
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[ADD3]](s64)
     ; CHECK-NEXT: [[MUL3:%[0-9]+]]:_(s64) = G_MUL %hi1, %lo2
     ; CHECK-NEXT: [[MUL4:%[0-9]+]]:_(s64) = G_MUL %mid1, %mid2
     ; CHECK-NEXT: [[MUL5:%[0-9]+]]:_(s64) = G_MUL %lo1, %hi2
     ; CHECK-NEXT: [[UMULH1:%[0-9]+]]:_(s64) = G_UMULH %mid1, %lo2
     ; CHECK-NEXT: [[UMULH2:%[0-9]+]]:_(s64) = G_UMULH %lo1, %mid2
-    ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s64) = G_ADD [[MUL3]], [[MUL4]]
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[ADD3]](s64)
-    ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s64) = G_ADD [[COPY2]], [[MUL5]]
+    ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s64) = G_ADD [[MUL3]], [[MUL4]]
+    ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[ADD4]](s64), [[MUL4]]
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY [[ADD4]](s64)
-    ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s64) = G_ADD [[COPY3]], [[UMULH1]]
-    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY [[ADD5]](s64)
-    ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s64) = G_ADD [[COPY4]], [[UMULH2]]
-    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY [[ADD6]](s64)
-    ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s64) = G_ADD [[COPY5]], [[ADD2]]
-    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY [[ADD7]](s64)
+    ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s64) = G_ADD [[COPY3]], [[MUL5]]
+    ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s64) = G_ADD [[ADD5]], [[ICMP3]]
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY [[ADD6]](s64)
+    ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s64) = G_ADD [[COPY4]], [[UMULH1]]
+    ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s64) = G_ADD [[ADD7]], [[ICMP3]]
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY [[ADD8]](s64)
+    ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s64) = G_ADD [[COPY5]], [[UMULH2]]
+    ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s64) = G_ADD [[ADD9]], [[ICMP3]]
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY [[ADD10]](s64)
+    ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s64) = G_ADD [[COPY6]], [[COPY2]]
+    ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s64) = G_ADD [[ADD11]], [[ICMP3]]
+    ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s64) = COPY [[ADD12]](s64)
     ; CHECK-NEXT: $x10 = COPY [[MUL]](s64)
     ; CHECK-NEXT: $x11 = COPY [[COPY1]](s64)
-    ; CHECK-NEXT: $x12 = COPY [[COPY6]](s64)
+    ; CHECK-NEXT: $x12 = COPY [[COPY7]](s64)
     ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11, implicit $x12
     %lo1:_(s64) = COPY $x10
     %mid1:_(s64) = COPY $x11


