[llvm] a36278c - [GlobalISel] Add G_UNMERGE(Cst) -> Cst1, Cst2, ... combine

Quentin Colombet via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 14 16:31:50 PDT 2020


Author: Quentin Colombet
Date: 2020-09-14T16:30:18-07:00
New Revision: a36278c2f8b5ba7e964ef2cdc14ef8c3f8b8a045

URL: https://github.com/llvm/llvm-project/commit/a36278c2f8b5ba7e964ef2cdc14ef8c3f8b8a045
DIFF: https://github.com/llvm/llvm-project/commit/a36278c2f8b5ba7e964ef2cdc14ef8c3f8b8a045.diff

LOG: [GlobalISel] Add G_UNMERGE(Cst) -> Cst1, Cst2, ... combine

Add a combiner helper that replaces a G_UNMERGE of a wide constant with
direct definitions of the smaller component constants.
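
For illustration, a minimal before/after sketch (the virtual registers and
the s64/s32 types here are made up for the example, not taken from the
patch). G_UNMERGE_VALUES defines its results from the least-significant
bits up, so the first destination gets the low chunk:

  %0:_(s64) = G_CONSTANT i64 8589934593 ; 0x0000000200000001
  %1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0(s64)

becomes

  %1:_(s32) = G_CONSTANT i32 1
  %2:_(s32) = G_CONSTANT i32 2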

Differential Revision: https://reviews.llvm.org/D87166

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
    llvm/include/llvm/Target/GlobalISel/Combine.td
    llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 8a5e80386e7e..2854025b0191 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -252,6 +252,12 @@ class CombinerHelper {
   applyCombineUnmergeMergeToPlainValues(MachineInstr &MI,
                                         SmallVectorImpl<Register> &Operands);
 
+  /// Transform G_UNMERGE Constant -> Constant1, Constant2, ...
+  bool matchCombineUnmergeConstant(MachineInstr &MI,
+                                   SmallVectorImpl<APInt> &Csts);
+  bool applyCombineUnmergeConstant(MachineInstr &MI,
+                                   SmallVectorImpl<APInt> &Csts);
+
   /// Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space.
   bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg);
   bool applyCombineI2PToP2I(MachineInstr &MI, Register &Reg);

diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index f99252935db4..95da231f517f 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -412,6 +412,15 @@ def fabs_fabs_fold: GICombineRule<
   (apply [{ return Helper.applyCombineFAbsOfFAbs(*${root}, ${matchinfo}); }])
 >;
 
+// Fold (unmerge cst) -> cst1, cst2, ...
+def unmerge_cst_matchinfo : GIDefMatchData<"SmallVector<APInt, 8>">;
+def unmerge_cst : GICombineRule<
+  (defs root:$d, unmerge_cst_matchinfo:$info),
+  (match (wip_match_opcode G_UNMERGE_VALUES): $d,
+  [{ return Helper.matchCombineUnmergeConstant(*${d}, ${info}); }]),
+  (apply [{ return Helper.applyCombineUnmergeConstant(*${d}, ${info}); }])
+>;
+
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                      undef_to_negative_one,
@@ -443,4 +452,4 @@ def all_combines : GICombineGroup<[trivial_combines, ptr_add_immed_chain,
     width_reduction_combines, select_combines,
     known_bits_simplifications, ext_ext_fold,
     not_cmp_fold, opt_brcond_by_inverting_cond,
-    unmerge_merge, fabs_fabs_fold]>;
+    unmerge_merge, fabs_fabs_fold, unmerge_cst]>;

diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index a2a7d6b928d4..ccc75d44a9ab 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1612,6 +1612,48 @@ bool CombinerHelper::applyCombineUnmergeMergeToPlainValues(
   return true;
 }
 
+bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI,
+                                                 SmallVectorImpl<APInt> &Csts) {
+  unsigned SrcIdx = MI.getNumOperands() - 1;
+  Register SrcReg = MI.getOperand(SrcIdx).getReg();
+  MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
+  if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
+      SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
+    return false;
+  // Break down the big constant into smaller ones.
+  const MachineOperand &CstVal = SrcInstr->getOperand(1);
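+  // For a G_FCONSTANT, use the IEEE bit pattern of the value: the
+  // G_UNMERGE_VALUES destinations are integer-typed chunks of those bits.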
+  APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
+                  ? CstVal.getCImm()->getValue()
+                  : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
+
+  LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
+  unsigned ShiftAmt = Dst0Ty.getSizeInBits();
+  // Slice the constant into ShiftAmt-bit pieces, least-significant bits
+  // first, matching the order of the G_UNMERGE_VALUES results.
+  for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
+    Csts.emplace_back(Val.trunc(ShiftAmt));
+    Val = Val.lshr(ShiftAmt);
+  }
+
+  return true;
+}
+
+bool CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
+                                                 SmallVectorImpl<APInt> &Csts) {
+  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+         "Expected an unmerge");
+  assert((MI.getNumOperands() - 1 == Csts.size()) &&
+         "Not enough operands to replace all defs");
+  unsigned NumElems = MI.getNumOperands() - 1;
+  Builder.setInstrAndDebugLoc(MI);
+  for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
+    Register DstReg = MI.getOperand(Idx).getReg();
+    Builder.buildConstant(DstReg, Csts[Idx]);
+  }
+
+  MI.eraseFromParent();
+  return true;
+}
+
 bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
                                                 unsigned TargetShiftSize,
                                                 unsigned &ShiftVal) {

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir
index 73401374ef9d..52f0836efec4 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir
@@ -181,3 +181,114 @@ body:             |
     $w1 = COPY %4(s32)
 ...
 
+# Unmerge a constant into a bunch of smaller constants.
+# The constant is 0x0102030405060708090a0b0c0d0e0f10 and we break it down into
+# bytes:
+# cst1 0x10
+# cst2 0x0f
+# cst3 0x0e
+# ...
+---
+name:            test_combine_unmerge_cst
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: test_combine_unmerge_cst
+    ; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 16
+    ; CHECK: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 15
+    ; CHECK: [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 14
+    ; CHECK: [[C3:%[0-9]+]]:_(s8) = G_CONSTANT i8 13
+    ; CHECK: [[C4:%[0-9]+]]:_(s8) = G_CONSTANT i8 12
+    ; CHECK: [[C5:%[0-9]+]]:_(s8) = G_CONSTANT i8 11
+    ; CHECK: [[C6:%[0-9]+]]:_(s8) = G_CONSTANT i8 10
+    ; CHECK: [[C7:%[0-9]+]]:_(s8) = G_CONSTANT i8 9
+    ; CHECK: [[C8:%[0-9]+]]:_(s8) = G_CONSTANT i8 8
+    ; CHECK: [[C9:%[0-9]+]]:_(s8) = G_CONSTANT i8 7
+    ; CHECK: [[C10:%[0-9]+]]:_(s8) = G_CONSTANT i8 6
+    ; CHECK: [[C11:%[0-9]+]]:_(s8) = G_CONSTANT i8 5
+    ; CHECK: [[C12:%[0-9]+]]:_(s8) = G_CONSTANT i8 4
+    ; CHECK: [[C13:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
+    ; CHECK: [[C14:%[0-9]+]]:_(s8) = G_CONSTANT i8 2
+    ; CHECK: [[C15:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
+    ; CHECK: $b0 = COPY [[C]](s8)
+    ; CHECK: $b1 = COPY [[C1]](s8)
+    ; CHECK: $b2 = COPY [[C2]](s8)
+    ; CHECK: $b3 = COPY [[C3]](s8)
+    ; CHECK: $b4 = COPY [[C4]](s8)
+    ; CHECK: $b5 = COPY [[C5]](s8)
+    ; CHECK: $b6 = COPY [[C6]](s8)
+    ; CHECK: $b7 = COPY [[C7]](s8)
+    ; CHECK: $b8 = COPY [[C8]](s8)
+    ; CHECK: $b9 = COPY [[C9]](s8)
+    ; CHECK: $b10 = COPY [[C10]](s8)
+    ; CHECK: $b11 = COPY [[C11]](s8)
+    ; CHECK: $b12 = COPY [[C12]](s8)
+    ; CHECK: $b13 = COPY [[C13]](s8)
+    ; CHECK: $b14 = COPY [[C14]](s8)
+    ; CHECK: $b15 = COPY [[C15]](s8)
+    %0:_(s128) = G_CONSTANT i128 1339673755198158349044581307228491536
+    %1:_(s8),%2:_(s8),%3:_(s8),%4:_(s8),%5:_(s8),%6:_(s8),%7:_(s8),%8:_(s8),%9:_(s8),%10:_(s8),%11:_(s8),%12:_(s8),%13:_(s8),%14:_(s8),%15:_(s8),%16:_(s8) = G_UNMERGE_VALUES %0(s128)
+    $b0 = COPY %1(s8)
+    $b1 = COPY %2(s8)
+    $b2 = COPY %3(s8)
+    $b3 = COPY %4(s8)
+    $b4 = COPY %5(s8)
+    $b5 = COPY %6(s8)
+    $b6 = COPY %7(s8)
+    $b7 = COPY %8(s8)
+    $b8 = COPY %9(s8)
+    $b9 = COPY %10(s8)
+    $b10 = COPY %11(s8)
+    $b11 = COPY %12(s8)
+    $b12 = COPY %13(s8)
+    $b13 = COPY %14(s8)
+    $b14 = COPY %15(s8)
+    $b15 = COPY %16(s8)
+...
+
+# Unmerge a constant of a non-power-of-2 type into a bunch of smaller
+# constants. The constant is 3 | 2 | 1 in 13-bit chunks, i.e.,
+# (3 << 26) | (2 << 13) | 1 = 201342977.
+---
+name:            test_combine_unmerge_cst_36bit
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: test_combine_unmerge_cst_36bit
+    ; CHECK: [[C:%[0-9]+]]:_(s13) = G_CONSTANT i13 1
+    ; CHECK: [[C1:%[0-9]+]]:_(s13) = G_CONSTANT i13 2
+    ; CHECK: [[C2:%[0-9]+]]:_(s13) = G_CONSTANT i13 3
+    ; CHECK: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[C]](s13)
+    ; CHECK: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[C1]](s13)
+    ; CHECK: [[ZEXT2:%[0-9]+]]:_(s16) = G_ZEXT [[C2]](s13)
+    ; CHECK: $h0 = COPY [[ZEXT]](s16)
+    ; CHECK: $h1 = COPY [[ZEXT1]](s16)
+    ; CHECK: $h2 = COPY [[ZEXT2]](s16)
+    %0:_(s39) = G_CONSTANT i39 201342977
+    %1:_(s13),%2:_(s13),%3:_(s13) = G_UNMERGE_VALUES %0(s39)
+    %4:_(s16) = G_ZEXT %1(s13)
+    %5:_(s16) = G_ZEXT %2(s13)
+    %6:_(s16) = G_ZEXT %3(s13)
+    $h0 = COPY %4(s16)
+    $h1 = COPY %5(s16)
+    $h2 = COPY %6(s16)
+...
+
+# Unmerge a floating-point constant.
+---
+name:            test_combine_unmerge_fpcst
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: test_combine_unmerge_fpcst
+    ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
+    ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
+    ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 3
+    ; CHECK: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
+    ; CHECK: $h0 = COPY [[C]](s16)
+    ; CHECK: $h1 = COPY [[C1]](s16)
+    ; CHECK: $h2 = COPY [[C2]](s16)
+    ; CHECK: $h3 = COPY [[C3]](s16)
+    %0:_(s64) = G_FCONSTANT double 0x0004000300020001
+    %1:_(s16),%2:_(s16),%3:_(s16),%4:_(s16) = G_UNMERGE_VALUES %0(s64)
+    $h0 = COPY %1(s16)
+    $h1 = COPY %2(s16)
+    $h2 = COPY %3(s16)
+    $h3 = COPY %4(s16)
+...

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
index dad8a5ac58e8..26a8d8112054 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
@@ -4999,24 +4999,22 @@ define <2 x i64> @v_saddsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX6-NEXT:    v_cmp_lt_i64_e32 vcc, v[8:9], v[0:1]
 ; GFX6-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[4:5]
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v9
-; GFX6-NEXT:    s_brev_b32 s8, 1
-; GFX6-NEXT:    v_mov_b32_e32 v1, s8
-; GFX6-NEXT:    v_add_i32_e64 v4, s[6:7], 0, v0
+; GFX6-NEXT:    v_bfrev_b32_e32 v10, 1
+; GFX6-NEXT:    v_add_i32_e64 v1, s[6:7], 0, v0
 ; GFX6-NEXT:    s_xor_b64 vcc, s[4:5], vcc
-; GFX6-NEXT:    v_addc_u32_e64 v1, s[6:7], v0, v1, s[6:7]
-; GFX6-NEXT:    v_cndmask_b32_e32 v0, v8, v4, vcc
-; GFX6-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
+; GFX6-NEXT:    v_addc_u32_e64 v4, s[6:7], v0, v10, s[6:7]
+; GFX6-NEXT:    v_cndmask_b32_e32 v0, v8, v1, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v1, v9, v4, vcc
 ; GFX6-NEXT:    v_add_i32_e32 v4, vcc, v2, v6
 ; GFX6-NEXT:    v_addc_u32_e32 v5, vcc, v3, v7, vcc
 ; GFX6-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[2:3]
 ; GFX6-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[6:7]
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v2, 31, v5
-; GFX6-NEXT:    v_mov_b32_e32 v3, s8
-; GFX6-NEXT:    v_add_i32_e64 v6, s[6:7], 0, v2
-; GFX6-NEXT:    v_addc_u32_e64 v3, s[6:7], v2, v3, s[6:7]
+; GFX6-NEXT:    v_add_i32_e64 v3, s[6:7], 0, v2
+; GFX6-NEXT:    v_addc_u32_e64 v6, s[6:7], v2, v10, s[6:7]
 ; GFX6-NEXT:    s_xor_b64 vcc, s[4:5], vcc
-; GFX6-NEXT:    v_cndmask_b32_e32 v2, v4, v6, vcc
-; GFX6-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v2, v4, v3, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v3, v5, v6, vcc
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_saddsat_v2i64:
@@ -5027,24 +5025,22 @@ define <2 x i64> @v_saddsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, v[8:9], v[0:1]
 ; GFX8-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[4:5]
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v9
-; GFX8-NEXT:    s_brev_b32 s8, 1
-; GFX8-NEXT:    v_mov_b32_e32 v1, s8
-; GFX8-NEXT:    v_add_u32_e64 v4, s[6:7], 0, v0
+; GFX8-NEXT:    v_bfrev_b32_e32 v10, 1
+; GFX8-NEXT:    v_add_u32_e64 v1, s[6:7], 0, v0
 ; GFX8-NEXT:    s_xor_b64 vcc, s[4:5], vcc
-; GFX8-NEXT:    v_addc_u32_e64 v1, s[6:7], v0, v1, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v8, v4, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
+; GFX8-NEXT:    v_addc_u32_e64 v4, s[6:7], v0, v10, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v8, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v9, v4, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v2, v6
 ; GFX8-NEXT:    v_addc_u32_e32 v5, vcc, v3, v7, vcc
 ; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[2:3]
 ; GFX8-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[6:7]
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 31, v5
-; GFX8-NEXT:    v_mov_b32_e32 v3, s8
-; GFX8-NEXT:    v_add_u32_e64 v6, s[6:7], 0, v2
-; GFX8-NEXT:    v_addc_u32_e64 v3, s[6:7], v2, v3, s[6:7]
+; GFX8-NEXT:    v_add_u32_e64 v3, s[6:7], 0, v2
+; GFX8-NEXT:    v_addc_u32_e64 v6, s[6:7], v2, v10, s[6:7]
 ; GFX8-NEXT:    s_xor_b64 vcc, s[4:5], vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v2, v4, v6, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v4, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v5, v6, vcc
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_saddsat_v2i64:
@@ -5055,56 +5051,53 @@ define <2 x i64> @v_saddsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[8:9], v[0:1]
 ; GFX9-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[4:5]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v9
-; GFX9-NEXT:    s_brev_b32 s8, 1
-; GFX9-NEXT:    v_mov_b32_e32 v1, s8
-; GFX9-NEXT:    v_add_co_u32_e64 v4, s[6:7], 0, v0
+; GFX9-NEXT:    v_bfrev_b32_e32 v10, 1
+; GFX9-NEXT:    v_add_co_u32_e64 v1, s[6:7], 0, v0
 ; GFX9-NEXT:    s_xor_b64 vcc, s[4:5], vcc
-; GFX9-NEXT:    v_addc_co_u32_e64 v1, s[6:7], v0, v1, s[6:7]
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v8, v4, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
+; GFX9-NEXT:    v_addc_co_u32_e64 v4, s[6:7], v0, v10, s[6:7]
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v8, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v9, v4, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v2, v6
 ; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, v3, v7, vcc
 ; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[2:3]
 ; GFX9-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[6:7]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v2, 31, v5
-; GFX9-NEXT:    v_mov_b32_e32 v3, s8
-; GFX9-NEXT:    v_add_co_u32_e64 v6, s[6:7], 0, v2
-; GFX9-NEXT:    v_addc_co_u32_e64 v3, s[6:7], v2, v3, s[6:7]
+; GFX9-NEXT:    v_add_co_u32_e64 v3, s[6:7], 0, v2
+; GFX9-NEXT:    v_addc_co_u32_e64 v6, s[6:7], v2, v10, s[6:7]
 ; GFX9-NEXT:    s_xor_b64 vcc, s[4:5], vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v6, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v5, v6, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_saddsat_v2i64:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    v_mov_b32_e32 v9, v0
-; GFX10-NEXT:    v_mov_b32_e32 v10, v1
-; GFX10-NEXT:    v_mov_b32_e32 v13, v2
-; GFX10-NEXT:    v_mov_b32_e32 v14, v3
+; GFX10-NEXT:    v_mov_b32_e32 v14, v0
+; GFX10-NEXT:    v_mov_b32_e32 v15, v1
+; GFX10-NEXT:    v_mov_b32_e32 v17, v2
+; GFX10-NEXT:    v_mov_b32_e32 v18, v3
 ; GFX10-NEXT:    v_cmp_gt_i64_e64 s4, 0, v[4:5]
-; GFX10-NEXT:    v_add_co_u32_e64 v19, vcc_lo, v9, v4
-; GFX10-NEXT:    s_brev_b32 s8, 1
-; GFX10-NEXT:    v_add_co_ci_u32_e32 v20, vcc_lo, v10, v5, vcc_lo
-; GFX10-NEXT:    v_add_co_u32_e64 v23, vcc_lo, v13, v6
+; GFX10-NEXT:    v_add_co_u32_e64 v8, vcc_lo, v14, v4
 ; GFX10-NEXT:    v_cmp_gt_i64_e64 s6, 0, v[6:7]
-; GFX10-NEXT:    v_add_co_ci_u32_e32 v24, vcc_lo, v14, v7, vcc_lo
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v9, vcc_lo, v15, v5, vcc_lo
+; GFX10-NEXT:    v_add_co_u32_e64 v19, vcc_lo, v17, v6
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v20, vcc_lo, v18, v7, vcc_lo
+; GFX10-NEXT:    v_ashrrev_i32_e32 v12, 31, v9
+; GFX10-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[8:9], v[14:15]
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v0, 31, v20
-; GFX10-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[19:20], v[9:10]
-; GFX10-NEXT:    v_ashrrev_i32_e32 v1, 31, v24
-; GFX10-NEXT:    v_add_co_u32_e64 v4, s5, v0, 0
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v5, s5, s8, v0, s5
-; GFX10-NEXT:    v_cmp_lt_i64_e64 s5, v[23:24], v[13:14]
-; GFX10-NEXT:    v_add_co_u32_e64 v2, s7, v1, 0
+; GFX10-NEXT:    v_add_co_u32_e64 v1, s5, v12, 0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v4, s5, 0x80000000, v12, s5
+; GFX10-NEXT:    v_cmp_lt_i64_e64 s5, v[19:20], v[17:18]
+; GFX10-NEXT:    v_add_co_u32_e64 v2, s7, v0, 0
 ; GFX10-NEXT:    s_xor_b32 vcc_lo, s4, vcc_lo
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v3, s7, s8, v1, s7
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, v19, v4, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, v20, v5, vcc_lo
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v3, s7, 0x80000000, v0, s7
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v8, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v9, v4, vcc_lo
 ; GFX10-NEXT:    s_xor_b32 vcc_lo, s6, s5
-; GFX10-NEXT:    v_cndmask_b32_e32 v2, v23, v2, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e32 v3, v24, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v19, v2, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v20, v3, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %result = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
   ret <2 x i64> %result
@@ -6225,15 +6218,14 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v11, 31, v19
 ; GFX6-NEXT:    v_cndmask_b32_e64 v0, v0, v16, s[4:5]
-; GFX6-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v11, v8, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v3, v11, v9, vcc
+; GFX6-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s[4:5]
 ; GFX6-NEXT:    v_add_i32_e32 v0, vcc, 0, v0
 ; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; GFX6-NEXT:    s_brev_b32 s4, 1
-; GFX6-NEXT:    v_mov_b32_e32 v8, s4
+; GFX6-NEXT:    v_bfrev_b32_e32 v20, 1
 ; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
-; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, v3, v8, vcc
+; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, v3, v20, vcc
 ; GFX6-NEXT:    v_and_b32_e32 v8, 1, v10
 ; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v16, v0, vcc
@@ -6248,43 +6240,42 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX6-NEXT:    s_cmp_lt_u32 s6, 64
 ; GFX6-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
 ; GFX6-NEXT:    v_cmp_lt_i64_e32 vcc, v[10:11], v[6:7]
-; GFX6-NEXT:    s_cselect_b32 s5, 1, 0
+; GFX6-NEXT:    s_cselect_b32 s4, 1, 0
 ; GFX6-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, v[10:11], v[6:7]
 ; GFX6-NEXT:    s_cmp_eq_u32 s6, 0
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, v5, v4, vcc
 ; GFX6-NEXT:    v_cmp_gt_u64_e32 vcc, 0, v[12:13]
-; GFX6-NEXT:    s_cselect_b32 s9, 1, 0
+; GFX6-NEXT:    s_cselect_b32 s5, 1, 0
 ; GFX6-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; GFX6-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[14:15]
 ; GFX6-NEXT:    v_ashr_i64 v[12:13], v[10:11], s6
 ; GFX6-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[14:15]
-; GFX6-NEXT:    v_ashrrev_i32_e32 v15, 31, v11
+; GFX6-NEXT:    s_and_b32 s5, 1, s5
 ; GFX6-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
 ; GFX6-NEXT:    v_xor_b32_e32 v14, v5, v4
 ; GFX6-NEXT:    v_lshr_b64 v[4:5], v[8:9], s6
 ; GFX6-NEXT:    v_lshl_b64 v[6:7], v[10:11], s8
-; GFX6-NEXT:    s_and_b32 s6, 1, s5
+; GFX6-NEXT:    s_and_b32 s6, 1, s4
 ; GFX6-NEXT:    v_or_b32_e32 v6, v4, v6
 ; GFX6-NEXT:    v_or_b32_e32 v7, v5, v7
 ; GFX6-NEXT:    v_ashr_i64 v[4:5], v[10:11], s7
 ; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s6
-; GFX6-NEXT:    s_and_b32 s6, 1, s9
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
-; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s6
-; GFX6-NEXT:    s_and_b32 s5, 1, s5
+; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s5
+; GFX6-NEXT:    s_and_b32 s4, 1, s4
+; GFX6-NEXT:    v_ashrrev_i32_e32 v15, 31, v11
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v5, v5, v9, vcc
-; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s5
+; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
 ; GFX6-NEXT:    v_cndmask_b32_e32 v6, v15, v12, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v7, v15, v13, vcc
 ; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 0, v4
 ; GFX6-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
-; GFX6-NEXT:    v_mov_b32_e32 v12, s4
 ; GFX6-NEXT:    v_addc_u32_e32 v6, vcc, 0, v6, vcc
-; GFX6-NEXT:    v_addc_u32_e32 v7, vcc, v7, v12, vcc
+; GFX6-NEXT:    v_addc_u32_e32 v7, vcc, v7, v20, vcc
 ; GFX6-NEXT:    v_and_b32_e32 v12, 1, v14
 ; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
@@ -6334,15 +6325,14 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v11, 31, v19
 ; GFX8-NEXT:    v_cndmask_b32_e64 v0, v0, v16, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v11, v8, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v11, v9, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s[4:5]
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0, v0
 ; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; GFX8-NEXT:    s_brev_b32 s4, 1
-; GFX8-NEXT:    v_mov_b32_e32 v8, s4
+; GFX8-NEXT:    v_bfrev_b32_e32 v20, 1
 ; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
-; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, v3, v8, vcc
+; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, v3, v20, vcc
 ; GFX8-NEXT:    v_and_b32_e32 v8, 1, v10
 ; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v16, v0, vcc
@@ -6357,43 +6347,42 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX8-NEXT:    s_cmp_lt_u32 s6, 64
 ; GFX8-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
 ; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, v[10:11], v[6:7]
-; GFX8-NEXT:    s_cselect_b32 s5, 1, 0
+; GFX8-NEXT:    s_cselect_b32 s4, 1, 0
 ; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[10:11], v[6:7]
 ; GFX8-NEXT:    s_cmp_eq_u32 s6, 0
 ; GFX8-NEXT:    v_cndmask_b32_e32 v4, v5, v4, vcc
 ; GFX8-NEXT:    v_cmp_gt_u64_e32 vcc, 0, v[12:13]
-; GFX8-NEXT:    s_cselect_b32 s9, 1, 0
+; GFX8-NEXT:    s_cselect_b32 s5, 1, 0
 ; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; GFX8-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[14:15]
 ; GFX8-NEXT:    v_ashrrev_i64 v[12:13], s6, v[10:11]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
 ; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[14:15]
-; GFX8-NEXT:    v_ashrrev_i32_e32 v15, 31, v11
+; GFX8-NEXT:    s_and_b32 s5, 1, s5
 ; GFX8-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
 ; GFX8-NEXT:    v_xor_b32_e32 v14, v5, v4
 ; GFX8-NEXT:    v_lshrrev_b64 v[4:5], s6, v[8:9]
 ; GFX8-NEXT:    v_lshlrev_b64 v[6:7], s8, v[10:11]
-; GFX8-NEXT:    s_and_b32 s6, 1, s5
+; GFX8-NEXT:    s_and_b32 s6, 1, s4
 ; GFX8-NEXT:    v_or_b32_e32 v6, v4, v6
 ; GFX8-NEXT:    v_or_b32_e32 v7, v5, v7
 ; GFX8-NEXT:    v_ashrrev_i64 v[4:5], s7, v[10:11]
 ; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s6
-; GFX8-NEXT:    s_and_b32 s6, 1, s9
 ; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
-; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s6
-; GFX8-NEXT:    s_and_b32 s5, 1, s5
+; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s5
+; GFX8-NEXT:    s_and_b32 s4, 1, s4
+; GFX8-NEXT:    v_ashrrev_i32_e32 v15, 31, v11
 ; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v5, v5, v9, vcc
-; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s5
+; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
 ; GFX8-NEXT:    v_cndmask_b32_e32 v6, v15, v12, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v7, v15, v13, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 0, v4
 ; GFX8-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
-; GFX8-NEXT:    v_mov_b32_e32 v12, s4
 ; GFX8-NEXT:    v_addc_u32_e32 v6, vcc, 0, v6, vcc
-; GFX8-NEXT:    v_addc_u32_e32 v7, vcc, v7, v12, vcc
+; GFX8-NEXT:    v_addc_u32_e32 v7, vcc, v7, v20, vcc
 ; GFX8-NEXT:    v_and_b32_e32 v12, 1, v14
 ; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
 ; GFX8-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
@@ -6443,15 +6432,14 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v11, 31, v19
 ; GFX9-NEXT:    v_cndmask_b32_e64 v0, v0, v16, s[4:5]
-; GFX9-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s[4:5]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v11, v8, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v11, v9, vcc
+; GFX9-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s[4:5]
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
 ; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
-; GFX9-NEXT:    s_brev_b32 s4, 1
-; GFX9-NEXT:    v_mov_b32_e32 v8, s4
+; GFX9-NEXT:    v_bfrev_b32_e32 v20, 1
 ; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
-; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v8, vcc
+; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v20, vcc
 ; GFX9-NEXT:    v_and_b32_e32 v8, 1, v10
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v16, v0, vcc
@@ -6466,43 +6454,42 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX9-NEXT:    s_cmp_lt_u32 s6, 64
 ; GFX9-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[10:11], v[6:7]
-; GFX9-NEXT:    s_cselect_b32 s5, 1, 0
+; GFX9-NEXT:    s_cselect_b32 s4, 1, 0
 ; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[10:11], v[6:7]
 ; GFX9-NEXT:    s_cmp_eq_u32 s6, 0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v5, v4, vcc
 ; GFX9-NEXT:    v_cmp_gt_u64_e32 vcc, 0, v[12:13]
-; GFX9-NEXT:    s_cselect_b32 s9, 1, 0
+; GFX9-NEXT:    s_cselect_b32 s5, 1, 0
 ; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[14:15]
 ; GFX9-NEXT:    v_ashrrev_i64 v[12:13], s6, v[10:11]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[14:15]
-; GFX9-NEXT:    v_ashrrev_i32_e32 v15, 31, v11
+; GFX9-NEXT:    s_and_b32 s5, 1, s5
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
 ; GFX9-NEXT:    v_xor_b32_e32 v14, v5, v4
 ; GFX9-NEXT:    v_lshrrev_b64 v[4:5], s6, v[8:9]
 ; GFX9-NEXT:    v_lshlrev_b64 v[6:7], s8, v[10:11]
-; GFX9-NEXT:    s_and_b32 s6, 1, s5
+; GFX9-NEXT:    s_and_b32 s6, 1, s4
 ; GFX9-NEXT:    v_or_b32_e32 v6, v4, v6
 ; GFX9-NEXT:    v_or_b32_e32 v7, v5, v7
 ; GFX9-NEXT:    v_ashrrev_i64 v[4:5], s7, v[10:11]
 ; GFX9-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s6
-; GFX9-NEXT:    s_and_b32 s6, 1, s9
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
-; GFX9-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s6
-; GFX9-NEXT:    s_and_b32 s5, 1, s5
+; GFX9-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s5
+; GFX9-NEXT:    s_and_b32 s4, 1, s4
+; GFX9-NEXT:    v_ashrrev_i32_e32 v15, 31, v11
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v5, v9, vcc
-; GFX9-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s5
+; GFX9-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v15, v12, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v7, v15, v13, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, 0, v4
 ; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, 0, v5, vcc
-; GFX9-NEXT:    v_mov_b32_e32 v12, s4
 ; GFX9-NEXT:    v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
-; GFX9-NEXT:    v_addc_co_u32_e32 v7, vcc, v7, v12, vcc
+; GFX9-NEXT:    v_addc_co_u32_e32 v7, vcc, v7, v20, vcc
 ; GFX9-NEXT:    v_and_b32_e32 v12, 1, v14
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
@@ -6561,7 +6548,6 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX10-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s4
 ; GFX10-NEXT:    v_cmp_ne_u32_e64 s4, 0, s8
 ; GFX10-NEXT:    v_xor_b32_e32 v9, v10, v20
-; GFX10-NEXT:    s_brev_b32 s8, 1
 ; GFX10-NEXT:    s_cmp_lt_u32 s5, 64
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v16, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v17, vcc_lo
@@ -6571,7 +6557,7 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX10-NEXT:    v_add_co_u32_e64 v2, vcc_lo, v2, 0
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v20, vcc_lo, 0, v0, vcc_lo
-; GFX10-NEXT:    v_add_co_ci_u32_e32 v21, vcc_lo, s8, v1, vcc_lo
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v21, vcc_lo, 0x80000000, v1, vcc_lo
 ; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v8
 ; GFX10-NEXT:    v_add_co_u32_e64 v8, s4, v26, v12
 ; GFX10-NEXT:    v_add_co_ci_u32_e64 v9, s4, v27, v13, s4
@@ -6619,7 +6605,7 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX10-NEXT:    v_cmp_ne_u32_e64 s5, 0, v7
 ; GFX10-NEXT:    v_add_co_ci_u32_e64 v7, s4, 0, v3, s4
 ; GFX10-NEXT:    v_cndmask_b32_e32 v3, v19, v21, vcc_lo
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v12, s4, s8, v4, s4
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v12, s4, 0x80000000, v4, s4
 ; GFX10-NEXT:    v_cndmask_b32_e64 v4, v8, v5, s5
 ; GFX10-NEXT:    v_cndmask_b32_e64 v5, v9, v6, s5
 ; GFX10-NEXT:    v_cndmask_b32_e64 v6, v10, v7, s5

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
index 9e2f881ee8df..f188fc05f363 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
@@ -1057,10 +1057,9 @@ define i64 @v_sdiv_i64_pow2k_denom(i64 %num) {
 ; CHECK-LABEL: v_sdiv_i64_pow2k_denom:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    s_movk_i32 s6, 0x1000
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, s6
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, 0x1000
 ; CHECK-NEXT:    v_cvt_f32_ubyte0_e32 v4, 0
-; CHECK-NEXT:    s_mov_b32 s7, 0xfffff000
+; CHECK-NEXT:    s_movk_i32 s6, 0xf000
 ; CHECK-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
 ; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v4
 ; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
@@ -1075,9 +1074,9 @@ define i64 @v_sdiv_i64_pow2k_denom(i64 %num) {
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v4, v4
 ; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v3
 ; CHECK-NEXT:    v_mul_lo_u32 v5, -1, v2
-; CHECK-NEXT:    v_mul_lo_u32 v6, s7, v4
-; CHECK-NEXT:    v_mul_hi_u32 v8, s7, v2
-; CHECK-NEXT:    v_mul_lo_u32 v7, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v6, s6, v4
+; CHECK-NEXT:    v_mul_hi_u32 v8, s6, v2
+; CHECK-NEXT:    v_mul_lo_u32 v7, s6, v2
 ; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
 ; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
 ; CHECK-NEXT:    v_mul_lo_u32 v6, v4, v7
@@ -1104,9 +1103,9 @@ define i64 @v_sdiv_i64_pow2k_denom(i64 %num) {
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
 ; CHECK-NEXT:    v_addc_u32_e64 v6, s[4:5], v4, v5, vcc
 ; CHECK-NEXT:    v_mul_lo_u32 v7, -1, v2
-; CHECK-NEXT:    v_mul_lo_u32 v8, s7, v6
-; CHECK-NEXT:    v_mul_hi_u32 v10, s7, v2
-; CHECK-NEXT:    v_mul_lo_u32 v9, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v8, s6, v6
+; CHECK-NEXT:    v_mul_hi_u32 v10, s6, v2
+; CHECK-NEXT:    v_mul_lo_u32 v9, s6, v2
 ; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v5
 ; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v8
 ; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v10
@@ -1114,6 +1113,7 @@ define i64 @v_sdiv_i64_pow2k_denom(i64 %num) {
 ; CHECK-NEXT:    v_mul_lo_u32 v10, v2, v7
 ; CHECK-NEXT:    v_mul_hi_u32 v5, v2, v9
 ; CHECK-NEXT:    v_mul_hi_u32 v9, v6, v9
+; CHECK-NEXT:    s_movk_i32 s6, 0x1000
 ; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v10
 ; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
 ; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v8, v5
@@ -1502,10 +1502,9 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) {
 ; CGP-LABEL: v_sdiv_v2i64_pow2k_denom:
 ; CGP:       ; %bb.0:
 ; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT:    s_movk_i32 s6, 0x1000
-; CGP-NEXT:    v_cvt_f32_u32_e32 v4, s6
+; CGP-NEXT:    v_cvt_f32_u32_e32 v4, 0x1000
 ; CGP-NEXT:    v_cvt_f32_ubyte0_e32 v6, 0
-; CGP-NEXT:    s_mov_b32 s7, 0xfffff000
+; CGP-NEXT:    s_movk_i32 s6, 0xf000
 ; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
 ; CGP-NEXT:    v_mov_b32_e32 v7, v4
 ; CGP-NEXT:    v_mac_f32_e32 v7, 0x4f800000, v6
@@ -1520,19 +1519,19 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v8, v8
 ; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
-; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v6
+; CGP-NEXT:    s_movk_i32 s7, 0x1000
 ; CGP-NEXT:    v_mul_lo_u32 v9, -1, v7
-; CGP-NEXT:    v_mul_lo_u32 v10, s7, v8
-; CGP-NEXT:    v_mul_hi_u32 v12, s7, v7
-; CGP-NEXT:    v_mul_lo_u32 v11, s7, v7
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; CGP-NEXT:    v_mul_lo_u32 v10, s6, v8
+; CGP-NEXT:    v_mul_hi_u32 v12, s6, v7
+; CGP-NEXT:    v_mul_lo_u32 v11, s6, v7
+; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v6
 ; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
 ; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
 ; CGP-NEXT:    v_mul_lo_u32 v10, v8, v11
 ; CGP-NEXT:    v_mul_lo_u32 v12, v7, v9
 ; CGP-NEXT:    v_mul_hi_u32 v13, v7, v11
 ; CGP-NEXT:    v_mul_hi_u32 v11, v8, v11
-; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
 ; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
 ; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
 ; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
@@ -1553,9 +1552,9 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
 ; CGP-NEXT:    v_addc_u32_e64 v10, s[4:5], v8, v9, vcc
 ; CGP-NEXT:    v_mul_lo_u32 v11, -1, v7
-; CGP-NEXT:    v_mul_lo_u32 v12, s7, v10
-; CGP-NEXT:    v_mul_hi_u32 v14, s7, v7
-; CGP-NEXT:    v_mul_lo_u32 v13, s7, v7
+; CGP-NEXT:    v_mul_lo_u32 v12, s6, v10
+; CGP-NEXT:    v_mul_hi_u32 v14, s6, v7
+; CGP-NEXT:    v_mul_lo_u32 v13, s6, v7
 ; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v9
 ; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
 ; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
@@ -1563,7 +1562,7 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_mul_lo_u32 v14, v7, v11
 ; CGP-NEXT:    v_mul_hi_u32 v9, v7, v13
 ; CGP-NEXT:    v_mul_hi_u32 v13, v10, v13
-; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v3
+; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
 ; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v14
 ; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
 ; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v12, v9
@@ -1588,6 +1587,7 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_mul_lo_u32 v10, v0, v8
 ; CGP-NEXT:    v_mul_hi_u32 v11, v0, v7
 ; CGP-NEXT:    v_mul_hi_u32 v7, v1, v7
+; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v3
 ; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
 ; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
 ; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
@@ -1606,9 +1606,9 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
 ; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
 ; CGP-NEXT:    v_mul_lo_u32 v9, 0, v7
-; CGP-NEXT:    v_mul_lo_u32 v10, s6, v8
-; CGP-NEXT:    v_mul_hi_u32 v12, s6, v7
-; CGP-NEXT:    v_mul_lo_u32 v11, s6, v7
+; CGP-NEXT:    v_mul_lo_u32 v10, s7, v8
+; CGP-NEXT:    v_mul_hi_u32 v12, s7, v7
+; CGP-NEXT:    v_mul_lo_u32 v11, s7, v7
 ; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
 ; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
 ; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v11
@@ -1617,8 +1617,8 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], 0, v10
 ; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
 ; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v0
-; CGP-NEXT:    v_subrev_i32_e32 v0, vcc, s6, v0
+; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s7, v0
+; CGP-NEXT:    v_subrev_i32_e32 v0, vcc, s7, v0
 ; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
 ; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
 ; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v10
@@ -1627,7 +1627,7 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_addc_u32_e32 v11, vcc, 0, v8, vcc
 ; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
 ; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
-; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s7, v0
 ; CGP-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
 ; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v12, v0, vcc
@@ -1646,9 +1646,9 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
 ; CGP-NEXT:    v_mul_lo_u32 v8, -1, v4
-; CGP-NEXT:    v_mul_lo_u32 v9, s7, v7
-; CGP-NEXT:    v_mul_hi_u32 v11, s7, v4
-; CGP-NEXT:    v_mul_lo_u32 v10, s7, v4
+; CGP-NEXT:    v_mul_lo_u32 v9, s6, v7
+; CGP-NEXT:    v_mul_hi_u32 v11, s6, v4
+; CGP-NEXT:    v_mul_lo_u32 v10, s6, v4
 ; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v6, vcc
 ; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
 ; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
@@ -1677,9 +1677,9 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
 ; CGP-NEXT:    v_addc_u32_e64 v9, s[4:5], v7, v8, vcc
 ; CGP-NEXT:    v_mul_lo_u32 v10, -1, v4
-; CGP-NEXT:    v_mul_lo_u32 v11, s7, v9
-; CGP-NEXT:    v_mul_hi_u32 v13, s7, v4
-; CGP-NEXT:    v_mul_lo_u32 v12, s7, v4
+; CGP-NEXT:    v_mul_lo_u32 v11, s6, v9
+; CGP-NEXT:    v_mul_hi_u32 v13, s6, v4
+; CGP-NEXT:    v_mul_lo_u32 v12, s6, v4
 ; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v8
 ; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v11
 ; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v13
@@ -1734,9 +1734,9 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
 ; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
 ; CGP-NEXT:    v_mul_lo_u32 v7, 0, v4
-; CGP-NEXT:    v_mul_lo_u32 v8, s6, v5
-; CGP-NEXT:    v_mul_hi_u32 v10, s6, v4
-; CGP-NEXT:    v_mul_lo_u32 v9, s6, v4
+; CGP-NEXT:    v_mul_lo_u32 v8, s7, v5
+; CGP-NEXT:    v_mul_hi_u32 v10, s7, v4
+; CGP-NEXT:    v_mul_lo_u32 v9, s7, v4
 ; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
 ; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
 ; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v9
@@ -1745,8 +1745,8 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], 0, v8
 ; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
 ; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v2
-; CGP-NEXT:    v_subrev_i32_e32 v2, vcc, s6, v2
+; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s7, v2
+; CGP-NEXT:    v_subrev_i32_e32 v2, vcc, s7, v2
 ; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
 ; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
 ; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v8
@@ -1755,7 +1755,7 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_addc_u32_e32 v9, vcc, 0, v5, vcc
 ; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v3
 ; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
-; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s6, v2
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s7, v2
 ; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
 ; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
 ; CGP-NEXT:    v_cndmask_b32_e32 v2, v10, v2, vcc
@@ -1780,10 +1780,9 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) {
 ; CHECK-LABEL: v_sdiv_i64_oddk_denom:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    s_mov_b32 s6, 0x12d8fb
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, s6
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, 0x12d8fb
 ; CHECK-NEXT:    v_cvt_f32_ubyte0_e32 v4, 0
-; CHECK-NEXT:    s_mov_b32 s7, 0xffed2705
+; CHECK-NEXT:    s_mov_b32 s6, 0xffed2705
 ; CHECK-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
 ; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v4
 ; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
@@ -1798,9 +1797,9 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) {
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v4, v4
 ; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v3
 ; CHECK-NEXT:    v_mul_lo_u32 v5, -1, v2
-; CHECK-NEXT:    v_mul_lo_u32 v6, s7, v4
-; CHECK-NEXT:    v_mul_hi_u32 v8, s7, v2
-; CHECK-NEXT:    v_mul_lo_u32 v7, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v6, s6, v4
+; CHECK-NEXT:    v_mul_hi_u32 v8, s6, v2
+; CHECK-NEXT:    v_mul_lo_u32 v7, s6, v2
 ; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
 ; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
 ; CHECK-NEXT:    v_mul_lo_u32 v6, v4, v7
@@ -1827,9 +1826,9 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) {
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
 ; CHECK-NEXT:    v_addc_u32_e64 v6, s[4:5], v4, v5, vcc
 ; CHECK-NEXT:    v_mul_lo_u32 v7, -1, v2
-; CHECK-NEXT:    v_mul_lo_u32 v8, s7, v6
-; CHECK-NEXT:    v_mul_hi_u32 v10, s7, v2
-; CHECK-NEXT:    v_mul_lo_u32 v9, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v8, s6, v6
+; CHECK-NEXT:    v_mul_hi_u32 v10, s6, v2
+; CHECK-NEXT:    v_mul_lo_u32 v9, s6, v2
 ; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v5
 ; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v8
 ; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v10
@@ -1837,6 +1836,7 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) {
 ; CHECK-NEXT:    v_mul_lo_u32 v10, v2, v7
 ; CHECK-NEXT:    v_mul_hi_u32 v5, v2, v9
 ; CHECK-NEXT:    v_mul_hi_u32 v9, v6, v9
+; CHECK-NEXT:    s_mov_b32 s6, 0x12d8fb
 ; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v10
 ; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
 ; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v8, v5
@@ -2225,10 +2225,9 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
 ; CGP-LABEL: v_sdiv_v2i64_oddk_denom:
 ; CGP:       ; %bb.0:
 ; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT:    s_mov_b32 s6, 0x12d8fb
-; CGP-NEXT:    v_cvt_f32_u32_e32 v4, s6
+; CGP-NEXT:    v_cvt_f32_u32_e32 v4, 0x12d8fb
 ; CGP-NEXT:    v_cvt_f32_ubyte0_e32 v6, 0
-; CGP-NEXT:    s_mov_b32 s7, 0xffed2705
+; CGP-NEXT:    s_mov_b32 s6, 0xffed2705
 ; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
 ; CGP-NEXT:    v_mov_b32_e32 v7, v4
 ; CGP-NEXT:    v_mac_f32_e32 v7, 0x4f800000, v6
@@ -2243,19 +2242,19 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v8, v8
 ; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
-; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v6
+; CGP-NEXT:    s_mov_b32 s7, 0x12d8fb
 ; CGP-NEXT:    v_mul_lo_u32 v9, -1, v7
-; CGP-NEXT:    v_mul_lo_u32 v10, s7, v8
-; CGP-NEXT:    v_mul_hi_u32 v12, s7, v7
-; CGP-NEXT:    v_mul_lo_u32 v11, s7, v7
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; CGP-NEXT:    v_mul_lo_u32 v10, s6, v8
+; CGP-NEXT:    v_mul_hi_u32 v12, s6, v7
+; CGP-NEXT:    v_mul_lo_u32 v11, s6, v7
+; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v6
 ; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
 ; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
 ; CGP-NEXT:    v_mul_lo_u32 v10, v8, v11
 ; CGP-NEXT:    v_mul_lo_u32 v12, v7, v9
 ; CGP-NEXT:    v_mul_hi_u32 v13, v7, v11
 ; CGP-NEXT:    v_mul_hi_u32 v11, v8, v11
-; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
 ; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
 ; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
 ; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
@@ -2276,9 +2275,9 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
 ; CGP-NEXT:    v_addc_u32_e64 v10, s[4:5], v8, v9, vcc
 ; CGP-NEXT:    v_mul_lo_u32 v11, -1, v7
-; CGP-NEXT:    v_mul_lo_u32 v12, s7, v10
-; CGP-NEXT:    v_mul_hi_u32 v14, s7, v7
-; CGP-NEXT:    v_mul_lo_u32 v13, s7, v7
+; CGP-NEXT:    v_mul_lo_u32 v12, s6, v10
+; CGP-NEXT:    v_mul_hi_u32 v14, s6, v7
+; CGP-NEXT:    v_mul_lo_u32 v13, s6, v7
 ; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v9
 ; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
 ; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
@@ -2286,7 +2285,7 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_mul_lo_u32 v14, v7, v11
 ; CGP-NEXT:    v_mul_hi_u32 v9, v7, v13
 ; CGP-NEXT:    v_mul_hi_u32 v13, v10, v13
-; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v3
+; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
 ; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v14
 ; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
 ; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v12, v9
@@ -2311,6 +2310,7 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_mul_lo_u32 v10, v0, v8
 ; CGP-NEXT:    v_mul_hi_u32 v11, v0, v7
 ; CGP-NEXT:    v_mul_hi_u32 v7, v1, v7
+; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v3
 ; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
 ; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
 ; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
@@ -2329,9 +2329,9 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
 ; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
 ; CGP-NEXT:    v_mul_lo_u32 v9, 0, v7
-; CGP-NEXT:    v_mul_lo_u32 v10, s6, v8
-; CGP-NEXT:    v_mul_hi_u32 v12, s6, v7
-; CGP-NEXT:    v_mul_lo_u32 v11, s6, v7
+; CGP-NEXT:    v_mul_lo_u32 v10, s7, v8
+; CGP-NEXT:    v_mul_hi_u32 v12, s7, v7
+; CGP-NEXT:    v_mul_lo_u32 v11, s7, v7
 ; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
 ; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
 ; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v11
@@ -2340,8 +2340,8 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], 0, v10
 ; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
 ; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v0
-; CGP-NEXT:    v_subrev_i32_e32 v0, vcc, s6, v0
+; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s7, v0
+; CGP-NEXT:    v_subrev_i32_e32 v0, vcc, s7, v0
 ; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
 ; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
 ; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v10
@@ -2350,7 +2350,7 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_addc_u32_e32 v11, vcc, 0, v8, vcc
 ; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
 ; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
-; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s7, v0
 ; CGP-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
 ; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v12, v0, vcc
@@ -2369,9 +2369,9 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
 ; CGP-NEXT:    v_mul_lo_u32 v8, -1, v4
-; CGP-NEXT:    v_mul_lo_u32 v9, s7, v7
-; CGP-NEXT:    v_mul_hi_u32 v11, s7, v4
-; CGP-NEXT:    v_mul_lo_u32 v10, s7, v4
+; CGP-NEXT:    v_mul_lo_u32 v9, s6, v7
+; CGP-NEXT:    v_mul_hi_u32 v11, s6, v4
+; CGP-NEXT:    v_mul_lo_u32 v10, s6, v4
 ; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v6, vcc
 ; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
 ; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
@@ -2400,9 +2400,9 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
 ; CGP-NEXT:    v_addc_u32_e64 v9, s[4:5], v7, v8, vcc
 ; CGP-NEXT:    v_mul_lo_u32 v10, -1, v4
-; CGP-NEXT:    v_mul_lo_u32 v11, s7, v9
-; CGP-NEXT:    v_mul_hi_u32 v13, s7, v4
-; CGP-NEXT:    v_mul_lo_u32 v12, s7, v4
+; CGP-NEXT:    v_mul_lo_u32 v11, s6, v9
+; CGP-NEXT:    v_mul_hi_u32 v13, s6, v4
+; CGP-NEXT:    v_mul_lo_u32 v12, s6, v4
 ; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v8
 ; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v11
 ; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v13
@@ -2457,9 +2457,9 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
 ; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
 ; CGP-NEXT:    v_mul_lo_u32 v7, 0, v4
-; CGP-NEXT:    v_mul_lo_u32 v8, s6, v5
-; CGP-NEXT:    v_mul_hi_u32 v10, s6, v4
-; CGP-NEXT:    v_mul_lo_u32 v9, s6, v4
+; CGP-NEXT:    v_mul_lo_u32 v8, s7, v5
+; CGP-NEXT:    v_mul_hi_u32 v10, s7, v4
+; CGP-NEXT:    v_mul_lo_u32 v9, s7, v4
 ; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
 ; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
 ; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v9
@@ -2468,8 +2468,8 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], 0, v8
 ; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
 ; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v2
-; CGP-NEXT:    v_subrev_i32_e32 v2, vcc, s6, v2
+; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s7, v2
+; CGP-NEXT:    v_subrev_i32_e32 v2, vcc, s7, v2
 ; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
 ; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
 ; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v8
@@ -2478,7 +2478,7 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_addc_u32_e32 v9, vcc, 0, v5, vcc
 ; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v3
 ; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
-; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s6, v2
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s7, v2
 ; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
 ; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
 ; CGP-NEXT:    v_cndmask_b32_e32 v2, v10, v2, vcc

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
index 2217e17358b3..f769b826b1ea 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
@@ -1037,10 +1037,9 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) {
 ; CHECK-LABEL: v_srem_i64_pow2k_denom:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    s_movk_i32 s6, 0x1000
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, s6
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, 0x1000
 ; CHECK-NEXT:    v_cvt_f32_ubyte0_e32 v4, 0
-; CHECK-NEXT:    s_mov_b32 s7, 0xfffff000
+; CHECK-NEXT:    s_movk_i32 s6, 0xf000
 ; CHECK-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
 ; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v4
 ; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
@@ -1055,9 +1054,9 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) {
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v4, v4
 ; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v3
 ; CHECK-NEXT:    v_mul_lo_u32 v5, -1, v2
-; CHECK-NEXT:    v_mul_lo_u32 v6, s7, v4
-; CHECK-NEXT:    v_mul_hi_u32 v8, s7, v2
-; CHECK-NEXT:    v_mul_lo_u32 v7, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v6, s6, v4
+; CHECK-NEXT:    v_mul_hi_u32 v8, s6, v2
+; CHECK-NEXT:    v_mul_lo_u32 v7, s6, v2
 ; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
 ; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
 ; CHECK-NEXT:    v_mul_lo_u32 v6, v4, v7
@@ -1084,9 +1083,9 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) {
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
 ; CHECK-NEXT:    v_addc_u32_e64 v6, s[4:5], v4, v5, vcc
 ; CHECK-NEXT:    v_mul_lo_u32 v7, -1, v2
-; CHECK-NEXT:    v_mul_lo_u32 v8, s7, v6
-; CHECK-NEXT:    v_mul_hi_u32 v10, s7, v2
-; CHECK-NEXT:    v_mul_lo_u32 v9, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v8, s6, v6
+; CHECK-NEXT:    v_mul_hi_u32 v10, s6, v2
+; CHECK-NEXT:    v_mul_lo_u32 v9, s6, v2
 ; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v5
 ; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v8
 ; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v10
@@ -1094,6 +1093,7 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) {
 ; CHECK-NEXT:    v_mul_lo_u32 v10, v2, v7
 ; CHECK-NEXT:    v_mul_hi_u32 v5, v2, v9
 ; CHECK-NEXT:    v_mul_hi_u32 v9, v6, v9
+; CHECK-NEXT:    s_movk_i32 s6, 0x1000
 ; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v10
 ; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
 ; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v8, v5
@@ -1478,10 +1478,9 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
 ; CGP-LABEL: v_srem_v2i64_pow2k_denom:
 ; CGP:       ; %bb.0:
 ; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT:    s_movk_i32 s6, 0x1000
-; CGP-NEXT:    v_cvt_f32_u32_e32 v4, s6
+; CGP-NEXT:    v_cvt_f32_u32_e32 v4, 0x1000
 ; CGP-NEXT:    v_cvt_f32_ubyte0_e32 v6, 0
-; CGP-NEXT:    s_mov_b32 s7, 0xfffff000
+; CGP-NEXT:    s_movk_i32 s6, 0xf000
 ; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
 ; CGP-NEXT:    v_mov_b32_e32 v7, v4
 ; CGP-NEXT:    v_mac_f32_e32 v7, 0x4f800000, v6
@@ -1496,19 +1495,19 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v8, v8
 ; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
-; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v6
+; CGP-NEXT:    s_movk_i32 s7, 0x1000
 ; CGP-NEXT:    v_mul_lo_u32 v9, -1, v7
-; CGP-NEXT:    v_mul_lo_u32 v10, s7, v8
-; CGP-NEXT:    v_mul_hi_u32 v12, s7, v7
-; CGP-NEXT:    v_mul_lo_u32 v11, s7, v7
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; CGP-NEXT:    v_mul_lo_u32 v10, s6, v8
+; CGP-NEXT:    v_mul_hi_u32 v12, s6, v7
+; CGP-NEXT:    v_mul_lo_u32 v11, s6, v7
+; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v6
 ; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
 ; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
 ; CGP-NEXT:    v_mul_lo_u32 v10, v8, v11
 ; CGP-NEXT:    v_mul_lo_u32 v12, v7, v9
 ; CGP-NEXT:    v_mul_hi_u32 v13, v7, v11
 ; CGP-NEXT:    v_mul_hi_u32 v11, v8, v11
-; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
 ; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
 ; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
 ; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
@@ -1529,9 +1528,9 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
 ; CGP-NEXT:    v_addc_u32_e64 v10, s[4:5], v8, v9, vcc
 ; CGP-NEXT:    v_mul_lo_u32 v11, -1, v7
-; CGP-NEXT:    v_mul_lo_u32 v12, s7, v10
-; CGP-NEXT:    v_mul_hi_u32 v14, s7, v7
-; CGP-NEXT:    v_mul_lo_u32 v13, s7, v7
+; CGP-NEXT:    v_mul_lo_u32 v12, s6, v10
+; CGP-NEXT:    v_mul_hi_u32 v14, s6, v7
+; CGP-NEXT:    v_mul_lo_u32 v13, s6, v7
 ; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v9
 ; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
 ; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
@@ -1539,7 +1538,7 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_mul_lo_u32 v14, v7, v11
 ; CGP-NEXT:    v_mul_hi_u32 v9, v7, v13
 ; CGP-NEXT:    v_mul_hi_u32 v13, v10, v13
-; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v3
+; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
 ; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v14
 ; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
 ; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v12, v9
@@ -1564,6 +1563,7 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_mul_lo_u32 v10, v0, v8
 ; CGP-NEXT:    v_mul_hi_u32 v11, v0, v7
 ; CGP-NEXT:    v_mul_hi_u32 v7, v1, v7
+; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v3
 ; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
 ; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
 ; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
@@ -1582,9 +1582,9 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
 ; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
 ; CGP-NEXT:    v_mul_lo_u32 v9, 0, v7
-; CGP-NEXT:    v_mul_lo_u32 v8, s6, v8
-; CGP-NEXT:    v_mul_lo_u32 v10, s6, v7
-; CGP-NEXT:    v_mul_hi_u32 v7, s6, v7
+; CGP-NEXT:    v_mul_lo_u32 v8, s7, v8
+; CGP-NEXT:    v_mul_lo_u32 v10, s7, v7
+; CGP-NEXT:    v_mul_hi_u32 v7, s7, v7
 ; CGP-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
 ; CGP-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
 ; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v10
@@ -1592,20 +1592,20 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v7
 ; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], 0, v8
 ; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v0
+; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s7, v0
 ; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
 ; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v8
 ; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
 ; CGP-NEXT:    v_cndmask_b32_e64 v7, v7, v9, s[4:5]
-; CGP-NEXT:    v_subrev_i32_e32 v9, vcc, s6, v0
+; CGP-NEXT:    v_subrev_i32_e32 v9, vcc, s7, v0
 ; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
 ; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
 ; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
-; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s6, v9
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s7, v9
 ; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
 ; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
 ; CGP-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc
-; CGP-NEXT:    v_subrev_i32_e32 v11, vcc, s6, v9
+; CGP-NEXT:    v_subrev_i32_e32 v11, vcc, s7, v9
 ; CGP-NEXT:    v_subbrev_u32_e32 v12, vcc, 0, v1, vcc
 ; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
 ; CGP-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc
@@ -1619,9 +1619,9 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
 ; CGP-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
 ; CGP-NEXT:    v_mul_lo_u32 v8, -1, v4
-; CGP-NEXT:    v_mul_lo_u32 v9, s7, v7
-; CGP-NEXT:    v_mul_hi_u32 v11, s7, v4
-; CGP-NEXT:    v_mul_lo_u32 v10, s7, v4
+; CGP-NEXT:    v_mul_lo_u32 v9, s6, v7
+; CGP-NEXT:    v_mul_hi_u32 v11, s6, v4
+; CGP-NEXT:    v_mul_lo_u32 v10, s6, v4
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
 ; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v6, vcc
 ; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
@@ -1651,9 +1651,9 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
 ; CGP-NEXT:    v_addc_u32_e64 v9, s[4:5], v7, v8, vcc
 ; CGP-NEXT:    v_mul_lo_u32 v10, -1, v4
-; CGP-NEXT:    v_mul_lo_u32 v11, s7, v9
-; CGP-NEXT:    v_mul_hi_u32 v13, s7, v4
-; CGP-NEXT:    v_mul_lo_u32 v12, s7, v4
+; CGP-NEXT:    v_mul_lo_u32 v11, s6, v9
+; CGP-NEXT:    v_mul_hi_u32 v13, s6, v4
+; CGP-NEXT:    v_mul_lo_u32 v12, s6, v4
 ; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v8
 ; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v11
 ; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v13
@@ -1708,9 +1708,9 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
 ; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
 ; CGP-NEXT:    v_mul_lo_u32 v7, 0, v4
-; CGP-NEXT:    v_mul_lo_u32 v5, s6, v5
-; CGP-NEXT:    v_mul_lo_u32 v8, s6, v4
-; CGP-NEXT:    v_mul_hi_u32 v4, s6, v4
+; CGP-NEXT:    v_mul_lo_u32 v5, s7, v5
+; CGP-NEXT:    v_mul_lo_u32 v8, s7, v4
+; CGP-NEXT:    v_mul_hi_u32 v4, s7, v4
 ; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
 ; CGP-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
 ; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v8
@@ -1718,20 +1718,20 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
 ; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], 0, v5
 ; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v2
+; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s7, v2
 ; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
 ; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v5
 ; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
 ; CGP-NEXT:    v_cndmask_b32_e64 v4, v4, v7, s[4:5]
-; CGP-NEXT:    v_subrev_i32_e32 v7, vcc, s6, v2
+; CGP-NEXT:    v_subrev_i32_e32 v7, vcc, s7, v2
 ; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
 ; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v3
 ; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
-; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s6, v7
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s7, v7
 ; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, vcc
 ; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
 ; CGP-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
-; CGP-NEXT:    v_subrev_i32_e32 v9, vcc, s6, v7
+; CGP-NEXT:    v_subrev_i32_e32 v9, vcc, s7, v7
 ; CGP-NEXT:    v_subbrev_u32_e32 v10, vcc, 0, v3, vcc
 ; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
 ; CGP-NEXT:    v_cndmask_b32_e32 v7, v7, v9, vcc
@@ -1752,10 +1752,9 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) {
 ; CHECK-LABEL: v_srem_i64_oddk_denom:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    s_mov_b32 s6, 0x12d8fb
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, s6
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, 0x12d8fb
 ; CHECK-NEXT:    v_cvt_f32_ubyte0_e32 v4, 0
-; CHECK-NEXT:    s_mov_b32 s7, 0xffed2705
+; CHECK-NEXT:    s_mov_b32 s6, 0xffed2705
 ; CHECK-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
 ; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v4
 ; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
@@ -1770,9 +1769,9 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) {
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v4, v4
 ; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v3
 ; CHECK-NEXT:    v_mul_lo_u32 v5, -1, v2
-; CHECK-NEXT:    v_mul_lo_u32 v6, s7, v4
-; CHECK-NEXT:    v_mul_hi_u32 v8, s7, v2
-; CHECK-NEXT:    v_mul_lo_u32 v7, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v6, s6, v4
+; CHECK-NEXT:    v_mul_hi_u32 v8, s6, v2
+; CHECK-NEXT:    v_mul_lo_u32 v7, s6, v2
 ; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
 ; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
 ; CHECK-NEXT:    v_mul_lo_u32 v6, v4, v7
@@ -1799,9 +1798,9 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) {
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
 ; CHECK-NEXT:    v_addc_u32_e64 v6, s[4:5], v4, v5, vcc
 ; CHECK-NEXT:    v_mul_lo_u32 v7, -1, v2
-; CHECK-NEXT:    v_mul_lo_u32 v8, s7, v6
-; CHECK-NEXT:    v_mul_hi_u32 v10, s7, v2
-; CHECK-NEXT:    v_mul_lo_u32 v9, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v8, s6, v6
+; CHECK-NEXT:    v_mul_hi_u32 v10, s6, v2
+; CHECK-NEXT:    v_mul_lo_u32 v9, s6, v2
 ; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v5
 ; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v8
 ; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v10
@@ -1809,6 +1808,7 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) {
 ; CHECK-NEXT:    v_mul_lo_u32 v10, v2, v7
 ; CHECK-NEXT:    v_mul_hi_u32 v5, v2, v9
 ; CHECK-NEXT:    v_mul_hi_u32 v9, v6, v9
+; CHECK-NEXT:    s_mov_b32 s6, 0x12d8fb
 ; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v10
 ; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
 ; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v8, v5
@@ -2193,10 +2193,9 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
 ; CGP-LABEL: v_srem_v2i64_oddk_denom:
 ; CGP:       ; %bb.0:
 ; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT:    s_mov_b32 s6, 0x12d8fb
-; CGP-NEXT:    v_cvt_f32_u32_e32 v4, s6
+; CGP-NEXT:    v_cvt_f32_u32_e32 v4, 0x12d8fb
 ; CGP-NEXT:    v_cvt_f32_ubyte0_e32 v6, 0
-; CGP-NEXT:    s_mov_b32 s7, 0xffed2705
+; CGP-NEXT:    s_mov_b32 s6, 0xffed2705
 ; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
 ; CGP-NEXT:    v_mov_b32_e32 v7, v4
 ; CGP-NEXT:    v_mac_f32_e32 v7, 0x4f800000, v6
@@ -2211,19 +2210,19 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v8, v8
 ; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
-; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v6
+; CGP-NEXT:    s_mov_b32 s7, 0x12d8fb
 ; CGP-NEXT:    v_mul_lo_u32 v9, -1, v7
-; CGP-NEXT:    v_mul_lo_u32 v10, s7, v8
-; CGP-NEXT:    v_mul_hi_u32 v12, s7, v7
-; CGP-NEXT:    v_mul_lo_u32 v11, s7, v7
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; CGP-NEXT:    v_mul_lo_u32 v10, s6, v8
+; CGP-NEXT:    v_mul_hi_u32 v12, s6, v7
+; CGP-NEXT:    v_mul_lo_u32 v11, s6, v7
+; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v6
 ; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
 ; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
 ; CGP-NEXT:    v_mul_lo_u32 v10, v8, v11
 ; CGP-NEXT:    v_mul_lo_u32 v12, v7, v9
 ; CGP-NEXT:    v_mul_hi_u32 v13, v7, v11
 ; CGP-NEXT:    v_mul_hi_u32 v11, v8, v11
-; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
 ; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
 ; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
 ; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
@@ -2244,9 +2243,9 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
 ; CGP-NEXT:    v_addc_u32_e64 v10, s[4:5], v8, v9, vcc
 ; CGP-NEXT:    v_mul_lo_u32 v11, -1, v7
-; CGP-NEXT:    v_mul_lo_u32 v12, s7, v10
-; CGP-NEXT:    v_mul_hi_u32 v14, s7, v7
-; CGP-NEXT:    v_mul_lo_u32 v13, s7, v7
+; CGP-NEXT:    v_mul_lo_u32 v12, s6, v10
+; CGP-NEXT:    v_mul_hi_u32 v14, s6, v7
+; CGP-NEXT:    v_mul_lo_u32 v13, s6, v7
 ; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v9
 ; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
 ; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
@@ -2254,7 +2253,7 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_mul_lo_u32 v14, v7, v11
 ; CGP-NEXT:    v_mul_hi_u32 v9, v7, v13
 ; CGP-NEXT:    v_mul_hi_u32 v13, v10, v13
-; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v3
+; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
 ; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v14
 ; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
 ; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v12, v9
@@ -2279,6 +2278,7 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_mul_lo_u32 v10, v0, v8
 ; CGP-NEXT:    v_mul_hi_u32 v11, v0, v7
 ; CGP-NEXT:    v_mul_hi_u32 v7, v1, v7
+; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v3
 ; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
 ; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
 ; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
@@ -2297,9 +2297,9 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
 ; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
 ; CGP-NEXT:    v_mul_lo_u32 v9, 0, v7
-; CGP-NEXT:    v_mul_lo_u32 v8, s6, v8
-; CGP-NEXT:    v_mul_lo_u32 v10, s6, v7
-; CGP-NEXT:    v_mul_hi_u32 v7, s6, v7
+; CGP-NEXT:    v_mul_lo_u32 v8, s7, v8
+; CGP-NEXT:    v_mul_lo_u32 v10, s7, v7
+; CGP-NEXT:    v_mul_hi_u32 v7, s7, v7
 ; CGP-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
 ; CGP-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
 ; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v10
@@ -2307,20 +2307,20 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v7
 ; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], 0, v8
 ; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v0
+; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s7, v0
 ; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
 ; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v8
 ; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
 ; CGP-NEXT:    v_cndmask_b32_e64 v7, v7, v9, s[4:5]
-; CGP-NEXT:    v_subrev_i32_e32 v9, vcc, s6, v0
+; CGP-NEXT:    v_subrev_i32_e32 v9, vcc, s7, v0
 ; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
 ; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
 ; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
-; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s6, v9
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s7, v9
 ; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
 ; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
 ; CGP-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc
-; CGP-NEXT:    v_subrev_i32_e32 v11, vcc, s6, v9
+; CGP-NEXT:    v_subrev_i32_e32 v11, vcc, s7, v9
 ; CGP-NEXT:    v_subbrev_u32_e32 v12, vcc, 0, v1, vcc
 ; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
 ; CGP-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc
@@ -2334,9 +2334,9 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
 ; CGP-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
 ; CGP-NEXT:    v_mul_lo_u32 v8, -1, v4
-; CGP-NEXT:    v_mul_lo_u32 v9, s7, v7
-; CGP-NEXT:    v_mul_hi_u32 v11, s7, v4
-; CGP-NEXT:    v_mul_lo_u32 v10, s7, v4
+; CGP-NEXT:    v_mul_lo_u32 v9, s6, v7
+; CGP-NEXT:    v_mul_hi_u32 v11, s6, v4
+; CGP-NEXT:    v_mul_lo_u32 v10, s6, v4
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
 ; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v6, vcc
 ; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
@@ -2366,9 +2366,9 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
 ; CGP-NEXT:    v_addc_u32_e64 v9, s[4:5], v7, v8, vcc
 ; CGP-NEXT:    v_mul_lo_u32 v10, -1, v4
-; CGP-NEXT:    v_mul_lo_u32 v11, s7, v9
-; CGP-NEXT:    v_mul_hi_u32 v13, s7, v4
-; CGP-NEXT:    v_mul_lo_u32 v12, s7, v4
+; CGP-NEXT:    v_mul_lo_u32 v11, s6, v9
+; CGP-NEXT:    v_mul_hi_u32 v13, s6, v4
+; CGP-NEXT:    v_mul_lo_u32 v12, s6, v4
 ; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v8
 ; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v11
 ; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v13
@@ -2423,9 +2423,9 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
 ; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
 ; CGP-NEXT:    v_mul_lo_u32 v7, 0, v4
-; CGP-NEXT:    v_mul_lo_u32 v5, s6, v5
-; CGP-NEXT:    v_mul_lo_u32 v8, s6, v4
-; CGP-NEXT:    v_mul_hi_u32 v4, s6, v4
+; CGP-NEXT:    v_mul_lo_u32 v5, s7, v5
+; CGP-NEXT:    v_mul_lo_u32 v8, s7, v4
+; CGP-NEXT:    v_mul_hi_u32 v4, s7, v4
 ; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
 ; CGP-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
 ; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v8
@@ -2433,20 +2433,20 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
 ; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], 0, v5
 ; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v2
+; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s7, v2
 ; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
 ; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v5
 ; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
 ; CGP-NEXT:    v_cndmask_b32_e64 v4, v4, v7, s[4:5]
-; CGP-NEXT:    v_subrev_i32_e32 v7, vcc, s6, v2
+; CGP-NEXT:    v_subrev_i32_e32 v7, vcc, s7, v2
 ; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
 ; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v3
 ; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
-; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s6, v7
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s7, v7
 ; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, vcc
 ; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
 ; CGP-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
-; CGP-NEXT:    v_subrev_i32_e32 v9, vcc, s6, v7
+; CGP-NEXT:    v_subrev_i32_e32 v9, vcc, s7, v7
 ; CGP-NEXT:    v_subbrev_u32_e32 v10, vcc, 0, v3, vcc
 ; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
 ; CGP-NEXT:    v_cndmask_b32_e32 v7, v7, v9, vcc

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
index d2c65aa5a178..76aa2f511b14 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
@@ -4984,24 +4984,22 @@ define <2 x i64> @v_ssubsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX6-NEXT:    v_cmp_lt_i64_e32 vcc, v[8:9], v[0:1]
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[4:5], 0, v[4:5]
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v9
-; GFX6-NEXT:    s_brev_b32 s8, 1
-; GFX6-NEXT:    v_mov_b32_e32 v1, s8
-; GFX6-NEXT:    v_add_i32_e64 v4, s[6:7], 0, v0
+; GFX6-NEXT:    v_bfrev_b32_e32 v10, 1
+; GFX6-NEXT:    v_add_i32_e64 v1, s[6:7], 0, v0
 ; GFX6-NEXT:    s_xor_b64 vcc, s[4:5], vcc
-; GFX6-NEXT:    v_addc_u32_e64 v1, s[6:7], v0, v1, s[6:7]
-; GFX6-NEXT:    v_cndmask_b32_e32 v0, v8, v4, vcc
-; GFX6-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
+; GFX6-NEXT:    v_addc_u32_e64 v4, s[6:7], v0, v10, s[6:7]
+; GFX6-NEXT:    v_cndmask_b32_e32 v0, v8, v1, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v1, v9, v4, vcc
 ; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, v2, v6
 ; GFX6-NEXT:    v_subb_u32_e32 v5, vcc, v3, v7, vcc
 ; GFX6-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[2:3]
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[4:5], 0, v[6:7]
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v2, 31, v5
-; GFX6-NEXT:    v_mov_b32_e32 v3, s8
-; GFX6-NEXT:    v_add_i32_e64 v6, s[6:7], 0, v2
-; GFX6-NEXT:    v_addc_u32_e64 v3, s[6:7], v2, v3, s[6:7]
+; GFX6-NEXT:    v_add_i32_e64 v3, s[6:7], 0, v2
+; GFX6-NEXT:    v_addc_u32_e64 v6, s[6:7], v2, v10, s[6:7]
 ; GFX6-NEXT:    s_xor_b64 vcc, s[4:5], vcc
-; GFX6-NEXT:    v_cndmask_b32_e32 v2, v4, v6, vcc
-; GFX6-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v2, v4, v3, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v3, v5, v6, vcc
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_ssubsat_v2i64:
@@ -5012,24 +5010,22 @@ define <2 x i64> @v_ssubsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, v[8:9], v[0:1]
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[4:5], 0, v[4:5]
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v9
-; GFX8-NEXT:    s_brev_b32 s8, 1
-; GFX8-NEXT:    v_mov_b32_e32 v1, s8
-; GFX8-NEXT:    v_add_u32_e64 v4, s[6:7], 0, v0
+; GFX8-NEXT:    v_bfrev_b32_e32 v10, 1
+; GFX8-NEXT:    v_add_u32_e64 v1, s[6:7], 0, v0
 ; GFX8-NEXT:    s_xor_b64 vcc, s[4:5], vcc
-; GFX8-NEXT:    v_addc_u32_e64 v1, s[6:7], v0, v1, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v8, v4, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
+; GFX8-NEXT:    v_addc_u32_e64 v4, s[6:7], v0, v10, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v8, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v9, v4, vcc
 ; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, v2, v6
 ; GFX8-NEXT:    v_subb_u32_e32 v5, vcc, v3, v7, vcc
 ; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[2:3]
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[4:5], 0, v[6:7]
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 31, v5
-; GFX8-NEXT:    v_mov_b32_e32 v3, s8
-; GFX8-NEXT:    v_add_u32_e64 v6, s[6:7], 0, v2
-; GFX8-NEXT:    v_addc_u32_e64 v3, s[6:7], v2, v3, s[6:7]
+; GFX8-NEXT:    v_add_u32_e64 v3, s[6:7], 0, v2
+; GFX8-NEXT:    v_addc_u32_e64 v6, s[6:7], v2, v10, s[6:7]
 ; GFX8-NEXT:    s_xor_b64 vcc, s[4:5], vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v2, v4, v6, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v4, v3, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v5, v6, vcc
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_ssubsat_v2i64:
@@ -5040,56 +5036,53 @@ define <2 x i64> @v_ssubsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[8:9], v[0:1]
 ; GFX9-NEXT:    v_cmp_lt_i64_e64 s[4:5], 0, v[4:5]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v9
-; GFX9-NEXT:    s_brev_b32 s8, 1
-; GFX9-NEXT:    v_mov_b32_e32 v1, s8
-; GFX9-NEXT:    v_add_co_u32_e64 v4, s[6:7], 0, v0
+; GFX9-NEXT:    v_bfrev_b32_e32 v10, 1
+; GFX9-NEXT:    v_add_co_u32_e64 v1, s[6:7], 0, v0
 ; GFX9-NEXT:    s_xor_b64 vcc, s[4:5], vcc
-; GFX9-NEXT:    v_addc_co_u32_e64 v1, s[6:7], v0, v1, s[6:7]
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v8, v4, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
+; GFX9-NEXT:    v_addc_co_u32_e64 v4, s[6:7], v0, v10, s[6:7]
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v8, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v9, v4, vcc
 ; GFX9-NEXT:    v_sub_co_u32_e32 v4, vcc, v2, v6
 ; GFX9-NEXT:    v_subb_co_u32_e32 v5, vcc, v3, v7, vcc
 ; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[2:3]
 ; GFX9-NEXT:    v_cmp_lt_i64_e64 s[4:5], 0, v[6:7]
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v2, 31, v5
-; GFX9-NEXT:    v_mov_b32_e32 v3, s8
-; GFX9-NEXT:    v_add_co_u32_e64 v6, s[6:7], 0, v2
-; GFX9-NEXT:    v_addc_co_u32_e64 v3, s[6:7], v2, v3, s[6:7]
+; GFX9-NEXT:    v_add_co_u32_e64 v3, s[6:7], 0, v2
+; GFX9-NEXT:    v_addc_co_u32_e64 v6, s[6:7], v2, v10, s[6:7]
 ; GFX9-NEXT:    s_xor_b64 vcc, s[4:5], vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v6, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v5, v6, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_ssubsat_v2i64:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    v_mov_b32_e32 v9, v0
-; GFX10-NEXT:    v_mov_b32_e32 v10, v1
-; GFX10-NEXT:    v_mov_b32_e32 v13, v2
-; GFX10-NEXT:    v_mov_b32_e32 v14, v3
+; GFX10-NEXT:    v_mov_b32_e32 v14, v0
+; GFX10-NEXT:    v_mov_b32_e32 v15, v1
+; GFX10-NEXT:    v_mov_b32_e32 v17, v2
+; GFX10-NEXT:    v_mov_b32_e32 v18, v3
 ; GFX10-NEXT:    v_cmp_lt_i64_e64 s4, 0, v[4:5]
-; GFX10-NEXT:    v_sub_co_u32_e64 v19, vcc_lo, v9, v4
-; GFX10-NEXT:    s_brev_b32 s8, 1
-; GFX10-NEXT:    v_sub_co_ci_u32_e32 v20, vcc_lo, v10, v5, vcc_lo
-; GFX10-NEXT:    v_sub_co_u32_e64 v23, vcc_lo, v13, v6
+; GFX10-NEXT:    v_sub_co_u32_e64 v8, vcc_lo, v14, v4
 ; GFX10-NEXT:    v_cmp_lt_i64_e64 s6, 0, v[6:7]
-; GFX10-NEXT:    v_sub_co_ci_u32_e32 v24, vcc_lo, v14, v7, vcc_lo
+; GFX10-NEXT:    v_sub_co_ci_u32_e32 v9, vcc_lo, v15, v5, vcc_lo
+; GFX10-NEXT:    v_sub_co_u32_e64 v19, vcc_lo, v17, v6
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_sub_co_ci_u32_e32 v20, vcc_lo, v18, v7, vcc_lo
+; GFX10-NEXT:    v_ashrrev_i32_e32 v12, 31, v9
+; GFX10-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[8:9], v[14:15]
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v0, 31, v20
-; GFX10-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[19:20], v[9:10]
-; GFX10-NEXT:    v_ashrrev_i32_e32 v1, 31, v24
-; GFX10-NEXT:    v_add_co_u32_e64 v4, s5, v0, 0
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v5, s5, s8, v0, s5
-; GFX10-NEXT:    v_cmp_lt_i64_e64 s5, v[23:24], v[13:14]
-; GFX10-NEXT:    v_add_co_u32_e64 v2, s7, v1, 0
+; GFX10-NEXT:    v_add_co_u32_e64 v1, s5, v12, 0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v4, s5, 0x80000000, v12, s5
+; GFX10-NEXT:    v_cmp_lt_i64_e64 s5, v[19:20], v[17:18]
+; GFX10-NEXT:    v_add_co_u32_e64 v2, s7, v0, 0
 ; GFX10-NEXT:    s_xor_b32 vcc_lo, s4, vcc_lo
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v3, s7, s8, v1, s7
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, v19, v4, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e32 v1, v20, v5, vcc_lo
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v3, s7, 0x80000000, v0, s7
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v8, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v1, v9, v4, vcc_lo
 ; GFX10-NEXT:    s_xor_b32 vcc_lo, s6, s5
-; GFX10-NEXT:    v_cndmask_b32_e32 v2, v23, v2, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e32 v3, v24, v3, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v19, v2, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v20, v3, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %result = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
   ret <2 x i64> %result
@@ -6210,15 +6203,14 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v11, 31, v19
 ; GFX6-NEXT:    v_cndmask_b32_e64 v0, v0, v16, s[4:5]
-; GFX6-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v11, v8, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v3, v11, v9, vcc
+; GFX6-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s[4:5]
 ; GFX6-NEXT:    v_add_i32_e32 v0, vcc, 0, v0
 ; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; GFX6-NEXT:    s_brev_b32 s4, 1
-; GFX6-NEXT:    v_mov_b32_e32 v8, s4
+; GFX6-NEXT:    v_bfrev_b32_e32 v20, 1
 ; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
-; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, v3, v8, vcc
+; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, v3, v20, vcc
 ; GFX6-NEXT:    v_and_b32_e32 v8, 1, v10
 ; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v16, v0, vcc
@@ -6233,43 +6225,42 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX6-NEXT:    s_cmp_lt_u32 s6, 64
 ; GFX6-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
 ; GFX6-NEXT:    v_cmp_lt_i64_e32 vcc, v[10:11], v[6:7]
-; GFX6-NEXT:    s_cselect_b32 s5, 1, 0
+; GFX6-NEXT:    s_cselect_b32 s4, 1, 0
 ; GFX6-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, v[10:11], v[6:7]
 ; GFX6-NEXT:    s_cmp_eq_u32 s6, 0
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, v5, v4, vcc
 ; GFX6-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[12:13]
-; GFX6-NEXT:    s_cselect_b32 s9, 1, 0
+; GFX6-NEXT:    s_cselect_b32 s5, 1, 0
 ; GFX6-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; GFX6-NEXT:    v_cmp_lt_i64_e32 vcc, 0, v[14:15]
 ; GFX6-NEXT:    v_ashr_i64 v[12:13], v[10:11], s6
 ; GFX6-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[14:15]
-; GFX6-NEXT:    v_ashrrev_i32_e32 v15, 31, v11
+; GFX6-NEXT:    s_and_b32 s5, 1, s5
 ; GFX6-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
 ; GFX6-NEXT:    v_xor_b32_e32 v14, v5, v4
 ; GFX6-NEXT:    v_lshr_b64 v[4:5], v[8:9], s6
 ; GFX6-NEXT:    v_lshl_b64 v[6:7], v[10:11], s8
-; GFX6-NEXT:    s_and_b32 s6, 1, s5
+; GFX6-NEXT:    s_and_b32 s6, 1, s4
 ; GFX6-NEXT:    v_or_b32_e32 v6, v4, v6
 ; GFX6-NEXT:    v_or_b32_e32 v7, v5, v7
 ; GFX6-NEXT:    v_ashr_i64 v[4:5], v[10:11], s7
 ; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s6
-; GFX6-NEXT:    s_and_b32 s6, 1, s9
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
-; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s6
-; GFX6-NEXT:    s_and_b32 s5, 1, s5
+; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s5
+; GFX6-NEXT:    s_and_b32 s4, 1, s4
+; GFX6-NEXT:    v_ashrrev_i32_e32 v15, 31, v11
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v5, v5, v9, vcc
-; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s5
+; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
 ; GFX6-NEXT:    v_cndmask_b32_e32 v6, v15, v12, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v7, v15, v13, vcc
 ; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 0, v4
 ; GFX6-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
-; GFX6-NEXT:    v_mov_b32_e32 v12, s4
 ; GFX6-NEXT:    v_addc_u32_e32 v6, vcc, 0, v6, vcc
-; GFX6-NEXT:    v_addc_u32_e32 v7, vcc, v7, v12, vcc
+; GFX6-NEXT:    v_addc_u32_e32 v7, vcc, v7, v20, vcc
 ; GFX6-NEXT:    v_and_b32_e32 v12, 1, v14
 ; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
@@ -6319,15 +6310,14 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v11, 31, v19
 ; GFX8-NEXT:    v_cndmask_b32_e64 v0, v0, v16, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v11, v8, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v11, v9, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s[4:5]
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0, v0
 ; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; GFX8-NEXT:    s_brev_b32 s4, 1
-; GFX8-NEXT:    v_mov_b32_e32 v8, s4
+; GFX8-NEXT:    v_bfrev_b32_e32 v20, 1
 ; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
-; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, v3, v8, vcc
+; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, v3, v20, vcc
 ; GFX8-NEXT:    v_and_b32_e32 v8, 1, v10
 ; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v16, v0, vcc
@@ -6342,43 +6332,42 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX8-NEXT:    s_cmp_lt_u32 s6, 64
 ; GFX8-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
 ; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, v[10:11], v[6:7]
-; GFX8-NEXT:    s_cselect_b32 s5, 1, 0
+; GFX8-NEXT:    s_cselect_b32 s4, 1, 0
 ; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[10:11], v[6:7]
 ; GFX8-NEXT:    s_cmp_eq_u32 s6, 0
 ; GFX8-NEXT:    v_cndmask_b32_e32 v4, v5, v4, vcc
 ; GFX8-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[12:13]
-; GFX8-NEXT:    s_cselect_b32 s9, 1, 0
+; GFX8-NEXT:    s_cselect_b32 s5, 1, 0
 ; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, 0, v[14:15]
 ; GFX8-NEXT:    v_ashrrev_i64 v[12:13], s6, v[10:11]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
 ; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[14:15]
-; GFX8-NEXT:    v_ashrrev_i32_e32 v15, 31, v11
+; GFX8-NEXT:    s_and_b32 s5, 1, s5
 ; GFX8-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
 ; GFX8-NEXT:    v_xor_b32_e32 v14, v5, v4
 ; GFX8-NEXT:    v_lshrrev_b64 v[4:5], s6, v[8:9]
 ; GFX8-NEXT:    v_lshlrev_b64 v[6:7], s8, v[10:11]
-; GFX8-NEXT:    s_and_b32 s6, 1, s5
+; GFX8-NEXT:    s_and_b32 s6, 1, s4
 ; GFX8-NEXT:    v_or_b32_e32 v6, v4, v6
 ; GFX8-NEXT:    v_or_b32_e32 v7, v5, v7
 ; GFX8-NEXT:    v_ashrrev_i64 v[4:5], s7, v[10:11]
 ; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s6
-; GFX8-NEXT:    s_and_b32 s6, 1, s9
 ; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
-; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s6
-; GFX8-NEXT:    s_and_b32 s5, 1, s5
+; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s5
+; GFX8-NEXT:    s_and_b32 s4, 1, s4
+; GFX8-NEXT:    v_ashrrev_i32_e32 v15, 31, v11
 ; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v5, v5, v9, vcc
-; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s5
+; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
 ; GFX8-NEXT:    v_cndmask_b32_e32 v6, v15, v12, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v7, v15, v13, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 0, v4
 ; GFX8-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
-; GFX8-NEXT:    v_mov_b32_e32 v12, s4
 ; GFX8-NEXT:    v_addc_u32_e32 v6, vcc, 0, v6, vcc
-; GFX8-NEXT:    v_addc_u32_e32 v7, vcc, v7, v12, vcc
+; GFX8-NEXT:    v_addc_u32_e32 v7, vcc, v7, v20, vcc
 ; GFX8-NEXT:    v_and_b32_e32 v12, 1, v14
 ; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
 ; GFX8-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
@@ -6428,15 +6417,14 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v11, 31, v19
 ; GFX9-NEXT:    v_cndmask_b32_e64 v0, v0, v16, s[4:5]
-; GFX9-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s[4:5]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v11, v8, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v11, v9, vcc
+; GFX9-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s[4:5]
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
 ; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
-; GFX9-NEXT:    s_brev_b32 s4, 1
-; GFX9-NEXT:    v_mov_b32_e32 v8, s4
+; GFX9-NEXT:    v_bfrev_b32_e32 v20, 1
 ; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
-; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v8, vcc
+; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v20, vcc
 ; GFX9-NEXT:    v_and_b32_e32 v8, 1, v10
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v16, v0, vcc
@@ -6451,43 +6439,42 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX9-NEXT:    s_cmp_lt_u32 s6, 64
 ; GFX9-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[10:11], v[6:7]
-; GFX9-NEXT:    s_cselect_b32 s5, 1, 0
+; GFX9-NEXT:    s_cselect_b32 s4, 1, 0
 ; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[10:11], v[6:7]
 ; GFX9-NEXT:    s_cmp_eq_u32 s6, 0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v5, v4, vcc
 ; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[12:13]
-; GFX9-NEXT:    s_cselect_b32 s9, 1, 0
+; GFX9-NEXT:    s_cselect_b32 s5, 1, 0
 ; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, 0, v[14:15]
 ; GFX9-NEXT:    v_ashrrev_i64 v[12:13], s6, v[10:11]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[14:15]
-; GFX9-NEXT:    v_ashrrev_i32_e32 v15, 31, v11
+; GFX9-NEXT:    s_and_b32 s5, 1, s5
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
 ; GFX9-NEXT:    v_xor_b32_e32 v14, v5, v4
 ; GFX9-NEXT:    v_lshrrev_b64 v[4:5], s6, v[8:9]
 ; GFX9-NEXT:    v_lshlrev_b64 v[6:7], s8, v[10:11]
-; GFX9-NEXT:    s_and_b32 s6, 1, s5
+; GFX9-NEXT:    s_and_b32 s6, 1, s4
 ; GFX9-NEXT:    v_or_b32_e32 v6, v4, v6
 ; GFX9-NEXT:    v_or_b32_e32 v7, v5, v7
 ; GFX9-NEXT:    v_ashrrev_i64 v[4:5], s7, v[10:11]
 ; GFX9-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s6
-; GFX9-NEXT:    s_and_b32 s6, 1, s9
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
-; GFX9-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s6
-; GFX9-NEXT:    s_and_b32 s5, 1, s5
+; GFX9-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s5
+; GFX9-NEXT:    s_and_b32 s4, 1, s4
+; GFX9-NEXT:    v_ashrrev_i32_e32 v15, 31, v11
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v5, v9, vcc
-; GFX9-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s5
+; GFX9-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v15, v12, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v7, v15, v13, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, 0, v4
 ; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, 0, v5, vcc
-; GFX9-NEXT:    v_mov_b32_e32 v12, s4
 ; GFX9-NEXT:    v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
-; GFX9-NEXT:    v_addc_co_u32_e32 v7, vcc, v7, v12, vcc
+; GFX9-NEXT:    v_addc_co_u32_e32 v7, vcc, v7, v20, vcc
 ; GFX9-NEXT:    v_and_b32_e32 v12, 1, v14
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
@@ -6546,7 +6533,6 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX10-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s4
 ; GFX10-NEXT:    v_cmp_ne_u32_e64 s4, 0, s8
 ; GFX10-NEXT:    v_xor_b32_e32 v9, v10, v20
-; GFX10-NEXT:    s_brev_b32 s8, 1
 ; GFX10-NEXT:    s_cmp_lt_u32 s5, 64
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v16, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v17, vcc_lo
@@ -6556,7 +6542,7 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX10-NEXT:    v_add_co_u32_e64 v2, vcc_lo, v2, 0
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v20, vcc_lo, 0, v0, vcc_lo
-; GFX10-NEXT:    v_add_co_ci_u32_e32 v21, vcc_lo, s8, v1, vcc_lo
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v21, vcc_lo, 0x80000000, v1, vcc_lo
 ; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v8
 ; GFX10-NEXT:    v_sub_co_u32_e64 v8, s4, v26, v12
 ; GFX10-NEXT:    v_sub_co_ci_u32_e64 v9, s4, v27, v13, s4
@@ -6604,7 +6590,7 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX10-NEXT:    v_cmp_ne_u32_e64 s5, 0, v7
 ; GFX10-NEXT:    v_add_co_ci_u32_e64 v7, s4, 0, v3, s4
 ; GFX10-NEXT:    v_cndmask_b32_e32 v3, v19, v21, vcc_lo
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v12, s4, s8, v4, s4
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v12, s4, 0x80000000, v4, s4
 ; GFX10-NEXT:    v_cndmask_b32_e64 v4, v8, v5, s5
 ; GFX10-NEXT:    v_cndmask_b32_e64 v5, v9, v6, s5
 ; GFX10-NEXT:    v_cndmask_b32_e64 v6, v10, v7, s5

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
index 402ae90219eb..f0984a239736 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
@@ -963,22 +963,22 @@ define i64 @v_udiv_i64_pow2k_denom(i64 %num) {
 ; CHECK-LABEL: v_udiv_i64_pow2k_denom:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    s_movk_i32 s6, 0x1000
-; CHECK-NEXT:    v_cvt_f32_ubyte0_e32 v2, 0
-; CHECK-NEXT:    s_mov_b32 s7, 0xfffff000
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, s6
-; CHECK-NEXT:    v_mac_f32_e32 v3, 0x4f800000, v2
-; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v3
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, 0x1000
+; CHECK-NEXT:    v_cvt_f32_ubyte0_e32 v3, 0
+; CHECK-NEXT:    s_movk_i32 s6, 0xf000
+; CHECK-NEXT:    s_movk_i32 s7, 0x1000
+; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
 ; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
 ; CHECK-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
 ; CHECK-NEXT:    v_trunc_f32_e32 v3, v3
 ; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
-; CHECK-NEXT:    v_mul_lo_u32 v4, s7, v3
-; CHECK-NEXT:    v_mul_lo_u32 v5, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v4, s6, v3
+; CHECK-NEXT:    v_mul_lo_u32 v5, s6, v2
 ; CHECK-NEXT:    v_mul_lo_u32 v6, -1, v2
-; CHECK-NEXT:    v_mul_hi_u32 v7, s7, v2
+; CHECK-NEXT:    v_mul_hi_u32 v7, s6, v2
 ; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
 ; CHECK-NEXT:    v_mul_lo_u32 v6, v3, v5
 ; CHECK-NEXT:    v_mul_hi_u32 v8, v2, v5
@@ -1005,10 +1005,10 @@ define i64 @v_udiv_i64_pow2k_denom(i64 %num) {
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
 ; CHECK-NEXT:    v_addc_u32_e64 v5, s[4:5], v3, v4, vcc
 ; CHECK-NEXT:    v_add_i32_e64 v3, s[4:5], v3, v4
-; CHECK-NEXT:    v_mul_lo_u32 v4, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v4, s6, v2
 ; CHECK-NEXT:    v_mul_lo_u32 v6, -1, v2
-; CHECK-NEXT:    v_mul_hi_u32 v7, s7, v2
-; CHECK-NEXT:    v_mul_lo_u32 v8, s7, v5
+; CHECK-NEXT:    v_mul_hi_u32 v7, s6, v2
+; CHECK-NEXT:    v_mul_lo_u32 v8, s6, v5
 ; CHECK-NEXT:    v_mul_lo_u32 v9, v5, v4
 ; CHECK-NEXT:    v_mul_hi_u32 v10, v2, v4
 ; CHECK-NEXT:    v_mul_hi_u32 v4, v5, v4
@@ -1055,11 +1055,11 @@ define i64 @v_udiv_i64_pow2k_denom(i64 %num) {
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
 ; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; CHECK-NEXT:    v_mul_lo_u32 v5, s6, v2
+; CHECK-NEXT:    v_mul_lo_u32 v5, s7, v2
 ; CHECK-NEXT:    v_mul_lo_u32 v6, 0, v2
-; CHECK-NEXT:    v_mul_hi_u32 v7, s6, v2
+; CHECK-NEXT:    v_mul_hi_u32 v7, s7, v2
 ; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
-; CHECK-NEXT:    v_mul_lo_u32 v4, s6, v3
+; CHECK-NEXT:    v_mul_lo_u32 v4, s7, v3
 ; CHECK-NEXT:    v_add_i32_e32 v8, vcc, 1, v2
 ; CHECK-NEXT:    v_addc_u32_e32 v9, vcc, 0, v3, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
@@ -1069,16 +1069,16 @@ define i64 @v_udiv_i64_pow2k_denom(i64 %num) {
 ; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
 ; CHECK-NEXT:    v_subb_u32_e64 v5, s[4:5], v1, v4, vcc
 ; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v4
-; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v0
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s7, v0
 ; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
 ; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], 0, v5
 ; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
 ; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
 ; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
 ; CHECK-NEXT:    v_cndmask_b32_e32 v4, v7, v4, vcc
-; CHECK-NEXT:    v_subrev_i32_e32 v0, vcc, s6, v0
+; CHECK-NEXT:    v_subrev_i32_e32 v0, vcc, s7, v0
 ; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s7, v0
 ; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
 ; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
 ; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
@@ -1364,14 +1364,14 @@ define <2 x i64> @v_udiv_v2i64_pow2k_denom(<2 x i64> %num) {
 ; CGP-LABEL: v_udiv_v2i64_pow2k_denom:
 ; CGP:       ; %bb.0:
 ; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    v_cvt_f32_u32_e32 v4, 0x1000
+; CGP-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
+; CGP-NEXT:    s_movk_i32 s8, 0xf000
 ; CGP-NEXT:    s_movk_i32 s10, 0x1000
-; CGP-NEXT:    v_cvt_f32_ubyte0_e32 v4, 0
-; CGP-NEXT:    v_cvt_f32_u32_e32 v5, s10
-; CGP-NEXT:    s_mov_b32 s8, 0xfffff000
-; CGP-NEXT:    v_mov_b32_e32 v6, v5
-; CGP-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v4
-; CGP-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v4
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v5
+; CGP-NEXT:    v_mov_b32_e32 v6, v4
+; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; CGP-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v5
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
 ; CGP-NEXT:    v_rcp_iflag_f32_e32 v5, v6
 ; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
 ; CGP-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
@@ -1624,22 +1624,22 @@ define i64 @v_udiv_i64_oddk_denom(i64 %num) {
 ; CHECK-LABEL: v_udiv_i64_oddk_denom:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    s_mov_b32 s6, 0x12d8fb
-; CHECK-NEXT:    v_cvt_f32_ubyte0_e32 v2, 0
-; CHECK-NEXT:    s_mov_b32 s7, 0xffed2705
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, s6
-; CHECK-NEXT:    v_mac_f32_e32 v3, 0x4f800000, v2
-; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v3
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, 0x12d8fb
+; CHECK-NEXT:    v_cvt_f32_ubyte0_e32 v3, 0
+; CHECK-NEXT:    s_mov_b32 s6, 0xffed2705
+; CHECK-NEXT:    s_mov_b32 s7, 0x12d8fb
+; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
 ; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
 ; CHECK-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
 ; CHECK-NEXT:    v_trunc_f32_e32 v3, v3
 ; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
-; CHECK-NEXT:    v_mul_lo_u32 v4, s7, v3
-; CHECK-NEXT:    v_mul_lo_u32 v5, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v4, s6, v3
+; CHECK-NEXT:    v_mul_lo_u32 v5, s6, v2
 ; CHECK-NEXT:    v_mul_lo_u32 v6, -1, v2
-; CHECK-NEXT:    v_mul_hi_u32 v7, s7, v2
+; CHECK-NEXT:    v_mul_hi_u32 v7, s6, v2
 ; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
 ; CHECK-NEXT:    v_mul_lo_u32 v6, v3, v5
 ; CHECK-NEXT:    v_mul_hi_u32 v8, v2, v5
@@ -1666,10 +1666,10 @@ define i64 @v_udiv_i64_oddk_denom(i64 %num) {
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
 ; CHECK-NEXT:    v_addc_u32_e64 v5, s[4:5], v3, v4, vcc
 ; CHECK-NEXT:    v_add_i32_e64 v3, s[4:5], v3, v4
-; CHECK-NEXT:    v_mul_lo_u32 v4, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v4, s6, v2
 ; CHECK-NEXT:    v_mul_lo_u32 v6, -1, v2
-; CHECK-NEXT:    v_mul_hi_u32 v7, s7, v2
-; CHECK-NEXT:    v_mul_lo_u32 v8, s7, v5
+; CHECK-NEXT:    v_mul_hi_u32 v7, s6, v2
+; CHECK-NEXT:    v_mul_lo_u32 v8, s6, v5
 ; CHECK-NEXT:    v_mul_lo_u32 v9, v5, v4
 ; CHECK-NEXT:    v_mul_hi_u32 v10, v2, v4
 ; CHECK-NEXT:    v_mul_hi_u32 v4, v5, v4
@@ -1716,11 +1716,11 @@ define i64 @v_udiv_i64_oddk_denom(i64 %num) {
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
 ; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; CHECK-NEXT:    v_mul_lo_u32 v5, s6, v2
+; CHECK-NEXT:    v_mul_lo_u32 v5, s7, v2
 ; CHECK-NEXT:    v_mul_lo_u32 v6, 0, v2
-; CHECK-NEXT:    v_mul_hi_u32 v7, s6, v2
+; CHECK-NEXT:    v_mul_hi_u32 v7, s7, v2
 ; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
-; CHECK-NEXT:    v_mul_lo_u32 v4, s6, v3
+; CHECK-NEXT:    v_mul_lo_u32 v4, s7, v3
 ; CHECK-NEXT:    v_add_i32_e32 v8, vcc, 1, v2
 ; CHECK-NEXT:    v_addc_u32_e32 v9, vcc, 0, v3, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
@@ -1730,16 +1730,16 @@ define i64 @v_udiv_i64_oddk_denom(i64 %num) {
 ; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
 ; CHECK-NEXT:    v_subb_u32_e64 v5, s[4:5], v1, v4, vcc
 ; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v4
-; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v0
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s7, v0
 ; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
 ; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], 0, v5
 ; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
 ; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
 ; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
 ; CHECK-NEXT:    v_cndmask_b32_e32 v4, v7, v4, vcc
-; CHECK-NEXT:    v_subrev_i32_e32 v0, vcc, s6, v0
+; CHECK-NEXT:    v_subrev_i32_e32 v0, vcc, s7, v0
 ; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s7, v0
 ; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
 ; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
 ; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
@@ -2025,14 +2025,14 @@ define <2 x i64> @v_udiv_v2i64_oddk_denom(<2 x i64> %num) {
 ; CGP-LABEL: v_udiv_v2i64_oddk_denom:
 ; CGP:       ; %bb.0:
 ; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT:    s_mov_b32 s10, 0x12d8fb
-; CGP-NEXT:    v_cvt_f32_ubyte0_e32 v4, 0
-; CGP-NEXT:    v_cvt_f32_u32_e32 v5, s10
+; CGP-NEXT:    v_cvt_f32_u32_e32 v4, 0x12d8fb
+; CGP-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
 ; CGP-NEXT:    s_mov_b32 s8, 0xffed2705
-; CGP-NEXT:    v_mov_b32_e32 v6, v5
-; CGP-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v4
-; CGP-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v4
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v5
+; CGP-NEXT:    s_mov_b32 s10, 0x12d8fb
+; CGP-NEXT:    v_mov_b32_e32 v6, v4
+; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; CGP-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v5
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
 ; CGP-NEXT:    v_rcp_iflag_f32_e32 v5, v6
 ; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
 ; CGP-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
index 348f38ef250e..e79c300a56b8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -949,22 +949,22 @@ define i64 @v_urem_i64_pow2k_denom(i64 %num) {
 ; CHECK-LABEL: v_urem_i64_pow2k_denom:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    s_movk_i32 s6, 0x1000
-; CHECK-NEXT:    v_cvt_f32_ubyte0_e32 v2, 0
-; CHECK-NEXT:    s_mov_b32 s7, 0xfffff000
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, s6
-; CHECK-NEXT:    v_mac_f32_e32 v3, 0x4f800000, v2
-; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v3
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, 0x1000
+; CHECK-NEXT:    v_cvt_f32_ubyte0_e32 v3, 0
+; CHECK-NEXT:    s_movk_i32 s6, 0xf000
+; CHECK-NEXT:    s_movk_i32 s7, 0x1000
+; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
 ; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
 ; CHECK-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
 ; CHECK-NEXT:    v_trunc_f32_e32 v3, v3
 ; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
-; CHECK-NEXT:    v_mul_lo_u32 v4, s7, v3
-; CHECK-NEXT:    v_mul_lo_u32 v5, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v4, s6, v3
+; CHECK-NEXT:    v_mul_lo_u32 v5, s6, v2
 ; CHECK-NEXT:    v_mul_lo_u32 v6, -1, v2
-; CHECK-NEXT:    v_mul_hi_u32 v7, s7, v2
+; CHECK-NEXT:    v_mul_hi_u32 v7, s6, v2
 ; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
 ; CHECK-NEXT:    v_mul_lo_u32 v6, v3, v5
 ; CHECK-NEXT:    v_mul_hi_u32 v8, v2, v5
@@ -991,10 +991,10 @@ define i64 @v_urem_i64_pow2k_denom(i64 %num) {
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
 ; CHECK-NEXT:    v_addc_u32_e64 v5, s[4:5], v3, v4, vcc
 ; CHECK-NEXT:    v_add_i32_e64 v3, s[4:5], v3, v4
-; CHECK-NEXT:    v_mul_lo_u32 v4, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v4, s6, v2
 ; CHECK-NEXT:    v_mul_lo_u32 v6, -1, v2
-; CHECK-NEXT:    v_mul_hi_u32 v7, s7, v2
-; CHECK-NEXT:    v_mul_lo_u32 v8, s7, v5
+; CHECK-NEXT:    v_mul_hi_u32 v7, s6, v2
+; CHECK-NEXT:    v_mul_lo_u32 v8, s6, v5
 ; CHECK-NEXT:    v_mul_lo_u32 v9, v5, v4
 ; CHECK-NEXT:    v_mul_hi_u32 v10, v2, v4
 ; CHECK-NEXT:    v_mul_hi_u32 v4, v5, v4
@@ -1041,30 +1041,30 @@ define i64 @v_urem_i64_pow2k_denom(i64 %num) {
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
 ; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; CHECK-NEXT:    v_mul_lo_u32 v5, s6, v2
+; CHECK-NEXT:    v_mul_lo_u32 v5, s7, v2
 ; CHECK-NEXT:    v_mul_lo_u32 v6, 0, v2
-; CHECK-NEXT:    v_mul_hi_u32 v2, s6, v2
+; CHECK-NEXT:    v_mul_hi_u32 v2, s7, v2
 ; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
-; CHECK-NEXT:    v_mul_lo_u32 v3, s6, v3
+; CHECK-NEXT:    v_mul_lo_u32 v3, s7, v3
 ; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
 ; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
 ; CHECK-NEXT:    v_subb_u32_e64 v3, s[4:5], v1, v2, vcc
 ; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v2
-; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v0
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s7, v0
 ; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[4:5]
 ; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], 0, v3
 ; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
 ; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
 ; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
 ; CHECK-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
-; CHECK-NEXT:    v_subrev_i32_e32 v4, vcc, s6, v0
+; CHECK-NEXT:    v_subrev_i32_e32 v4, vcc, s7, v0
 ; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v4
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s7, v4
 ; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
 ; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
 ; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
-; CHECK-NEXT:    v_subrev_i32_e32 v7, vcc, s6, v4
+; CHECK-NEXT:    v_subrev_i32_e32 v7, vcc, s7, v4
 ; CHECK-NEXT:    v_subbrev_u32_e32 v8, vcc, 0, v1, vcc
 ; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
 ; CHECK-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
@@ -1344,14 +1344,14 @@ define <2 x i64> @v_urem_v2i64_pow2k_denom(<2 x i64> %num) {
 ; CGP-LABEL: v_urem_v2i64_pow2k_denom:
 ; CGP:       ; %bb.0:
 ; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    v_cvt_f32_u32_e32 v4, 0x1000
+; CGP-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
+; CGP-NEXT:    s_movk_i32 s8, 0xf000
 ; CGP-NEXT:    s_movk_i32 s10, 0x1000
-; CGP-NEXT:    v_cvt_f32_ubyte0_e32 v4, 0
-; CGP-NEXT:    v_cvt_f32_u32_e32 v5, s10
-; CGP-NEXT:    s_mov_b32 s8, 0xfffff000
-; CGP-NEXT:    v_mov_b32_e32 v6, v5
-; CGP-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v4
-; CGP-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v4
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v5
+; CGP-NEXT:    v_mov_b32_e32 v6, v4
+; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; CGP-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v5
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
 ; CGP-NEXT:    v_rcp_iflag_f32_e32 v5, v6
 ; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
 ; CGP-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
@@ -1600,22 +1600,22 @@ define i64 @v_urem_i64_oddk_denom(i64 %num) {
 ; CHECK-LABEL: v_urem_i64_oddk_denom:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    s_mov_b32 s6, 0x12d8fb
-; CHECK-NEXT:    v_cvt_f32_ubyte0_e32 v2, 0
-; CHECK-NEXT:    s_mov_b32 s7, 0xffed2705
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, s6
-; CHECK-NEXT:    v_mac_f32_e32 v3, 0x4f800000, v2
-; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v3
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, 0x12d8fb
+; CHECK-NEXT:    v_cvt_f32_ubyte0_e32 v3, 0
+; CHECK-NEXT:    s_mov_b32 s6, 0xffed2705
+; CHECK-NEXT:    s_mov_b32 s7, 0x12d8fb
+; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
 ; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
 ; CHECK-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
 ; CHECK-NEXT:    v_trunc_f32_e32 v3, v3
 ; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
-; CHECK-NEXT:    v_mul_lo_u32 v4, s7, v3
-; CHECK-NEXT:    v_mul_lo_u32 v5, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v4, s6, v3
+; CHECK-NEXT:    v_mul_lo_u32 v5, s6, v2
 ; CHECK-NEXT:    v_mul_lo_u32 v6, -1, v2
-; CHECK-NEXT:    v_mul_hi_u32 v7, s7, v2
+; CHECK-NEXT:    v_mul_hi_u32 v7, s6, v2
 ; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
 ; CHECK-NEXT:    v_mul_lo_u32 v6, v3, v5
 ; CHECK-NEXT:    v_mul_hi_u32 v8, v2, v5
@@ -1642,10 +1642,10 @@ define i64 @v_urem_i64_oddk_denom(i64 %num) {
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
 ; CHECK-NEXT:    v_addc_u32_e64 v5, s[4:5], v3, v4, vcc
 ; CHECK-NEXT:    v_add_i32_e64 v3, s[4:5], v3, v4
-; CHECK-NEXT:    v_mul_lo_u32 v4, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v4, s6, v2
 ; CHECK-NEXT:    v_mul_lo_u32 v6, -1, v2
-; CHECK-NEXT:    v_mul_hi_u32 v7, s7, v2
-; CHECK-NEXT:    v_mul_lo_u32 v8, s7, v5
+; CHECK-NEXT:    v_mul_hi_u32 v7, s6, v2
+; CHECK-NEXT:    v_mul_lo_u32 v8, s6, v5
 ; CHECK-NEXT:    v_mul_lo_u32 v9, v5, v4
 ; CHECK-NEXT:    v_mul_hi_u32 v10, v2, v4
 ; CHECK-NEXT:    v_mul_hi_u32 v4, v5, v4
@@ -1692,30 +1692,30 @@ define i64 @v_urem_i64_oddk_denom(i64 %num) {
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
 ; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; CHECK-NEXT:    v_mul_lo_u32 v5, s6, v2
+; CHECK-NEXT:    v_mul_lo_u32 v5, s7, v2
 ; CHECK-NEXT:    v_mul_lo_u32 v6, 0, v2
-; CHECK-NEXT:    v_mul_hi_u32 v2, s6, v2
+; CHECK-NEXT:    v_mul_hi_u32 v2, s7, v2
 ; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
-; CHECK-NEXT:    v_mul_lo_u32 v3, s6, v3
+; CHECK-NEXT:    v_mul_lo_u32 v3, s7, v3
 ; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
 ; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
 ; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
 ; CHECK-NEXT:    v_subb_u32_e64 v3, s[4:5], v1, v2, vcc
 ; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v2
-; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v0
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s7, v0
 ; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[4:5]
 ; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], 0, v3
 ; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
 ; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
 ; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
 ; CHECK-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
-; CHECK-NEXT:    v_subrev_i32_e32 v4, vcc, s6, v0
+; CHECK-NEXT:    v_subrev_i32_e32 v4, vcc, s7, v0
 ; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v4
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s7, v4
 ; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
 ; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
 ; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
-; CHECK-NEXT:    v_subrev_i32_e32 v7, vcc, s6, v4
+; CHECK-NEXT:    v_subrev_i32_e32 v7, vcc, s7, v4
 ; CHECK-NEXT:    v_subbrev_u32_e32 v8, vcc, 0, v1, vcc
 ; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
 ; CHECK-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
@@ -1995,14 +1995,14 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
 ; CGP-LABEL: v_urem_v2i64_oddk_denom:
 ; CGP:       ; %bb.0:
 ; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT:    s_mov_b32 s10, 0x12d8fb
-; CGP-NEXT:    v_cvt_f32_ubyte0_e32 v4, 0
-; CGP-NEXT:    v_cvt_f32_u32_e32 v5, s10
+; CGP-NEXT:    v_cvt_f32_u32_e32 v4, 0x12d8fb
+; CGP-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
 ; CGP-NEXT:    s_mov_b32 s8, 0xffed2705
-; CGP-NEXT:    v_mov_b32_e32 v6, v5
-; CGP-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v4
-; CGP-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v4
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v5
+; CGP-NEXT:    s_mov_b32 s10, 0x12d8fb
+; CGP-NEXT:    v_mov_b32_e32 v6, v4
+; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; CGP-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v5
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
 ; CGP-NEXT:    v_rcp_iflag_f32_e32 v5, v6
 ; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
 ; CGP-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
