[llvm] [AMDGPU] Fold operand after shrinking instruction in SIFoldOperands (PR #68426)

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 6 08:59:00 PDT 2023


https://github.com/jayfoad created https://github.com/llvm/llvm-project/pull/68426

D51345 added support for shrinking instructions in SIFoldOperands to
enable a fold, but did not actually do the fold. SIFoldOperands is run
twice, so in many cases the first run did the shrink, enabling the second
run to do the fold, but that only works for instructions that are
created early enough. Some shrinkable/foldable instructions are created
after the first run of SIFoldOperands, e.g. in SILoadStoreOptimizer.

This patch improves SIFoldOperands to do the fold immediately after
shrinking an instruction.


From 89de8694fac710723f442608244f61971f837c72 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Fri, 6 Oct 2023 16:20:38 +0100
Subject: [PATCH] [AMDGPU] Fold operand after shrinking instruction in
 SIFoldOperands

D51345 added support for shrinking instructions in SIFoldOperands to
enable a fold, but did not actually do the fold. SIFoldOperands is run
twice, so in many cases the first run did the shrink, enabling the second
run to do the fold, but that only works for instructions that are
created early enough. Some shrinkable/foldable instructions are created
after the first run of SIFoldOperands, e.g. in SILoadStoreOptimizer.

This patch improves SIFoldOperands to do the fold immediately after
shrinking an instruction.
---
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp     |   38 +-
 .../CodeGen/AMDGPU/GlobalISel/add.v2i16.ll    |    7 +-
 .../test/CodeGen/AMDGPU/GlobalISel/add_shl.ll |    4 +-
 .../AMDGPU/GlobalISel/extractelement.ll       |    1 +
 .../test/CodeGen/AMDGPU/GlobalISel/saddsat.ll |  286 ++--
 .../CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll     |   78 +-
 .../CodeGen/AMDGPU/GlobalISel/srem.i32.ll     |   50 +-
 .../CodeGen/AMDGPU/GlobalISel/srem.i64.ll     |    8 +-
 .../test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll |  414 +++---
 .../CodeGen/AMDGPU/GlobalISel/urem.i32.ll     |   40 +-
 .../CodeGen/AMDGPU/GlobalISel/urem.i64.ll     | 1030 ++++++-------
 .../CodeGen/AMDGPU/ds-combine-large-stride.ll |   30 +-
 .../CodeGen/AMDGPU/fold-fi-operand-shrink.mir |   24 +-
 ...ld-immediate-operand-shrink-with-carry.mir |   22 +-
 .../AMDGPU/fold-immediate-operand-shrink.mir  |   51 +-
 .../CodeGen/AMDGPU/integer-mad-patterns.ll    |    3 +-
 .../CodeGen/AMDGPU/llvm.is.fpclass.f16.ll     |  126 +-
 .../AMDGPU/promote-constOffset-to-imm.ll      |  106 +-
 llvm/test/CodeGen/AMDGPU/sdiv64.ll            |    9 +-
 .../CodeGen/AMDGPU/shrink-add-sub-constant.ll |  119 +-
 .../CodeGen/AMDGPU/spill-scavenge-offset.ll   | 1298 ++++++++---------
 llvm/test/CodeGen/AMDGPU/srem64.ll            |    9 +-
 llvm/test/CodeGen/AMDGPU/udiv64.ll            |    3 +-
 llvm/test/CodeGen/AMDGPU/urem64.ll            |    3 +-
 24 files changed, 1832 insertions(+), 1927 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 1ebfa297f4fc339..a581acd7ea73aaf 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -198,9 +198,8 @@ FunctionPass *llvm::createSIFoldOperandsPass() {
 
 bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const {
   MachineInstr *MI = Fold.UseMI;
-  MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
-  assert(Old.isReg());
-
+  MachineOperand *Old = &MI->getOperand(Fold.UseOpNo);
+  assert(Old->isReg());
 
   const uint64_t TSFlags = MI->getDesc().TSFlags;
   if (Fold.isImm()) {
@@ -211,7 +210,7 @@ bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const {
       // Set op_sel/op_sel_hi on this operand or bail out if op_sel is
       // already set.
       unsigned Opcode = MI->getOpcode();
-      int OpNo = MI->getOperandNo(&Old);
+      int OpNo = MI->getOperandNo(Old);
       int ModIdx = -1;
       if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0))
         ModIdx = AMDGPU::OpName::src0_modifiers;
@@ -236,11 +235,11 @@ bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const {
             if (!(Fold.ImmToFold & 0xffff)) {
               Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0);
               Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
-              Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff);
+              Old->ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff);
               return true;
             }
             Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
-            Old.ChangeToImmediate(Fold.ImmToFold & 0xffff);
+            Old->ChangeToImmediate(Fold.ImmToFold & 0xffff);
             return true;
           }
           break;
@@ -251,7 +250,9 @@ bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const {
     }
   }
 
-  if ((Fold.isImm() || Fold.isFI() || Fold.isGlobal()) && Fold.needsShrink()) {
+  if (Fold.needsShrink()) {
+    assert((Fold.isImm() || Fold.isFI() || Fold.isGlobal()) && "not handled");
+
     MachineBasicBlock *MBB = MI->getParent();
     auto Liveness = MBB->computeRegisterLiveness(TRI, AMDGPU::VCC, MI, 16);
     if (Liveness != MachineBasicBlock::LQR_Dead) {
@@ -290,37 +291,40 @@ bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const {
 
     if (Fold.Commuted)
       TII->commuteInstruction(*Inst32, false);
-    return true;
-  }
 
-  assert(!Fold.needsShrink() && "not handled");
+    Fold.UseMI = Inst32;
+    Fold.UseOpNo = AMDGPU::getNamedOperandIdx(Fold.UseMI->getOpcode(),
+                                              AMDGPU::OpName::src0);
+    MI = Fold.UseMI;
+    Old = &MI->getOperand(Fold.UseOpNo);
+  }
 
   if (Fold.isImm()) {
-    if (Old.isTied()) {
+    if (Old->isTied()) {
       int NewMFMAOpc = AMDGPU::getMFMAEarlyClobberOp(MI->getOpcode());
       if (NewMFMAOpc == -1)
         return false;
       MI->setDesc(TII->get(NewMFMAOpc));
       MI->untieRegOperand(0);
     }
-    Old.ChangeToImmediate(Fold.ImmToFold);
+    Old->ChangeToImmediate(Fold.ImmToFold);
     return true;
   }
 
   if (Fold.isGlobal()) {
-    Old.ChangeToGA(Fold.OpToFold->getGlobal(), Fold.OpToFold->getOffset(),
-                   Fold.OpToFold->getTargetFlags());
+    Old->ChangeToGA(Fold.OpToFold->getGlobal(), Fold.OpToFold->getOffset(),
+                    Fold.OpToFold->getTargetFlags());
     return true;
   }
 
   if (Fold.isFI()) {
-    Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
+    Old->ChangeToFrameIndex(Fold.FrameIndexToFold);
     return true;
   }
 
   MachineOperand *New = Fold.OpToFold;
-  Old.substVirtReg(New->getReg(), New->getSubReg(), *TRI);
-  Old.setIsUndef(New->isUndef());
+  Old->substVirtReg(New->getReg(), New->getSubReg(), *TRI);
+  Old->setIsUndef(New->isUndef());
   return true;
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll
index 26d1fbb09210c64..cd4b3150bd193b5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll
@@ -165,9 +165,8 @@ define <2 x i16> @v_add_v2i16_neg_inline_imm_splat(<2 x i16> %a) {
 ; GFX7-LABEL: v_add_v2i16_neg_inline_imm_splat:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    s_movk_i32 s4, 0xffc0
-; GFX7-NEXT:    v_add_i32_e32 v0, vcc, s4, v0
-; GFX7-NEXT:    v_add_i32_e32 v1, vcc, s4, v1
+; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 0xffffffc0, v0
+; GFX7-NEXT:    v_add_i32_e32 v1, vcc, 0xffffffc0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_add_v2i16_neg_inline_imm_splat:
@@ -199,6 +198,7 @@ define <2 x i16> @v_add_v2i16_neg_inline_imm_lo(<2 x i16> %a) {
 ; GFX7-LABEL: v_add_v2i16_neg_inline_imm_lo:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_mov_b32_e32 v0, 0xffffffc0
 ; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 0xffffffc0, v0
 ; GFX7-NEXT:    v_add_i32_e32 v1, vcc, 4, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
@@ -232,6 +232,7 @@ define <2 x i16> @v_add_v2i16_neg_inline_imm_hi(<2 x i16> %a) {
 ; GFX7-LABEL: v_add_v2i16_neg_inline_imm_hi:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_mov_b32_e32 v1, 0xffffffc0
 ; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 4, v0
 ; GFX7-NEXT:    v_add_i32_e32 v1, vcc, 0xffffffc0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/add_shl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/add_shl.ll
index a727ed39c79c659..c8c97dd072dc228 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/add_shl.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/add_shl.ll
@@ -101,7 +101,7 @@ define amdgpu_ps float @add_shl_vgpr_const(i32 %a, i32 %b) {
 define amdgpu_ps float @add_shl_vgpr_const_inline_const(i32 %a) {
 ; VI-LABEL: add_shl_vgpr_const_inline_const:
 ; VI:       ; %bb.0:
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
+; VI-NEXT:    v_mov_b32_e32 v0, 0x7e800
 ; VI-NEXT:    v_add_u32_e32 v0, vcc, 0x7e800, v0
 ; VI-NEXT:    ; return to shader part epilog
 ;
@@ -124,7 +124,7 @@ define amdgpu_ps float @add_shl_vgpr_const_inline_const(i32 %a) {
 define amdgpu_ps float @add_shl_vgpr_inline_const_x2(i32 %a) {
 ; VI-LABEL: add_shl_vgpr_inline_const_x2:
 ; VI:       ; %bb.0:
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
+; VI-NEXT:    v_mov_b32_e32 v0, 0x600
 ; VI-NEXT:    v_add_u32_e32 v0, vcc, 0x600, v0
 ; VI-NEXT:    ; return to shader part epilog
 ;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
index 701a733d9e8e957..59314737cf0628b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
@@ -4865,6 +4865,7 @@ define i32 @v_extract_v64i32_37(ptr addrspace(1) %ptr) {
 ; MOVREL-LABEL: v_extract_v64i32_37:
 ; MOVREL:       ; %bb.0:
 ; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MOVREL-NEXT:    v_mov_b32_e32 v0, 0x90
 ; MOVREL-NEXT:    v_add_u32_e32 v0, vcc, 0x90, v0
 ; MOVREL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; MOVREL-NEXT:    flat_load_dwordx4 v[4:7], v[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
index cded5c94edf8cc3..3699206273574d8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
@@ -231,14 +231,12 @@ define i16 @v_saddsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 8, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
-; GFX6-NEXT:    s_brev_b32 s5, 1
 ; GFX6-NEXT:    v_min_i32_e32 v5, 0, v0
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 8, v1
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
-; GFX6-NEXT:    s_brev_b32 s4, -2
 ; GFX6-NEXT:    v_max_i32_e32 v4, 0, v0
-; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, s5, v5
-; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, s4, v4
+; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, 0x80000000, v5
+; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 0x7fffffff, v4
 ; GFX6-NEXT:    v_max_i32_e32 v1, v5, v1
 ; GFX6-NEXT:    v_min_i32_e32 v1, v1, v4
 ; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
@@ -246,8 +244,8 @@ define i16 @v_saddsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
 ; GFX6-NEXT:    v_min_i32_e32 v4, 0, v1
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
 ; GFX6-NEXT:    v_max_i32_e32 v3, 0, v1
-; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, s5, v4
-; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, s4, v3
+; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 0x80000000, v4
+; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0x7fffffff, v3
 ; GFX6-NEXT:    v_max_i32_e32 v2, v4, v2
 ; GFX6-NEXT:    v_min_i32_e32 v2, v2, v3
 ; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
@@ -512,15 +510,15 @@ define i32 @v_saddsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
-; GFX6-NEXT:    s_brev_b32 s4, -2
 ; GFX6-NEXT:    v_max_i32_e32 v8, 0, v0
 ; GFX6-NEXT:    v_sub_i32_e32 v10, vcc, s5, v10
-; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, s4, v8
+; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, 0x7fffffff, v8
 ; GFX6-NEXT:    v_max_i32_e32 v1, v10, v1
 ; GFX6-NEXT:    v_min_i32_e32 v1, v1, v8
 ; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
 ; GFX6-NEXT:    v_min_i32_e32 v8, 0, v1
+; GFX6-NEXT:    s_brev_b32 s4, -2
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 24, v5
 ; GFX6-NEXT:    v_max_i32_e32 v5, 0, v1
 ; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, s5, v8
@@ -1026,7 +1024,7 @@ define i24 @v_saddsat_i24(i24 %lhs, i24 %rhs) {
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v3, v0
 ; GFX8-NEXT:    v_bfe_i32 v0, v1, 0, 24
 ; GFX8-NEXT:    v_cmp_gt_i32_e64 s[6:7], 0, v0
-; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 23, v3
+; GFX8-NEXT:    v_mov_b32_e32 v0, 0xff800000
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0xff800000, v0
 ; GFX8-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
@@ -1265,19 +1263,17 @@ define <2 x i32> @v_saddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; GFX6-LABEL: v_saddsat_v2i32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    s_brev_b32 s5, 1
 ; GFX6-NEXT:    v_min_i32_e32 v5, 0, v0
-; GFX6-NEXT:    s_brev_b32 s4, -2
 ; GFX6-NEXT:    v_max_i32_e32 v4, 0, v0
-; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, s5, v5
-; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, s4, v4
+; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, 0x80000000, v5
+; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 0x7fffffff, v4
 ; GFX6-NEXT:    v_max_i32_e32 v2, v5, v2
 ; GFX6-NEXT:    v_min_i32_e32 v2, v2, v4
 ; GFX6-NEXT:    v_min_i32_e32 v4, 0, v1
 ; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
 ; GFX6-NEXT:    v_max_i32_e32 v2, 0, v1
-; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, s5, v4
-; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, s4, v2
+; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 0x80000000, v4
+; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 0x7fffffff, v2
 ; GFX6-NEXT:    v_max_i32_e32 v3, v4, v3
 ; GFX6-NEXT:    v_min_i32_e32 v2, v3, v2
 ; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
@@ -1286,19 +1282,17 @@ define <2 x i32> @v_saddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; GFX8-LABEL: v_saddsat_v2i32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_brev_b32 s5, 1
 ; GFX8-NEXT:    v_min_i32_e32 v5, 0, v0
-; GFX8-NEXT:    s_brev_b32 s4, -2
 ; GFX8-NEXT:    v_max_i32_e32 v4, 0, v0
-; GFX8-NEXT:    v_sub_u32_e32 v5, vcc, s5, v5
-; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, s4, v4
+; GFX8-NEXT:    v_sub_u32_e32 v5, vcc, 0x80000000, v5
+; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 0x7fffffff, v4
 ; GFX8-NEXT:    v_max_i32_e32 v2, v5, v2
 ; GFX8-NEXT:    v_min_i32_e32 v2, v2, v4
 ; GFX8-NEXT:    v_min_i32_e32 v4, 0, v1
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
 ; GFX8-NEXT:    v_max_i32_e32 v2, 0, v1
-; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, s5, v4
-; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, s4, v2
+; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 0x80000000, v4
+; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 0x7fffffff, v2
 ; GFX8-NEXT:    v_max_i32_e32 v3, v4, v3
 ; GFX8-NEXT:    v_min_i32_e32 v2, v3, v2
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v1, v2
@@ -1383,26 +1377,25 @@ define <3 x i32> @v_saddsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
 ; GFX6-LABEL: v_saddsat_v3i32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    s_brev_b32 s5, 1
 ; GFX6-NEXT:    v_min_i32_e32 v7, 0, v0
-; GFX6-NEXT:    s_brev_b32 s4, -2
 ; GFX6-NEXT:    v_max_i32_e32 v6, 0, v0
-; GFX6-NEXT:    v_sub_i32_e32 v7, vcc, s5, v7
-; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, s4, v6
+; GFX6-NEXT:    v_sub_i32_e32 v7, vcc, 0x80000000, v7
+; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, 0x7fffffff, v6
 ; GFX6-NEXT:    v_max_i32_e32 v3, v7, v3
 ; GFX6-NEXT:    v_min_i32_e32 v3, v3, v6
 ; GFX6-NEXT:    v_min_i32_e32 v6, 0, v1
+; GFX6-NEXT:    s_brev_b32 s4, -2
 ; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v3
 ; GFX6-NEXT:    v_max_i32_e32 v3, 0, v1
-; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, s5, v6
+; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, 0x80000000, v6
 ; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, s4, v3
 ; GFX6-NEXT:    v_max_i32_e32 v4, v6, v4
 ; GFX6-NEXT:    v_min_i32_e32 v3, v4, v3
 ; GFX6-NEXT:    v_min_i32_e32 v4, 0, v2
 ; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
 ; GFX6-NEXT:    v_max_i32_e32 v3, 0, v2
-; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, s5, v4
-; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, s4, v3
+; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 0x80000000, v4
+; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0x7fffffff, v3
 ; GFX6-NEXT:    v_max_i32_e32 v4, v4, v5
 ; GFX6-NEXT:    v_min_i32_e32 v3, v4, v3
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
@@ -1411,26 +1404,25 @@ define <3 x i32> @v_saddsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
 ; GFX8-LABEL: v_saddsat_v3i32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_brev_b32 s5, 1
 ; GFX8-NEXT:    v_min_i32_e32 v7, 0, v0
-; GFX8-NEXT:    s_brev_b32 s4, -2
 ; GFX8-NEXT:    v_max_i32_e32 v6, 0, v0
-; GFX8-NEXT:    v_sub_u32_e32 v7, vcc, s5, v7
-; GFX8-NEXT:    v_sub_u32_e32 v6, vcc, s4, v6
+; GFX8-NEXT:    v_sub_u32_e32 v7, vcc, 0x80000000, v7
+; GFX8-NEXT:    v_sub_u32_e32 v6, vcc, 0x7fffffff, v6
 ; GFX8-NEXT:    v_max_i32_e32 v3, v7, v3
 ; GFX8-NEXT:    v_min_i32_e32 v3, v3, v6
 ; GFX8-NEXT:    v_min_i32_e32 v6, 0, v1
+; GFX8-NEXT:    s_brev_b32 s4, -2
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v3
 ; GFX8-NEXT:    v_max_i32_e32 v3, 0, v1
-; GFX8-NEXT:    v_sub_u32_e32 v6, vcc, s5, v6
+; GFX8-NEXT:    v_sub_u32_e32 v6, vcc, 0x80000000, v6
 ; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s4, v3
 ; GFX8-NEXT:    v_max_i32_e32 v4, v6, v4
 ; GFX8-NEXT:    v_min_i32_e32 v3, v4, v3
 ; GFX8-NEXT:    v_min_i32_e32 v4, 0, v2
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v1, v3
 ; GFX8-NEXT:    v_max_i32_e32 v3, 0, v2
-; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, s5, v4
-; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s4, v3
+; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 0x80000000, v4
+; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, 0x7fffffff, v3
 ; GFX8-NEXT:    v_max_i32_e32 v4, v4, v5
 ; GFX8-NEXT:    v_min_i32_e32 v3, v4, v3
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v3
@@ -1536,26 +1528,24 @@ define <4 x i32> @v_saddsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
 ; GFX6-LABEL: v_saddsat_v4i32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    s_brev_b32 s5, 1
 ; GFX6-NEXT:    v_min_i32_e32 v9, 0, v0
-; GFX6-NEXT:    s_brev_b32 s4, -2
 ; GFX6-NEXT:    v_max_i32_e32 v8, 0, v0
-; GFX6-NEXT:    v_sub_i32_e32 v9, vcc, s5, v9
-; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, s4, v8
+; GFX6-NEXT:    v_sub_i32_e32 v9, vcc, 0x80000000, v9
+; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, 0x7fffffff, v8
 ; GFX6-NEXT:    v_max_i32_e32 v4, v9, v4
 ; GFX6-NEXT:    v_min_i32_e32 v4, v4, v8
 ; GFX6-NEXT:    v_min_i32_e32 v8, 0, v1
 ; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
 ; GFX6-NEXT:    v_max_i32_e32 v4, 0, v1
-; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, s5, v8
-; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, s4, v4
+; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, 0x80000000, v8
+; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 0x7fffffff, v4
 ; GFX6-NEXT:    v_max_i32_e32 v5, v8, v5
 ; GFX6-NEXT:    v_min_i32_e32 v4, v5, v4
 ; GFX6-NEXT:    v_min_i32_e32 v5, 0, v2
 ; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
 ; GFX6-NEXT:    v_max_i32_e32 v4, 0, v2
-; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, s5, v5
-; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, s4, v4
+; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, 0x80000000, v5
+; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 0x7fffffff, v4
 ; GFX6-NEXT:    v_max_i32_e32 v5, v5, v6
 ; GFX6-NEXT:    v_min_i32_e32 v4, v5, v4
 ; GFX6-NEXT:    v_min_i32_e32 v5, 0, v3
@@ -1571,26 +1561,24 @@ define <4 x i32> @v_saddsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
 ; GFX8-LABEL: v_saddsat_v4i32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_brev_b32 s5, 1
 ; GFX8-NEXT:    v_min_i32_e32 v9, 0, v0
-; GFX8-NEXT:    s_brev_b32 s4, -2
 ; GFX8-NEXT:    v_max_i32_e32 v8, 0, v0
-; GFX8-NEXT:    v_sub_u32_e32 v9, vcc, s5, v9
-; GFX8-NEXT:    v_sub_u32_e32 v8, vcc, s4, v8
+; GFX8-NEXT:    v_sub_u32_e32 v9, vcc, 0x80000000, v9
+; GFX8-NEXT:    v_sub_u32_e32 v8, vcc, 0x7fffffff, v8
 ; GFX8-NEXT:    v_max_i32_e32 v4, v9, v4
 ; GFX8-NEXT:    v_min_i32_e32 v4, v4, v8
 ; GFX8-NEXT:    v_min_i32_e32 v8, 0, v1
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v4
 ; GFX8-NEXT:    v_max_i32_e32 v4, 0, v1
-; GFX8-NEXT:    v_sub_u32_e32 v8, vcc, s5, v8
-; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, s4, v4
+; GFX8-NEXT:    v_sub_u32_e32 v8, vcc, 0x80000000, v8
+; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 0x7fffffff, v4
 ; GFX8-NEXT:    v_max_i32_e32 v5, v8, v5
 ; GFX8-NEXT:    v_min_i32_e32 v4, v5, v4
 ; GFX8-NEXT:    v_min_i32_e32 v5, 0, v2
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v1, v4
 ; GFX8-NEXT:    v_max_i32_e32 v4, 0, v2
-; GFX8-NEXT:    v_sub_u32_e32 v5, vcc, s5, v5
-; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, s4, v4
+; GFX8-NEXT:    v_sub_u32_e32 v5, vcc, 0x80000000, v5
+; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 0x7fffffff, v4
 ; GFX8-NEXT:    v_max_i32_e32 v5, v5, v6
 ; GFX8-NEXT:    v_min_i32_e32 v4, v5, v4
 ; GFX8-NEXT:    v_min_i32_e32 v5, 0, v3
@@ -1724,34 +1712,32 @@ define <5 x i32> @v_saddsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) {
 ; GFX6-LABEL: v_saddsat_v5i32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    s_brev_b32 s5, 1
-; GFX6-NEXT:    v_min_i32_e32 v12, 0, v0
-; GFX6-NEXT:    s_brev_b32 s4, -2
+; GFX6-NEXT:    v_min_i32_e32 v11, 0, v0
 ; GFX6-NEXT:    v_max_i32_e32 v10, 0, v0
-; GFX6-NEXT:    v_sub_i32_e32 v12, vcc, s5, v12
-; GFX6-NEXT:    v_sub_i32_e32 v10, vcc, s4, v10
-; GFX6-NEXT:    v_max_i32_e32 v5, v12, v5
+; GFX6-NEXT:    v_sub_i32_e32 v11, vcc, 0x80000000, v11
+; GFX6-NEXT:    v_sub_i32_e32 v10, vcc, 0x7fffffff, v10
+; GFX6-NEXT:    v_max_i32_e32 v5, v11, v5
 ; GFX6-NEXT:    v_min_i32_e32 v5, v5, v10
 ; GFX6-NEXT:    v_min_i32_e32 v10, 0, v1
+; GFX6-NEXT:    s_brev_b32 s4, -2
 ; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v5
 ; GFX6-NEXT:    v_max_i32_e32 v5, 0, v1
-; GFX6-NEXT:    v_sub_i32_e32 v10, vcc, s5, v10
+; GFX6-NEXT:    v_sub_i32_e32 v10, vcc, 0x80000000, v10
 ; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, s4, v5
 ; GFX6-NEXT:    v_max_i32_e32 v6, v10, v6
 ; GFX6-NEXT:    v_min_i32_e32 v5, v6, v5
 ; GFX6-NEXT:    v_min_i32_e32 v6, 0, v2
 ; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
 ; GFX6-NEXT:    v_max_i32_e32 v5, 0, v2
-; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, s5, v6
-; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, s4, v5
+; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, 0x80000000, v6
+; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, 0x7fffffff, v5
 ; GFX6-NEXT:    v_max_i32_e32 v6, v6, v7
 ; GFX6-NEXT:    v_min_i32_e32 v5, v6, v5
 ; GFX6-NEXT:    v_min_i32_e32 v6, 0, v3
-; GFX6-NEXT:    v_bfrev_b32_e32 v11, -2
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
 ; GFX6-NEXT:    v_max_i32_e32 v5, 0, v3
 ; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, 0x80000000, v6
-; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, v11, v5
+; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, 0x7fffffff, v5
 ; GFX6-NEXT:    v_max_i32_e32 v6, v6, v8
 ; GFX6-NEXT:    v_min_i32_e32 v5, v6, v5
 ; GFX6-NEXT:    v_min_i32_e32 v6, 0, v4
@@ -1767,34 +1753,32 @@ define <5 x i32> @v_saddsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) {
 ; GFX8-LABEL: v_saddsat_v5i32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_brev_b32 s5, 1
-; GFX8-NEXT:    v_min_i32_e32 v12, 0, v0
-; GFX8-NEXT:    s_brev_b32 s4, -2
+; GFX8-NEXT:    v_min_i32_e32 v11, 0, v0
 ; GFX8-NEXT:    v_max_i32_e32 v10, 0, v0
-; GFX8-NEXT:    v_sub_u32_e32 v12, vcc, s5, v12
-; GFX8-NEXT:    v_sub_u32_e32 v10, vcc, s4, v10
-; GFX8-NEXT:    v_max_i32_e32 v5, v12, v5
+; GFX8-NEXT:    v_sub_u32_e32 v11, vcc, 0x80000000, v11
+; GFX8-NEXT:    v_sub_u32_e32 v10, vcc, 0x7fffffff, v10
+; GFX8-NEXT:    v_max_i32_e32 v5, v11, v5
 ; GFX8-NEXT:    v_min_i32_e32 v5, v5, v10
 ; GFX8-NEXT:    v_min_i32_e32 v10, 0, v1
+; GFX8-NEXT:    s_brev_b32 s4, -2
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v5
 ; GFX8-NEXT:    v_max_i32_e32 v5, 0, v1
-; GFX8-NEXT:    v_sub_u32_e32 v10, vcc, s5, v10
+; GFX8-NEXT:    v_sub_u32_e32 v10, vcc, 0x80000000, v10
 ; GFX8-NEXT:    v_sub_u32_e32 v5, vcc, s4, v5
 ; GFX8-NEXT:    v_max_i32_e32 v6, v10, v6
 ; GFX8-NEXT:    v_min_i32_e32 v5, v6, v5
 ; GFX8-NEXT:    v_min_i32_e32 v6, 0, v2
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v1, v5
 ; GFX8-NEXT:    v_max_i32_e32 v5, 0, v2
-; GFX8-NEXT:    v_sub_u32_e32 v6, vcc, s5, v6
-; GFX8-NEXT:    v_sub_u32_e32 v5, vcc, s4, v5
+; GFX8-NEXT:    v_sub_u32_e32 v6, vcc, 0x80000000, v6
+; GFX8-NEXT:    v_sub_u32_e32 v5, vcc, 0x7fffffff, v5
 ; GFX8-NEXT:    v_max_i32_e32 v6, v6, v7
 ; GFX8-NEXT:    v_min_i32_e32 v5, v6, v5
 ; GFX8-NEXT:    v_min_i32_e32 v6, 0, v3
-; GFX8-NEXT:    v_bfrev_b32_e32 v11, -2
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v5
 ; GFX8-NEXT:    v_max_i32_e32 v5, 0, v3
 ; GFX8-NEXT:    v_sub_u32_e32 v6, vcc, 0x80000000, v6
-; GFX8-NEXT:    v_sub_u32_e32 v5, vcc, v11, v5
+; GFX8-NEXT:    v_sub_u32_e32 v5, vcc, 0x7fffffff, v5
 ; GFX8-NEXT:    v_max_i32_e32 v6, v6, v8
 ; GFX8-NEXT:    v_min_i32_e32 v5, v6, v5
 ; GFX8-NEXT:    v_min_i32_e32 v6, 0, v4
@@ -2766,13 +2750,11 @@ define <2 x i16> @v_saddsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX6-NEXT:    s_brev_b32 s5, 1
 ; GFX6-NEXT:    v_min_i32_e32 v5, 0, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX6-NEXT:    s_brev_b32 s4, -2
 ; GFX6-NEXT:    v_max_i32_e32 v4, 0, v0
-; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, s5, v5
-; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, s4, v4
+; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, 0x80000000, v5
+; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 0x7fffffff, v4
 ; GFX6-NEXT:    v_max_i32_e32 v2, v5, v2
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 ; GFX6-NEXT:    v_min_i32_e32 v2, v2, v4
@@ -2780,8 +2762,8 @@ define <2 x i16> @v_saddsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
 ; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
 ; GFX6-NEXT:    v_max_i32_e32 v3, 0, v1
-; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, s5, v4
-; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, s4, v3
+; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 0x80000000, v4
+; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0x7fffffff, v3
 ; GFX6-NEXT:    v_max_i32_e32 v2, v4, v2
 ; GFX6-NEXT:    v_min_i32_e32 v2, v2, v3
 ; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
@@ -2978,13 +2960,11 @@ define amdgpu_ps float @saddsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) {
 ; GFX6-LABEL: saddsat_v2i16_vs:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX6-NEXT:    s_brev_b32 s3, 1
 ; GFX6-NEXT:    v_min_i32_e32 v3, 0, v0
 ; GFX6-NEXT:    s_lshl_b32 s0, s0, 16
-; GFX6-NEXT:    s_brev_b32 s2, -2
 ; GFX6-NEXT:    v_max_i32_e32 v2, 0, v0
-; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, s3, v3
-; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, s2, v2
+; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0x80000000, v3
+; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 0x7fffffff, v2
 ; GFX6-NEXT:    v_max_i32_e32 v3, s0, v3
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 ; GFX6-NEXT:    v_min_i32_e32 v2, v3, v2
@@ -2992,8 +2972,8 @@ define amdgpu_ps float @saddsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) {
 ; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
 ; GFX6-NEXT:    s_lshl_b32 s0, s1, 16
 ; GFX6-NEXT:    v_max_i32_e32 v2, 0, v1
-; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, s3, v3
-; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, s2, v2
+; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0x80000000, v3
+; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 0x7fffffff, v2
 ; GFX6-NEXT:    v_max_i32_e32 v3, s0, v3
 ; GFX6-NEXT:    v_min_i32_e32 v2, v3, v2
 ; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
@@ -3059,14 +3039,14 @@ define <2 x float> @v_saddsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
 ; GFX6-NEXT:    s_brev_b32 s5, 1
 ; GFX6-NEXT:    v_min_i32_e32 v10, 0, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
-; GFX6-NEXT:    s_brev_b32 s4, -2
 ; GFX6-NEXT:    v_max_i32_e32 v8, 0, v0
 ; GFX6-NEXT:    v_sub_i32_e32 v10, vcc, s5, v10
-; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, s4, v8
+; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, 0x7fffffff, v8
 ; GFX6-NEXT:    v_max_i32_e32 v4, v10, v4
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 ; GFX6-NEXT:    v_min_i32_e32 v4, v4, v8
 ; GFX6-NEXT:    v_min_i32_e32 v8, 0, v1
+; GFX6-NEXT:    s_brev_b32 s4, -2
 ; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 16, v5
 ; GFX6-NEXT:    v_max_i32_e32 v5, 0, v1
@@ -4135,12 +4115,12 @@ define i48 @v_saddsat_i48(i48 %lhs, i48 %rhs) {
 ; GFX6-NEXT:    v_bfe_i32 v3, v2, 0, 16
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[0:1]
 ; GFX6-NEXT:    v_cmp_gt_i64_e64 s[6:7], 0, v[2:3]
-; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 0xffff8000, v0
-; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 15, v5
+; GFX6-NEXT:    v_mov_b32_e32 v1, 0xffff8000
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 0xffff8000, v1
+; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 15, v5
 ; GFX6-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
-; GFX6-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX6-NEXT:    v_cndmask_b32_e32 v1, v6, v2, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_saddsat_i48:
@@ -4153,12 +4133,12 @@ define i48 @v_saddsat_i48(i48 %lhs, i48 %rhs) {
 ; GFX8-NEXT:    v_bfe_i32 v3, v2, 0, 16
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[0:1]
 ; GFX8-NEXT:    v_cmp_gt_i64_e64 s[6:7], 0, v[2:3]
-; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0xffff8000, v0
-; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 15, v5
+; GFX8-NEXT:    v_mov_b32_e32 v1, 0xffff8000
+; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0xffff8000, v1
+; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 15, v5
 ; GFX8-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v2, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_saddsat_i48:
@@ -4170,8 +4150,9 @@ define i48 @v_saddsat_i48(i48 %lhs, i48 %rhs) {
 ; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
 ; GFX9-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[0:1]
 ; GFX9-NEXT:    v_cmp_gt_i64_e64 s[6:7], 0, v[2:3]
+; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
+; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v1
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
-; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v0
 ; GFX9-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
@@ -4342,12 +4323,12 @@ define amdgpu_ps <2 x float> @saddsat_i48_sv(i48 inreg %lhs, i48 %rhs) {
 ; GFX6-NEXT:    v_bfe_i32 v1, v0, 0, 16
 ; GFX6-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], v[2:3]
 ; GFX6-NEXT:    v_cmp_gt_i64_e64 s[2:3], 0, v[0:1]
-; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
-; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 15, v3
-; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 0xffff8000, v0
+; GFX6-NEXT:    v_mov_b32_e32 v1, 0xffff8000
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 0xffff8000, v1
 ; GFX6-NEXT:    s_xor_b64 vcc, s[2:3], s[0:1]
-; GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX6-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
+; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 15, v3
+; GFX6-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX6-NEXT:    ; return to shader part epilog
 ;
@@ -4361,12 +4342,12 @@ define amdgpu_ps <2 x float> @saddsat_i48_sv(i48 inreg %lhs, i48 %rhs) {
 ; GFX8-NEXT:    v_bfe_i32 v1, v0, 0, 16
 ; GFX8-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], v[2:3]
 ; GFX8-NEXT:    v_cmp_gt_i64_e64 s[2:3], 0, v[0:1]
-; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
-; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 15, v3
-; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0xffff8000, v0
+; GFX8-NEXT:    v_mov_b32_e32 v1, 0xffff8000
+; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0xffff8000, v1
 ; GFX8-NEXT:    s_xor_b64 vcc, s[2:3], s[0:1]
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
+; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 15, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX8-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX8-NEXT:    ; return to shader part epilog
 ;
@@ -4379,8 +4360,9 @@ define amdgpu_ps <2 x float> @saddsat_i48_sv(i48 inreg %lhs, i48 %rhs) {
 ; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v1, vcc
 ; GFX9-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], v[2:3]
 ; GFX9-NEXT:    v_cmp_gt_i64_e64 s[2:3], 0, v[0:1]
+; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
+; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v1
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
-; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v0
 ; GFX9-NEXT:    s_xor_b64 vcc, s[2:3], s[0:1]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
@@ -4437,12 +4419,12 @@ define amdgpu_ps <2 x float> @saddsat_i48_vs(i48 %lhs, i48 inreg %rhs) {
 ; GFX6-NEXT:    s_bfe_i64 s[0:1], s[0:1], 0x300000
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[2:3], v[2:3], v[0:1]
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[0:1], s[0:1], 0
-; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
-; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 15, v3
-; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 0xffff8000, v0
+; GFX6-NEXT:    v_mov_b32_e32 v1, 0xffff8000
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 0xffff8000, v1
 ; GFX6-NEXT:    s_xor_b64 vcc, s[0:1], s[2:3]
-; GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX6-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
+; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 15, v3
+; GFX6-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX6-NEXT:    ; return to shader part epilog
 ;
@@ -4456,12 +4438,12 @@ define amdgpu_ps <2 x float> @saddsat_i48_vs(i48 %lhs, i48 inreg %rhs) {
 ; GFX8-NEXT:    s_bfe_i64 s[0:1], s[0:1], 0x300000
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[2:3], v[2:3], v[0:1]
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[0:1], s[0:1], 0
-; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
-; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 15, v3
-; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0xffff8000, v0
+; GFX8-NEXT:    v_mov_b32_e32 v1, 0xffff8000
+; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0xffff8000, v1
 ; GFX8-NEXT:    s_xor_b64 vcc, s[0:1], s[2:3]
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
+; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 15, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX8-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX8-NEXT:    ; return to shader part epilog
 ;
@@ -4474,8 +4456,9 @@ define amdgpu_ps <2 x float> @saddsat_i48_vs(i48 %lhs, i48 inreg %rhs) {
 ; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v1, v3, vcc
 ; GFX9-NEXT:    v_cmp_lt_i64_e64 s[0:1], v[2:3], v[0:1]
 ; GFX9-NEXT:    v_cmp_lt_i64_e64 s[2:3], s[2:3], 0
+; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
+; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v1
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
-; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v0
 ; GFX9-NEXT:    s_xor_b64 vcc, s[2:3], s[0:1]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
@@ -4529,8 +4512,9 @@ define i64 @v_saddsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX6-NEXT:    v_addc_u32_e32 v5, vcc, v1, v3, vcc
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[0:1]
 ; GFX6-NEXT:    v_cmp_gt_i64_e64 s[6:7], 0, v[2:3]
+; GFX6-NEXT:    v_bfrev_b32_e32 v1, 1
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 0x80000000, v1
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
-; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 0x80000000, v0
 ; GFX6-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
@@ -4543,8 +4527,9 @@ define i64 @v_saddsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX8-NEXT:    v_addc_u32_e32 v5, vcc, v1, v3, vcc
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[0:1]
 ; GFX8-NEXT:    v_cmp_gt_i64_e64 s[6:7], 0, v[2:3]
+; GFX8-NEXT:    v_bfrev_b32_e32 v1, 1
+; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x80000000, v1
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
-; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x80000000, v0
 ; GFX8-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
@@ -4557,8 +4542,9 @@ define i64 @v_saddsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
 ; GFX9-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[0:1]
 ; GFX9-NEXT:    v_cmp_gt_i64_e64 s[6:7], 0, v[2:3]
+; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
+; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v1
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
-; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v0
 ; GFX9-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
@@ -4702,8 +4688,9 @@ define amdgpu_ps <2 x float> @saddsat_i64_sv(i64 inreg %lhs, i64 %rhs) {
 ; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, v3, v1, vcc
 ; GFX6-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], v[2:3]
 ; GFX6-NEXT:    v_cmp_gt_i64_e64 s[2:3], 0, v[0:1]
+; GFX6-NEXT:    v_bfrev_b32_e32 v1, 1
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 0x80000000, v1
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
-; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 0x80000000, v0
 ; GFX6-NEXT:    s_xor_b64 vcc, s[2:3], s[0:1]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
@@ -4716,8 +4703,9 @@ define amdgpu_ps <2 x float> @saddsat_i64_sv(i64 inreg %lhs, i64 %rhs) {
 ; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, v3, v1, vcc
 ; GFX8-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], v[2:3]
 ; GFX8-NEXT:    v_cmp_gt_i64_e64 s[2:3], 0, v[0:1]
+; GFX8-NEXT:    v_bfrev_b32_e32 v1, 1
+; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x80000000, v1
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
-; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x80000000, v0
 ; GFX8-NEXT:    s_xor_b64 vcc, s[2:3], s[0:1]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
@@ -4730,8 +4718,9 @@ define amdgpu_ps <2 x float> @saddsat_i64_sv(i64 inreg %lhs, i64 %rhs) {
 ; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v1, vcc
 ; GFX9-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], v[2:3]
 ; GFX9-NEXT:    v_cmp_gt_i64_e64 s[2:3], 0, v[0:1]
+; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
+; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v1
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
-; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v0
 ; GFX9-NEXT:    s_xor_b64 vcc, s[2:3], s[0:1]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
@@ -4774,8 +4763,9 @@ define amdgpu_ps <2 x float> @saddsat_i64_vs(i64 %lhs, i64 inreg %rhs) {
 ; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, v1, v3, vcc
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[2:3], v[2:3], v[0:1]
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[0:1], s[0:1], 0
+; GFX6-NEXT:    v_bfrev_b32_e32 v1, 1
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 0x80000000, v1
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
-; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 0x80000000, v0
 ; GFX6-NEXT:    s_xor_b64 vcc, s[0:1], s[2:3]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
@@ -4788,8 +4778,9 @@ define amdgpu_ps <2 x float> @saddsat_i64_vs(i64 %lhs, i64 inreg %rhs) {
 ; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, v1, v3, vcc
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[2:3], v[2:3], v[0:1]
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[0:1], s[0:1], 0
+; GFX8-NEXT:    v_bfrev_b32_e32 v1, 1
+; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x80000000, v1
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
-; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x80000000, v0
 ; GFX8-NEXT:    s_xor_b64 vcc, s[0:1], s[2:3]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
@@ -4802,8 +4793,9 @@ define amdgpu_ps <2 x float> @saddsat_i64_vs(i64 %lhs, i64 inreg %rhs) {
 ; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v1, v3, vcc
 ; GFX9-NEXT:    v_cmp_lt_i64_e64 s[2:3], v[2:3], v[0:1]
 ; GFX9-NEXT:    v_cmp_lt_i64_e64 s[0:1], s[0:1], 0
+; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
+; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v1
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
-; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v0
 ; GFX9-NEXT:    s_xor_b64 vcc, s[0:1], s[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
@@ -4846,9 +4838,9 @@ define <2 x i64> @v_saddsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX6-NEXT:    v_addc_u32_e32 v9, vcc, v1, v5, vcc
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[8:9], v[0:1]
 ; GFX6-NEXT:    v_cmp_gt_i64_e64 s[6:7], 0, v[4:5]
+; GFX6-NEXT:    v_bfrev_b32_e32 v10, 1
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 0x80000000, v10
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v9
-; GFX6-NEXT:    v_bfrev_b32_e32 v1, 1
-; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v0, v1
 ; GFX6-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
@@ -4856,8 +4848,8 @@ define <2 x i64> @v_saddsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX6-NEXT:    v_addc_u32_e32 v5, vcc, v3, v7, vcc
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[2:3]
 ; GFX6-NEXT:    v_cmp_gt_i64_e64 s[6:7], 0, v[6:7]
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 0x80000000, v10
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v2, 31, v5
-; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 0x80000000, v2
 ; GFX6-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
@@ -4870,9 +4862,9 @@ define <2 x i64> @v_saddsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX8-NEXT:    v_addc_u32_e32 v9, vcc, v1, v5, vcc
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[8:9], v[0:1]
 ; GFX8-NEXT:    v_cmp_gt_i64_e64 s[6:7], 0, v[4:5]
+; GFX8-NEXT:    v_bfrev_b32_e32 v10, 1
+; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x80000000, v10
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v9
-; GFX8-NEXT:    v_bfrev_b32_e32 v1, 1
-; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v0, v1
 ; GFX8-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
@@ -4880,8 +4872,8 @@ define <2 x i64> @v_saddsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX8-NEXT:    v_addc_u32_e32 v5, vcc, v3, v7, vcc
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[2:3]
 ; GFX8-NEXT:    v_cmp_gt_i64_e64 s[6:7], 0, v[6:7]
+; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0x80000000, v10
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 31, v5
-; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0x80000000, v2
 ; GFX8-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
@@ -4894,9 +4886,9 @@ define <2 x i64> @v_saddsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX9-NEXT:    v_addc_co_u32_e32 v9, vcc, v1, v5, vcc
 ; GFX9-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[8:9], v[0:1]
 ; GFX9-NEXT:    v_cmp_gt_i64_e64 s[6:7], 0, v[4:5]
+; GFX9-NEXT:    v_bfrev_b32_e32 v10, 1
+; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v10
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v9
-; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
-; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, v0, v1
 ; GFX9-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
@@ -4904,8 +4896,8 @@ define <2 x i64> @v_saddsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, v3, v7, vcc
 ; GFX9-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[2:3]
 ; GFX9-NEXT:    v_cmp_gt_i64_e64 s[6:7], 0, v[6:7]
+; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, 0x80000000, v10
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v2, 31, v5
-; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, 0x80000000, v2
 ; GFX9-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
@@ -5677,6 +5669,7 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX6-NEXT:    v_addc_u32_e32 v16, vcc, v2, v10, vcc
 ; GFX6-NEXT:    v_addc_u32_e32 v17, vcc, v3, v11, vcc
 ; GFX6-NEXT:    v_cmp_lt_u64_e32 vcc, v[8:9], v[0:1]
+; GFX6-NEXT:    v_bfrev_b32_e32 v18, 1
 ; GFX6-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX6-NEXT:    v_cmp_lt_i64_e32 vcc, v[16:17], v[2:3]
 ; GFX6-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
@@ -5688,8 +5681,7 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[10:11]
 ; GFX6-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
 ; GFX6-NEXT:    v_xor_b32_e32 v0, v1, v0
-; GFX6-NEXT:    v_bfrev_b32_e32 v1, 1
-; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v2, v1
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v2, v18
 ; GFX6-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v8, v2, vcc
@@ -5712,7 +5704,7 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[14:15]
 ; GFX6-NEXT:    v_cndmask_b32_e64 v5, v5, 0, vcc
 ; GFX6-NEXT:    v_xor_b32_e32 v4, v5, v4
-; GFX6-NEXT:    v_add_i32_e32 v7, vcc, 0x80000000, v6
+; GFX6-NEXT:    v_add_i32_e32 v7, vcc, 0x80000000, v18
 ; GFX6-NEXT:    v_and_b32_e32 v4, 1, v4
 ; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, v8, v6, vcc
@@ -5729,6 +5721,7 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX8-NEXT:    v_addc_u32_e32 v16, vcc, v2, v10, vcc
 ; GFX8-NEXT:    v_addc_u32_e32 v17, vcc, v3, v11, vcc
 ; GFX8-NEXT:    v_cmp_lt_u64_e32 vcc, v[8:9], v[0:1]
+; GFX8-NEXT:    v_bfrev_b32_e32 v18, 1
 ; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, v[16:17], v[2:3]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
@@ -5740,8 +5733,7 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[10:11]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
 ; GFX8-NEXT:    v_xor_b32_e32 v0, v1, v0
-; GFX8-NEXT:    v_bfrev_b32_e32 v1, 1
-; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v2, v1
+; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v2, v18
 ; GFX8-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v8, v2, vcc
@@ -5764,7 +5756,7 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[14:15]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v5, v5, 0, vcc
 ; GFX8-NEXT:    v_xor_b32_e32 v4, v5, v4
-; GFX8-NEXT:    v_add_u32_e32 v7, vcc, 0x80000000, v6
+; GFX8-NEXT:    v_add_u32_e32 v7, vcc, 0x80000000, v18
 ; GFX8-NEXT:    v_and_b32_e32 v4, 1, v4
 ; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
 ; GFX8-NEXT:    v_cndmask_b32_e32 v4, v8, v6, vcc
@@ -5781,6 +5773,7 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX9-NEXT:    v_addc_co_u32_e32 v16, vcc, v2, v10, vcc
 ; GFX9-NEXT:    v_addc_co_u32_e32 v17, vcc, v3, v11, vcc
 ; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, v[8:9], v[0:1]
+; GFX9-NEXT:    v_bfrev_b32_e32 v18, 1
 ; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[16:17], v[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
@@ -5792,8 +5785,7 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[10:11]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
 ; GFX9-NEXT:    v_xor_b32_e32 v0, v1, v0
-; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
-; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, v2, v1
+; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, v2, v18
 ; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v8, v2, vcc
@@ -5816,7 +5808,7 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[14:15]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v5, v5, 0, vcc
 ; GFX9-NEXT:    v_xor_b32_e32 v4, v5, v4
-; GFX9-NEXT:    v_add_co_u32_e32 v7, vcc, 0x80000000, v6
+; GFX9-NEXT:    v_add_co_u32_e32 v7, vcc, 0x80000000, v18
 ; GFX9-NEXT:    v_and_b32_e32 v4, 1, v4
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v8, v6, vcc
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
index ab000d91a3ef23d..bf658c9e72422f8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
@@ -295,7 +295,7 @@ define i32 @v_sdiv_i32_pow2k_denom(i32 %num) {
 ; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
 ; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v0
 ; CHECK-NEXT:    v_cndmask_b32_e64 v2, v2, v4, s[4:5]
-; CHECK-NEXT:    v_subrev_i32_e32 v3, vcc, s6, v0
+; CHECK-NEXT:    v_subrev_i32_e32 v3, vcc, 0x1000, v0
 ; CHECK-NEXT:    v_cndmask_b32_e64 v0, v0, v3, s[4:5]
 ; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v2
 ; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
@@ -345,7 +345,7 @@ define <2 x i32> @v_sdiv_v2i32_pow2k_denom(<2 x i32> %num) {
 ; GISEL-NEXT:    v_subrev_i32_e32 v7, vcc, s8, v0
 ; GISEL-NEXT:    v_cmp_le_u32_e64 s[6:7], s8, v1
 ; GISEL-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[6:7]
-; GISEL-NEXT:    v_subrev_i32_e32 v8, vcc, s8, v1
+; GISEL-NEXT:    v_subrev_i32_e32 v8, vcc, 0x1000, v1
 ; GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, v7, s[4:5]
 ; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v8, s[6:7]
@@ -363,7 +363,7 @@ define <2 x i32> @v_sdiv_v2i32_pow2k_denom(<2 x i32> %num) {
 ; CGP-LABEL: v_sdiv_v2i32_pow2k_denom:
 ; CGP:       ; %bb.0:
 ; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT:    s_movk_i32 s8, 0x1000
+; CGP-NEXT:    s_movk_i32 s6, 0x1000
 ; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
 ; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, 0x45800000
 ; CGP-NEXT:    s_movk_i32 s4, 0xf000
@@ -375,35 +375,35 @@ define <2 x i32> @v_sdiv_v2i32_pow2k_denom(<2 x i32> %num) {
 ; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
 ; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
 ; CGP-NEXT:    v_mul_f32_e32 v7, 0x4f7ffffe, v7
+; CGP-NEXT:    v_sub_i32_e32 v8, vcc, 0x1000, v5
 ; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
 ; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
-; CGP-NEXT:    v_mul_lo_u32 v8, v3, s4
+; CGP-NEXT:    v_mul_lo_u32 v9, v3, s4
 ; CGP-NEXT:    v_mul_lo_u32 v4, v7, v4
-; CGP-NEXT:    v_mul_hi_u32 v8, v3, v8
+; CGP-NEXT:    v_mul_hi_u32 v9, v3, v9
 ; CGP-NEXT:    v_mul_hi_u32 v4, v7, v4
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v8
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v9
 ; CGP-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
 ; CGP-NEXT:    v_mul_hi_u32 v3, v0, v3
 ; CGP-NEXT:    v_mul_hi_u32 v4, v1, v4
 ; CGP-NEXT:    v_lshlrev_b32_e32 v7, 12, v3
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v3
-; CGP-NEXT:    v_lshlrev_b32_e32 v9, 12, v4
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, 1, v4
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, 1, v3
+; CGP-NEXT:    v_lshlrev_b32_e32 v10, 12, v4
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, 1, v4
 ; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v9
-; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s8, v0
-; CGP-NEXT:    v_cndmask_b32_e64 v3, v3, v8, s[4:5]
-; CGP-NEXT:    v_subrev_i32_e32 v7, vcc, s8, v0
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[6:7], v1, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[6:7]
-; CGP-NEXT:    v_subrev_i32_e32 v8, vcc, 0x1000, v1
-; CGP-NEXT:    v_cndmask_b32_e64 v0, v0, v7, s[4:5]
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v10
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v9, vcc
+; CGP-NEXT:    v_subrev_i32_e64 v7, s[4:5], s6, v0
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v5
+; CGP-NEXT:    v_cndmask_b32_e64 v4, v4, v11, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
 ; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v8, s[6:7]
+; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v8, s[4:5]
 ; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v4
-; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s8, v0
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v3, v7, vcc
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
 ; CGP-NEXT:    v_cndmask_b32_e32 v1, v4, v8, vcc
@@ -437,7 +437,7 @@ define i32 @v_sdiv_i32_oddk_denom(i32 %num) {
 ; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
 ; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v0
 ; CHECK-NEXT:    v_cndmask_b32_e64 v2, v2, v4, s[4:5]
-; CHECK-NEXT:    v_subrev_i32_e32 v3, vcc, s6, v0
+; CHECK-NEXT:    v_subrev_i32_e32 v3, vcc, 0x12d8fb, v0
 ; CHECK-NEXT:    v_cndmask_b32_e64 v0, v0, v3, s[4:5]
 ; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v2
 ; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
@@ -486,7 +486,7 @@ define <2 x i32> @v_sdiv_v2i32_oddk_denom(<2 x i32> %num) {
 ; GISEL-NEXT:    v_subrev_i32_e32 v6, vcc, s8, v0
 ; GISEL-NEXT:    v_cmp_le_u32_e64 s[6:7], s8, v1
 ; GISEL-NEXT:    v_cndmask_b32_e64 v3, v3, v9, s[6:7]
-; GISEL-NEXT:    v_subrev_i32_e32 v7, vcc, s8, v1
+; GISEL-NEXT:    v_subrev_i32_e32 v7, vcc, 0x12d8fb, v1
 ; GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, v6, s[4:5]
 ; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[6:7]
@@ -504,7 +504,7 @@ define <2 x i32> @v_sdiv_v2i32_oddk_denom(<2 x i32> %num) {
 ; CGP-LABEL: v_sdiv_v2i32_oddk_denom:
 ; CGP:       ; %bb.0:
 ; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT:    s_mov_b32 s8, 0x12d8fb
+; CGP-NEXT:    s_mov_b32 s6, 0x12d8fb
 ; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
 ; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, 0x4996c7d8
 ; CGP-NEXT:    s_mov_b32 s4, 0xffed2705
@@ -516,35 +516,35 @@ define <2 x i32> @v_sdiv_v2i32_oddk_denom(<2 x i32> %num) {
 ; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
 ; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
 ; CGP-NEXT:    v_mul_f32_e32 v7, 0x4f7ffffe, v7
+; CGP-NEXT:    v_sub_i32_e32 v8, vcc, 0x12d8fb, v5
 ; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
 ; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
-; CGP-NEXT:    v_mul_lo_u32 v8, v3, s4
+; CGP-NEXT:    v_mul_lo_u32 v9, v3, s4
 ; CGP-NEXT:    v_mul_lo_u32 v4, v7, v4
-; CGP-NEXT:    v_mul_hi_u32 v8, v3, v8
+; CGP-NEXT:    v_mul_hi_u32 v9, v3, v9
 ; CGP-NEXT:    v_mul_hi_u32 v4, v7, v4
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v8
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v9
 ; CGP-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
 ; CGP-NEXT:    v_mul_hi_u32 v3, v0, v3
 ; CGP-NEXT:    v_mul_hi_u32 v4, v1, v4
-; CGP-NEXT:    v_mul_lo_u32 v7, v3, s8
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v3
-; CGP-NEXT:    v_mul_lo_u32 v9, v4, v5
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, 1, v4
+; CGP-NEXT:    v_mul_lo_u32 v7, v3, s6
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, 1, v3
+; CGP-NEXT:    v_mul_lo_u32 v10, v4, v5
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, 1, v4
 ; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v9
-; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s8, v0
-; CGP-NEXT:    v_cndmask_b32_e64 v3, v3, v8, s[4:5]
-; CGP-NEXT:    v_subrev_i32_e32 v7, vcc, s8, v0
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[6:7], v1, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[6:7]
-; CGP-NEXT:    v_subrev_i32_e32 v8, vcc, 0x12d8fb, v1
-; CGP-NEXT:    v_cndmask_b32_e64 v0, v0, v7, s[4:5]
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v10
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v9, vcc
+; CGP-NEXT:    v_subrev_i32_e64 v7, s[4:5], s6, v0
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v5
+; CGP-NEXT:    v_cndmask_b32_e64 v4, v4, v11, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
 ; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v8, s[6:7]
+; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v8, s[4:5]
 ; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v4
-; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s8, v0
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v3, v7, vcc
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
 ; CGP-NEXT:    v_cndmask_b32_e32 v1, v4, v8, vcc
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
index 88ace1c51f5b023..b945bd7ec9cdeb0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
@@ -268,10 +268,10 @@ define i32 @v_srem_i32_pow2k_denom(i32 %num) {
 ; CHECK-NEXT:    v_mul_hi_u32 v2, v0, v2
 ; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 12, v2
 ; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
-; CHECK-NEXT:    v_subrev_i32_e32 v2, vcc, s4, v0
+; CHECK-NEXT:    v_subrev_i32_e32 v2, vcc, 0x1000, v0
 ; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; CHECK-NEXT:    v_subrev_i32_e32 v2, vcc, s4, v0
+; CHECK-NEXT:    v_subrev_i32_e32 v2, vcc, 0x1000, v0
 ; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
@@ -312,13 +312,13 @@ define <2 x i32> @v_srem_v2i32_pow2k_denom(<2 x i32> %num) {
 ; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
 ; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
 ; GISEL-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v0
-; GISEL-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v1
+; GISEL-NEXT:    v_subrev_i32_e32 v4, vcc, 0x1000, v1
 ; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
 ; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
 ; GISEL-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v0
-; GISEL-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v1
+; GISEL-NEXT:    v_subrev_i32_e32 v4, vcc, 0x1000, v1
 ; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
@@ -344,15 +344,16 @@ define <2 x i32> @v_srem_v2i32_pow2k_denom(<2 x i32> %num) {
 ; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
 ; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
 ; CGP-NEXT:    v_mul_f32_e32 v7, 0x4f7ffffe, v7
+; CGP-NEXT:    v_sub_i32_e32 v8, vcc, 0x1000, v5
 ; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
 ; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
-; CGP-NEXT:    v_mul_lo_u32 v8, v3, s5
+; CGP-NEXT:    v_mul_lo_u32 v9, v3, s5
 ; CGP-NEXT:    v_mul_lo_u32 v4, v7, v4
-; CGP-NEXT:    v_mul_hi_u32 v8, v3, v8
+; CGP-NEXT:    v_mul_hi_u32 v9, v3, v9
 ; CGP-NEXT:    v_mul_hi_u32 v4, v7, v4
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v8
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v9
 ; CGP-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
 ; CGP-NEXT:    v_mul_hi_u32 v3, v0, v3
 ; CGP-NEXT:    v_mul_hi_u32 v4, v1, v4
@@ -361,19 +362,17 @@ define <2 x i32> @v_srem_v2i32_pow2k_denom(<2 x i32> %num) {
 ; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
 ; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
 ; CGP-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v0
-; CGP-NEXT:    v_subrev_i32_e32 v4, vcc, 0x1000, v1
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
 ; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
 ; CGP-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v0
-; CGP-NEXT:    v_subrev_i32_e32 v4, vcc, 0x1000, v1
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
 ; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
 ; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
-; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
 ; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
 ; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
 ; CGP-NEXT:    s_setpc_b64 s[30:31]
@@ -399,10 +398,10 @@ define i32 @v_srem_i32_oddk_denom(i32 %num) {
 ; CHECK-NEXT:    v_mul_hi_u32 v2, v0, v2
 ; CHECK-NEXT:    v_mul_lo_u32 v2, v2, s4
 ; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
-; CHECK-NEXT:    v_subrev_i32_e32 v2, vcc, s4, v0
+; CHECK-NEXT:    v_subrev_i32_e32 v2, vcc, 0x12d8fb, v0
 ; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; CHECK-NEXT:    v_subrev_i32_e32 v2, vcc, s4, v0
+; CHECK-NEXT:    v_subrev_i32_e32 v2, vcc, 0x12d8fb, v0
 ; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
@@ -443,13 +442,13 @@ define <2 x i32> @v_srem_v2i32_oddk_denom(<2 x i32> %num) {
 ; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
 ; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
 ; GISEL-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v0
-; GISEL-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v1
+; GISEL-NEXT:    v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1
 ; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
 ; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
 ; GISEL-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v0
-; GISEL-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v1
+; GISEL-NEXT:    v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1
 ; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
@@ -475,15 +474,16 @@ define <2 x i32> @v_srem_v2i32_oddk_denom(<2 x i32> %num) {
 ; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
 ; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
 ; CGP-NEXT:    v_mul_f32_e32 v7, 0x4f7ffffe, v7
+; CGP-NEXT:    v_sub_i32_e32 v8, vcc, 0x12d8fb, v5
 ; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
 ; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
-; CGP-NEXT:    v_mul_lo_u32 v8, v3, s5
+; CGP-NEXT:    v_mul_lo_u32 v9, v3, s5
 ; CGP-NEXT:    v_mul_lo_u32 v4, v7, v4
-; CGP-NEXT:    v_mul_hi_u32 v8, v3, v8
+; CGP-NEXT:    v_mul_hi_u32 v9, v3, v9
 ; CGP-NEXT:    v_mul_hi_u32 v4, v7, v4
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v8
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v9
 ; CGP-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
 ; CGP-NEXT:    v_mul_hi_u32 v3, v0, v3
 ; CGP-NEXT:    v_mul_hi_u32 v4, v1, v4
@@ -492,19 +492,17 @@ define <2 x i32> @v_srem_v2i32_oddk_denom(<2 x i32> %num) {
 ; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
 ; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
 ; CGP-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v0
-; CGP-NEXT:    v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
 ; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
 ; CGP-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v0
-; CGP-NEXT:    v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
 ; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
 ; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
-; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
 ; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
 ; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
 ; CGP-NEXT:    s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
index d0c55c69f508775..b5aaa244a83827d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
@@ -1078,14 +1078,14 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) {
 ; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, -1, vcc
 ; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
 ; CHECK-NEXT:    v_cndmask_b32_e32 v3, -1, v3, vcc
-; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, v0, v6
+; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, 0x1000, v6
 ; CHECK-NEXT:    v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5]
 ; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
 ; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v6
 ; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
 ; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
 ; CHECK-NEXT:    v_cndmask_b32_e32 v5, -1, v5, vcc
-; CHECK-NEXT:    v_subrev_i32_e32 v6, vcc, 0x1000, v4
+; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, 0x1000, v6
 ; CHECK-NEXT:    v_subbrev_u32_e32 v8, vcc, 0, v1, vcc
 ; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
 ; CHECK-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
@@ -1699,14 +1699,14 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) {
 ; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, -1, vcc
 ; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
 ; CHECK-NEXT:    v_cndmask_b32_e32 v3, -1, v3, vcc
-; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, v0, v6
+; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, 0x12d8fb, v6
 ; CHECK-NEXT:    v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5]
 ; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
 ; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v6
 ; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
 ; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
 ; CHECK-NEXT:    v_cndmask_b32_e32 v5, -1, v5, vcc
-; CHECK-NEXT:    v_subrev_i32_e32 v6, vcc, 0x12d8fb, v4
+; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, 0x12d8fb, v6
 ; CHECK-NEXT:    v_subbrev_u32_e32 v8, vcc, 0, v1, vcc
 ; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
 ; CHECK-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
index 65455d754be4f53..88b30a043033286 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
@@ -9,13 +9,13 @@ define i7 @v_ssubsat_i7(i7 %lhs, i7 %rhs) {
 ; GFX6-LABEL: v_ssubsat_i7:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 25, v0
-; GFX6-NEXT:    v_max_i32_e32 v2, -1, v0
+; GFX6-NEXT:    v_bfrev_b32_e32 v2, -2
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 25, v1
-; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2
-; GFX6-NEXT:    v_min_i32_e32 v3, -1, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 0x80000000, v3
+; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 0x7fffffff, v2
+; GFX6-NEXT:    v_bfrev_b32_e32 v3, 1
+; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0x80000000, v3
 ; GFX6-NEXT:    v_max_i32_e32 v1, v2, v1
+; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 25, v0
 ; GFX6-NEXT:    v_min_i32_e32 v1, v1, v3
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 25, v0
@@ -119,13 +119,13 @@ define i8 @v_ssubsat_i8(i8 %lhs, i8 %rhs) {
 ; GFX6-LABEL: v_ssubsat_i8:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
-; GFX6-NEXT:    v_max_i32_e32 v2, -1, v0
+; GFX6-NEXT:    v_bfrev_b32_e32 v2, -2
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
-; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2
-; GFX6-NEXT:    v_min_i32_e32 v3, -1, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 0x80000000, v3
+; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 0x7fffffff, v2
+; GFX6-NEXT:    v_bfrev_b32_e32 v3, 1
+; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0x80000000, v3
 ; GFX6-NEXT:    v_max_i32_e32 v1, v2, v1
+; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
 ; GFX6-NEXT:    v_min_i32_e32 v1, v1, v3
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 24, v0
@@ -231,23 +231,21 @@ define i16 @v_ssubsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 8, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
-; GFX6-NEXT:    s_brev_b32 s4, -2
 ; GFX6-NEXT:    v_max_i32_e32 v4, -1, v0
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 8, v1
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
-; GFX6-NEXT:    s_brev_b32 s5, 1
-; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v4
+; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, 0x7fffffff, v4
 ; GFX6-NEXT:    v_min_i32_e32 v5, -1, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v5, vcc, s5, v5
+; GFX6-NEXT:    v_subrev_i32_e32 v5, vcc, 0x80000000, v5
 ; GFX6-NEXT:    v_max_i32_e32 v1, v4, v1
 ; GFX6-NEXT:    v_min_i32_e32 v1, v1, v5
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
 ; GFX6-NEXT:    v_max_i32_e32 v3, -1, v1
-; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v3
+; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 0x7fffffff, v3
 ; GFX6-NEXT:    v_min_i32_e32 v4, -1, v1
-; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, s5, v4
+; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, 0x80000000, v4
 ; GFX6-NEXT:    v_max_i32_e32 v2, v3, v2
 ; GFX6-NEXT:    v_min_i32_e32 v2, v2, v4
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v2
@@ -506,20 +504,20 @@ define i32 @v_ssubsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
-; GFX6-NEXT:    s_brev_b32 s4, -2
 ; GFX6-NEXT:    v_max_i32_e32 v8, -1, v0
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
 ; GFX6-NEXT:    s_brev_b32 s5, 1
-; GFX6-NEXT:    v_subrev_i32_e32 v8, vcc, s4, v8
+; GFX6-NEXT:    v_subrev_i32_e32 v8, vcc, 0x7fffffff, v8
 ; GFX6-NEXT:    v_min_i32_e32 v10, -1, v0
 ; GFX6-NEXT:    v_subrev_i32_e32 v10, vcc, s5, v10
 ; GFX6-NEXT:    v_max_i32_e32 v1, v8, v1
 ; GFX6-NEXT:    v_min_i32_e32 v1, v1, v10
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
+; GFX6-NEXT:    s_brev_b32 s4, -2
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 24, v5
 ; GFX6-NEXT:    v_max_i32_e32 v5, -1, v1
 ; GFX6-NEXT:    v_subrev_i32_e32 v5, vcc, s4, v5
@@ -1005,13 +1003,13 @@ define i24 @v_ssubsat_i24(i24 %lhs, i24 %rhs) {
 ; GFX6-LABEL: v_ssubsat_i24:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
-; GFX6-NEXT:    v_max_i32_e32 v2, -1, v0
+; GFX6-NEXT:    v_bfrev_b32_e32 v2, -2
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
-; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2
-; GFX6-NEXT:    v_min_i32_e32 v3, -1, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 0x80000000, v3
+; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 0x7fffffff, v2
+; GFX6-NEXT:    v_bfrev_b32_e32 v3, 1
+; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0x80000000, v3
 ; GFX6-NEXT:    v_max_i32_e32 v1, v2, v1
+; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
 ; GFX6-NEXT:    v_min_i32_e32 v1, v1, v3
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 8, v0
@@ -1026,7 +1024,7 @@ define i24 @v_ssubsat_i24(i24 %lhs, i24 %rhs) {
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v3, v0
 ; GFX8-NEXT:    v_bfe_i32 v0, v1, 0, 24
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[6:7], 0, v0
-; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 23, v3
+; GFX8-NEXT:    v_mov_b32_e32 v0, 0xff800000
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0xff800000, v0
 ; GFX8-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
@@ -1112,10 +1110,10 @@ define i32 @v_ssubsat_i32(i32 %lhs, i32 %rhs) {
 ; GFX6-LABEL: v_ssubsat_i32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_max_i32_e32 v2, -1, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2
-; GFX6-NEXT:    v_min_i32_e32 v3, -1, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 0x80000000, v3
+; GFX6-NEXT:    v_bfrev_b32_e32 v2, -2
+; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 0x7fffffff, v2
+; GFX6-NEXT:    v_bfrev_b32_e32 v3, 1
+; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0x80000000, v3
 ; GFX6-NEXT:    v_max_i32_e32 v1, v2, v1
 ; GFX6-NEXT:    v_min_i32_e32 v1, v1, v3
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
@@ -1124,10 +1122,10 @@ define i32 @v_ssubsat_i32(i32 %lhs, i32 %rhs) {
 ; GFX8-LABEL: v_ssubsat_i32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_max_i32_e32 v2, -1, v0
-; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, 0x7fffffff, v2
-; GFX8-NEXT:    v_min_i32_e32 v3, -1, v0
-; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, 0x80000000, v3
+; GFX8-NEXT:    v_bfrev_b32_e32 v2, -2
+; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 0x7fffffff, v2
+; GFX8-NEXT:    v_bfrev_b32_e32 v3, 1
+; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, 0x80000000, v3
 ; GFX8-NEXT:    v_max_i32_e32 v1, v2, v1
 ; GFX8-NEXT:    v_min_i32_e32 v1, v1, v3
 ; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v0, v1
@@ -1227,10 +1225,10 @@ define amdgpu_ps float @ssubsat_i32_sv(i32 inreg %lhs, i32 %rhs) {
 define amdgpu_ps float @ssubsat_i32_vs(i32 %lhs, i32 inreg %rhs) {
 ; GFX6-LABEL: ssubsat_i32_vs:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    v_max_i32_e32 v1, -1, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v1, vcc, 0x7fffffff, v1
-; GFX6-NEXT:    v_min_i32_e32 v2, -1, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, 0x80000000, v2
+; GFX6-NEXT:    v_bfrev_b32_e32 v1, -2
+; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 0x7fffffff, v1
+; GFX6-NEXT:    v_bfrev_b32_e32 v2, 1
+; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 0x80000000, v2
 ; GFX6-NEXT:    v_max_i32_e32 v1, s0, v1
 ; GFX6-NEXT:    v_min_i32_e32 v1, v1, v2
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
@@ -1238,10 +1236,10 @@ define amdgpu_ps float @ssubsat_i32_vs(i32 %lhs, i32 inreg %rhs) {
 ;
 ; GFX8-LABEL: ssubsat_i32_vs:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    v_max_i32_e32 v1, -1, v0
-; GFX8-NEXT:    v_subrev_u32_e32 v1, vcc, 0x7fffffff, v1
-; GFX8-NEXT:    v_min_i32_e32 v2, -1, v0
-; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, 0x80000000, v2
+; GFX8-NEXT:    v_bfrev_b32_e32 v1, -2
+; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, 0x7fffffff, v1
+; GFX8-NEXT:    v_bfrev_b32_e32 v2, 1
+; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 0x80000000, v2
 ; GFX8-NEXT:    v_max_i32_e32 v1, s0, v1
 ; GFX8-NEXT:    v_min_i32_e32 v1, v1, v2
 ; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v0, v1
@@ -1265,19 +1263,17 @@ define <2 x i32> @v_ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; GFX6-LABEL: v_ssubsat_v2i32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    s_brev_b32 s4, -2
 ; GFX6-NEXT:    v_max_i32_e32 v4, -1, v0
-; GFX6-NEXT:    s_brev_b32 s5, 1
-; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v4
+; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, 0x7fffffff, v4
 ; GFX6-NEXT:    v_min_i32_e32 v5, -1, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v5, vcc, s5, v5
+; GFX6-NEXT:    v_subrev_i32_e32 v5, vcc, 0x80000000, v5
 ; GFX6-NEXT:    v_max_i32_e32 v2, v4, v2
 ; GFX6-NEXT:    v_min_i32_e32 v2, v2, v5
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
 ; GFX6-NEXT:    v_max_i32_e32 v2, -1, v1
-; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, s4, v2
+; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2
 ; GFX6-NEXT:    v_min_i32_e32 v4, -1, v1
-; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, s5, v4
+; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, 0x80000000, v4
 ; GFX6-NEXT:    v_max_i32_e32 v2, v2, v3
 ; GFX6-NEXT:    v_min_i32_e32 v2, v2, v4
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v2
@@ -1286,19 +1282,17 @@ define <2 x i32> @v_ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; GFX8-LABEL: v_ssubsat_v2i32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_brev_b32 s4, -2
 ; GFX8-NEXT:    v_max_i32_e32 v4, -1, v0
-; GFX8-NEXT:    s_brev_b32 s5, 1
-; GFX8-NEXT:    v_subrev_u32_e32 v4, vcc, s4, v4
+; GFX8-NEXT:    v_subrev_u32_e32 v4, vcc, 0x7fffffff, v4
 ; GFX8-NEXT:    v_min_i32_e32 v5, -1, v0
-; GFX8-NEXT:    v_subrev_u32_e32 v5, vcc, s5, v5
+; GFX8-NEXT:    v_subrev_u32_e32 v5, vcc, 0x80000000, v5
 ; GFX8-NEXT:    v_max_i32_e32 v2, v4, v2
 ; GFX8-NEXT:    v_min_i32_e32 v2, v2, v5
 ; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v0, v2
 ; GFX8-NEXT:    v_max_i32_e32 v2, -1, v1
-; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, s4, v2
+; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, 0x7fffffff, v2
 ; GFX8-NEXT:    v_min_i32_e32 v4, -1, v1
-; GFX8-NEXT:    v_subrev_u32_e32 v4, vcc, s5, v4
+; GFX8-NEXT:    v_subrev_u32_e32 v4, vcc, 0x80000000, v4
 ; GFX8-NEXT:    v_max_i32_e32 v2, v2, v3
 ; GFX8-NEXT:    v_min_i32_e32 v2, v2, v4
 ; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, v1, v2
@@ -1383,26 +1377,25 @@ define <3 x i32> @v_ssubsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
 ; GFX6-LABEL: v_ssubsat_v3i32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    s_brev_b32 s4, -2
 ; GFX6-NEXT:    v_max_i32_e32 v6, -1, v0
-; GFX6-NEXT:    s_brev_b32 s5, 1
-; GFX6-NEXT:    v_subrev_i32_e32 v6, vcc, s4, v6
+; GFX6-NEXT:    v_subrev_i32_e32 v6, vcc, 0x7fffffff, v6
 ; GFX6-NEXT:    v_min_i32_e32 v7, -1, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v7, vcc, s5, v7
+; GFX6-NEXT:    v_subrev_i32_e32 v7, vcc, 0x80000000, v7
 ; GFX6-NEXT:    v_max_i32_e32 v3, v6, v3
 ; GFX6-NEXT:    v_min_i32_e32 v3, v3, v7
+; GFX6-NEXT:    s_brev_b32 s4, -2
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
 ; GFX6-NEXT:    v_max_i32_e32 v3, -1, v1
 ; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v3
 ; GFX6-NEXT:    v_min_i32_e32 v6, -1, v1
-; GFX6-NEXT:    v_subrev_i32_e32 v6, vcc, s5, v6
+; GFX6-NEXT:    v_subrev_i32_e32 v6, vcc, 0x80000000, v6
 ; GFX6-NEXT:    v_max_i32_e32 v3, v3, v4
 ; GFX6-NEXT:    v_min_i32_e32 v3, v3, v6
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
 ; GFX6-NEXT:    v_max_i32_e32 v3, -1, v2
-; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v3
+; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 0x7fffffff, v3
 ; GFX6-NEXT:    v_min_i32_e32 v4, -1, v2
-; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, s5, v4
+; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, 0x80000000, v4
 ; GFX6-NEXT:    v_max_i32_e32 v3, v3, v5
 ; GFX6-NEXT:    v_min_i32_e32 v3, v3, v4
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v3
@@ -1411,26 +1404,25 @@ define <3 x i32> @v_ssubsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
 ; GFX8-LABEL: v_ssubsat_v3i32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_brev_b32 s4, -2
 ; GFX8-NEXT:    v_max_i32_e32 v6, -1, v0
-; GFX8-NEXT:    s_brev_b32 s5, 1
-; GFX8-NEXT:    v_subrev_u32_e32 v6, vcc, s4, v6
+; GFX8-NEXT:    v_subrev_u32_e32 v6, vcc, 0x7fffffff, v6
 ; GFX8-NEXT:    v_min_i32_e32 v7, -1, v0
-; GFX8-NEXT:    v_subrev_u32_e32 v7, vcc, s5, v7
+; GFX8-NEXT:    v_subrev_u32_e32 v7, vcc, 0x80000000, v7
 ; GFX8-NEXT:    v_max_i32_e32 v3, v6, v3
 ; GFX8-NEXT:    v_min_i32_e32 v3, v3, v7
+; GFX8-NEXT:    s_brev_b32 s4, -2
 ; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v0, v3
 ; GFX8-NEXT:    v_max_i32_e32 v3, -1, v1
 ; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, s4, v3
 ; GFX8-NEXT:    v_min_i32_e32 v6, -1, v1
-; GFX8-NEXT:    v_subrev_u32_e32 v6, vcc, s5, v6
+; GFX8-NEXT:    v_subrev_u32_e32 v6, vcc, 0x80000000, v6
 ; GFX8-NEXT:    v_max_i32_e32 v3, v3, v4
 ; GFX8-NEXT:    v_min_i32_e32 v3, v3, v6
 ; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, v1, v3
 ; GFX8-NEXT:    v_max_i32_e32 v3, -1, v2
-; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, s4, v3
+; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, 0x7fffffff, v3
 ; GFX8-NEXT:    v_min_i32_e32 v4, -1, v2
-; GFX8-NEXT:    v_subrev_u32_e32 v4, vcc, s5, v4
+; GFX8-NEXT:    v_subrev_u32_e32 v4, vcc, 0x80000000, v4
 ; GFX8-NEXT:    v_max_i32_e32 v3, v3, v5
 ; GFX8-NEXT:    v_min_i32_e32 v3, v3, v4
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v2, v3
@@ -1536,33 +1528,31 @@ define <4 x i32> @v_ssubsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
 ; GFX6-LABEL: v_ssubsat_v4i32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    s_brev_b32 s4, -2
 ; GFX6-NEXT:    v_max_i32_e32 v8, -1, v0
-; GFX6-NEXT:    s_brev_b32 s5, 1
-; GFX6-NEXT:    v_subrev_i32_e32 v8, vcc, s4, v8
-; GFX6-NEXT:    v_min_i32_e32 v9, -1, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v9, vcc, s5, v9
+; GFX6-NEXT:    v_subrev_i32_e32 v8, vcc, 0x7fffffff, v8
+; GFX6-NEXT:    v_min_i32_e32 v10, -1, v0
+; GFX6-NEXT:    v_subrev_i32_e32 v10, vcc, 0x80000000, v10
 ; GFX6-NEXT:    v_max_i32_e32 v4, v8, v4
-; GFX6-NEXT:    v_min_i32_e32 v4, v4, v9
+; GFX6-NEXT:    v_min_i32_e32 v4, v4, v10
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
 ; GFX6-NEXT:    v_max_i32_e32 v4, -1, v1
-; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v4
+; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, 0x7fffffff, v4
 ; GFX6-NEXT:    v_min_i32_e32 v8, -1, v1
-; GFX6-NEXT:    v_subrev_i32_e32 v8, vcc, s5, v8
+; GFX6-NEXT:    v_subrev_i32_e32 v8, vcc, 0x80000000, v8
 ; GFX6-NEXT:    v_max_i32_e32 v4, v4, v5
 ; GFX6-NEXT:    v_min_i32_e32 v4, v4, v8
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
 ; GFX6-NEXT:    v_max_i32_e32 v4, -1, v2
-; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v4
+; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, 0x7fffffff, v4
 ; GFX6-NEXT:    v_min_i32_e32 v5, -1, v2
-; GFX6-NEXT:    v_subrev_i32_e32 v5, vcc, s5, v5
+; GFX6-NEXT:    v_subrev_i32_e32 v5, vcc, 0x80000000, v5
 ; GFX6-NEXT:    v_max_i32_e32 v4, v4, v6
+; GFX6-NEXT:    v_bfrev_b32_e32 v9, -2
 ; GFX6-NEXT:    v_min_i32_e32 v4, v4, v5
+; GFX6-NEXT:    v_bfrev_b32_e32 v11, 1
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v4
-; GFX6-NEXT:    v_max_i32_e32 v4, -1, v3
-; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, 0x7fffffff, v4
-; GFX6-NEXT:    v_min_i32_e32 v5, -1, v3
-; GFX6-NEXT:    v_subrev_i32_e32 v5, vcc, 0x80000000, v5
+; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 0x7fffffff, v9
+; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, 0x80000000, v11
 ; GFX6-NEXT:    v_max_i32_e32 v4, v4, v7
 ; GFX6-NEXT:    v_min_i32_e32 v4, v4, v5
 ; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, v3, v4
@@ -1571,33 +1561,31 @@ define <4 x i32> @v_ssubsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
 ; GFX8-LABEL: v_ssubsat_v4i32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_brev_b32 s4, -2
 ; GFX8-NEXT:    v_max_i32_e32 v8, -1, v0
-; GFX8-NEXT:    s_brev_b32 s5, 1
-; GFX8-NEXT:    v_subrev_u32_e32 v8, vcc, s4, v8
-; GFX8-NEXT:    v_min_i32_e32 v9, -1, v0
-; GFX8-NEXT:    v_subrev_u32_e32 v9, vcc, s5, v9
+; GFX8-NEXT:    v_subrev_u32_e32 v8, vcc, 0x7fffffff, v8
+; GFX8-NEXT:    v_min_i32_e32 v10, -1, v0
+; GFX8-NEXT:    v_subrev_u32_e32 v10, vcc, 0x80000000, v10
 ; GFX8-NEXT:    v_max_i32_e32 v4, v8, v4
-; GFX8-NEXT:    v_min_i32_e32 v4, v4, v9
+; GFX8-NEXT:    v_min_i32_e32 v4, v4, v10
 ; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v0, v4
 ; GFX8-NEXT:    v_max_i32_e32 v4, -1, v1
-; GFX8-NEXT:    v_subrev_u32_e32 v4, vcc, s4, v4
+; GFX8-NEXT:    v_subrev_u32_e32 v4, vcc, 0x7fffffff, v4
 ; GFX8-NEXT:    v_min_i32_e32 v8, -1, v1
-; GFX8-NEXT:    v_subrev_u32_e32 v8, vcc, s5, v8
+; GFX8-NEXT:    v_subrev_u32_e32 v8, vcc, 0x80000000, v8
 ; GFX8-NEXT:    v_max_i32_e32 v4, v4, v5
 ; GFX8-NEXT:    v_min_i32_e32 v4, v4, v8
 ; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, v1, v4
 ; GFX8-NEXT:    v_max_i32_e32 v4, -1, v2
-; GFX8-NEXT:    v_subrev_u32_e32 v4, vcc, s4, v4
+; GFX8-NEXT:    v_subrev_u32_e32 v4, vcc, 0x7fffffff, v4
 ; GFX8-NEXT:    v_min_i32_e32 v5, -1, v2
-; GFX8-NEXT:    v_subrev_u32_e32 v5, vcc, s5, v5
+; GFX8-NEXT:    v_subrev_u32_e32 v5, vcc, 0x80000000, v5
 ; GFX8-NEXT:    v_max_i32_e32 v4, v4, v6
+; GFX8-NEXT:    v_bfrev_b32_e32 v9, -2
 ; GFX8-NEXT:    v_min_i32_e32 v4, v4, v5
+; GFX8-NEXT:    v_bfrev_b32_e32 v11, 1
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v2, v4
-; GFX8-NEXT:    v_max_i32_e32 v4, -1, v3
-; GFX8-NEXT:    v_subrev_u32_e32 v4, vcc, 0x7fffffff, v4
-; GFX8-NEXT:    v_min_i32_e32 v5, -1, v3
-; GFX8-NEXT:    v_subrev_u32_e32 v5, vcc, 0x80000000, v5
+; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 0x7fffffff, v9
+; GFX8-NEXT:    v_sub_u32_e32 v5, vcc, 0x80000000, v11
 ; GFX8-NEXT:    v_max_i32_e32 v4, v4, v7
 ; GFX8-NEXT:    v_min_i32_e32 v4, v4, v5
 ; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, v3, v4
@@ -1724,41 +1712,37 @@ define <5 x i32> @v_ssubsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) {
 ; GFX6-LABEL: v_ssubsat_v5i32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    s_brev_b32 s4, -2
 ; GFX6-NEXT:    v_max_i32_e32 v10, -1, v0
-; GFX6-NEXT:    s_brev_b32 s5, 1
-; GFX6-NEXT:    v_subrev_i32_e32 v10, vcc, s4, v10
+; GFX6-NEXT:    v_subrev_i32_e32 v10, vcc, 0x7fffffff, v10
 ; GFX6-NEXT:    v_min_i32_e32 v12, -1, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v12, vcc, s5, v12
+; GFX6-NEXT:    v_subrev_i32_e32 v12, vcc, 0x80000000, v12
 ; GFX6-NEXT:    v_max_i32_e32 v5, v10, v5
 ; GFX6-NEXT:    v_min_i32_e32 v5, v5, v12
+; GFX6-NEXT:    s_brev_b32 s4, -2
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
 ; GFX6-NEXT:    v_max_i32_e32 v5, -1, v1
 ; GFX6-NEXT:    v_subrev_i32_e32 v5, vcc, s4, v5
 ; GFX6-NEXT:    v_min_i32_e32 v10, -1, v1
-; GFX6-NEXT:    v_subrev_i32_e32 v10, vcc, s5, v10
+; GFX6-NEXT:    v_subrev_i32_e32 v10, vcc, 0x80000000, v10
 ; GFX6-NEXT:    v_max_i32_e32 v5, v5, v6
 ; GFX6-NEXT:    v_min_i32_e32 v5, v5, v10
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
 ; GFX6-NEXT:    v_max_i32_e32 v5, -1, v2
-; GFX6-NEXT:    v_subrev_i32_e32 v5, vcc, s4, v5
+; GFX6-NEXT:    v_subrev_i32_e32 v5, vcc, 0x7fffffff, v5
 ; GFX6-NEXT:    v_min_i32_e32 v6, -1, v2
-; GFX6-NEXT:    v_subrev_i32_e32 v6, vcc, s5, v6
+; GFX6-NEXT:    v_subrev_i32_e32 v6, vcc, 0x80000000, v6
 ; GFX6-NEXT:    v_max_i32_e32 v5, v5, v7
-; GFX6-NEXT:    v_min_i32_e32 v5, v5, v6
 ; GFX6-NEXT:    v_bfrev_b32_e32 v11, -2
+; GFX6-NEXT:    v_min_i32_e32 v5, v5, v6
+; GFX6-NEXT:    v_bfrev_b32_e32 v13, 1
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v5
-; GFX6-NEXT:    v_max_i32_e32 v5, -1, v3
-; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, v5, v11
-; GFX6-NEXT:    v_min_i32_e32 v6, -1, v3
-; GFX6-NEXT:    v_subrev_i32_e32 v6, vcc, 0x80000000, v6
+; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, 0x7fffffff, v11
+; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, 0x80000000, v13
 ; GFX6-NEXT:    v_max_i32_e32 v5, v5, v8
 ; GFX6-NEXT:    v_min_i32_e32 v5, v5, v6
 ; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, v3, v5
-; GFX6-NEXT:    v_max_i32_e32 v5, -1, v4
-; GFX6-NEXT:    v_subrev_i32_e32 v5, vcc, 0x7fffffff, v5
-; GFX6-NEXT:    v_min_i32_e32 v6, -1, v4
-; GFX6-NEXT:    v_subrev_i32_e32 v6, vcc, 0x80000000, v6
+; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, 0x7fffffff, v11
+; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, 0x80000000, v13
 ; GFX6-NEXT:    v_max_i32_e32 v5, v5, v9
 ; GFX6-NEXT:    v_min_i32_e32 v5, v5, v6
 ; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, v4, v5
@@ -1767,41 +1751,37 @@ define <5 x i32> @v_ssubsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) {
 ; GFX8-LABEL: v_ssubsat_v5i32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_brev_b32 s4, -2
 ; GFX8-NEXT:    v_max_i32_e32 v10, -1, v0
-; GFX8-NEXT:    s_brev_b32 s5, 1
-; GFX8-NEXT:    v_subrev_u32_e32 v10, vcc, s4, v10
+; GFX8-NEXT:    v_subrev_u32_e32 v10, vcc, 0x7fffffff, v10
 ; GFX8-NEXT:    v_min_i32_e32 v12, -1, v0
-; GFX8-NEXT:    v_subrev_u32_e32 v12, vcc, s5, v12
+; GFX8-NEXT:    v_subrev_u32_e32 v12, vcc, 0x80000000, v12
 ; GFX8-NEXT:    v_max_i32_e32 v5, v10, v5
 ; GFX8-NEXT:    v_min_i32_e32 v5, v5, v12
+; GFX8-NEXT:    s_brev_b32 s4, -2
 ; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v0, v5
 ; GFX8-NEXT:    v_max_i32_e32 v5, -1, v1
 ; GFX8-NEXT:    v_subrev_u32_e32 v5, vcc, s4, v5
 ; GFX8-NEXT:    v_min_i32_e32 v10, -1, v1
-; GFX8-NEXT:    v_subrev_u32_e32 v10, vcc, s5, v10
+; GFX8-NEXT:    v_subrev_u32_e32 v10, vcc, 0x80000000, v10
 ; GFX8-NEXT:    v_max_i32_e32 v5, v5, v6
 ; GFX8-NEXT:    v_min_i32_e32 v5, v5, v10
 ; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, v1, v5
 ; GFX8-NEXT:    v_max_i32_e32 v5, -1, v2
-; GFX8-NEXT:    v_subrev_u32_e32 v5, vcc, s4, v5
+; GFX8-NEXT:    v_subrev_u32_e32 v5, vcc, 0x7fffffff, v5
 ; GFX8-NEXT:    v_min_i32_e32 v6, -1, v2
-; GFX8-NEXT:    v_subrev_u32_e32 v6, vcc, s5, v6
+; GFX8-NEXT:    v_subrev_u32_e32 v6, vcc, 0x80000000, v6
 ; GFX8-NEXT:    v_max_i32_e32 v5, v5, v7
-; GFX8-NEXT:    v_min_i32_e32 v5, v5, v6
 ; GFX8-NEXT:    v_bfrev_b32_e32 v11, -2
+; GFX8-NEXT:    v_min_i32_e32 v5, v5, v6
+; GFX8-NEXT:    v_bfrev_b32_e32 v13, 1
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v2, v5
-; GFX8-NEXT:    v_max_i32_e32 v5, -1, v3
-; GFX8-NEXT:    v_sub_u32_e32 v5, vcc, v5, v11
-; GFX8-NEXT:    v_min_i32_e32 v6, -1, v3
-; GFX8-NEXT:    v_subrev_u32_e32 v6, vcc, 0x80000000, v6
+; GFX8-NEXT:    v_sub_u32_e32 v5, vcc, 0x7fffffff, v11
+; GFX8-NEXT:    v_sub_u32_e32 v6, vcc, 0x80000000, v13
 ; GFX8-NEXT:    v_max_i32_e32 v5, v5, v8
 ; GFX8-NEXT:    v_min_i32_e32 v5, v5, v6
 ; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, v3, v5
-; GFX8-NEXT:    v_max_i32_e32 v5, -1, v4
-; GFX8-NEXT:    v_subrev_u32_e32 v5, vcc, 0x7fffffff, v5
-; GFX8-NEXT:    v_min_i32_e32 v6, -1, v4
-; GFX8-NEXT:    v_subrev_u32_e32 v6, vcc, 0x80000000, v6
+; GFX8-NEXT:    v_sub_u32_e32 v5, vcc, 0x7fffffff, v11
+; GFX8-NEXT:    v_sub_u32_e32 v6, vcc, 0x80000000, v13
 ; GFX8-NEXT:    v_max_i32_e32 v5, v5, v9
 ; GFX8-NEXT:    v_min_i32_e32 v5, v5, v6
 ; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, v4, v5
@@ -2592,13 +2572,13 @@ define i16 @v_ssubsat_i16(i16 %lhs, i16 %rhs) {
 ; GFX6-LABEL: v_ssubsat_i16:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX6-NEXT:    v_max_i32_e32 v2, -1, v0
+; GFX6-NEXT:    v_bfrev_b32_e32 v2, -2
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2
-; GFX6-NEXT:    v_min_i32_e32 v3, -1, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 0x80000000, v3
+; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 0x7fffffff, v2
+; GFX6-NEXT:    v_bfrev_b32_e32 v3, 1
+; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 0x80000000, v3
 ; GFX6-NEXT:    v_max_i32_e32 v1, v2, v1
+; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX6-NEXT:    v_min_i32_e32 v1, v1, v3
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 16, v0
@@ -2724,13 +2704,13 @@ define amdgpu_ps half @ssubsat_i16_sv(i16 inreg %lhs, i16 %rhs) {
 define amdgpu_ps half @ssubsat_i16_vs(i16 %lhs, i16 inreg %rhs) {
 ; GFX6-LABEL: ssubsat_i16_vs:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX6-NEXT:    v_max_i32_e32 v1, -1, v0
+; GFX6-NEXT:    v_bfrev_b32_e32 v1, -2
 ; GFX6-NEXT:    s_lshl_b32 s0, s0, 16
-; GFX6-NEXT:    v_subrev_i32_e32 v1, vcc, 0x7fffffff, v1
-; GFX6-NEXT:    v_min_i32_e32 v2, -1, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, 0x80000000, v2
+; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 0x7fffffff, v1
+; GFX6-NEXT:    v_bfrev_b32_e32 v2, 1
+; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 0x80000000, v2
 ; GFX6-NEXT:    v_max_i32_e32 v1, s0, v1
+; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX6-NEXT:    v_min_i32_e32 v1, v1, v2
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 16, v0
@@ -2766,22 +2746,20 @@ define <2 x i16> @v_ssubsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX6-NEXT:    s_brev_b32 s4, -2
 ; GFX6-NEXT:    v_max_i32_e32 v4, -1, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX6-NEXT:    s_brev_b32 s5, 1
-; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v4
+; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, 0x7fffffff, v4
 ; GFX6-NEXT:    v_min_i32_e32 v5, -1, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v5, vcc, s5, v5
+; GFX6-NEXT:    v_subrev_i32_e32 v5, vcc, 0x80000000, v5
 ; GFX6-NEXT:    v_max_i32_e32 v2, v4, v2
 ; GFX6-NEXT:    v_min_i32_e32 v2, v2, v5
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
 ; GFX6-NEXT:    v_max_i32_e32 v3, -1, v1
-; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v3
+; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 0x7fffffff, v3
 ; GFX6-NEXT:    v_min_i32_e32 v4, -1, v1
-; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, s5, v4
+; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, 0x80000000, v4
 ; GFX6-NEXT:    v_max_i32_e32 v2, v3, v2
 ; GFX6-NEXT:    v_min_i32_e32 v2, v2, v4
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v2
@@ -2978,22 +2956,20 @@ define amdgpu_ps float @ssubsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) {
 ; GFX6-LABEL: ssubsat_v2i16_vs:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX6-NEXT:    s_brev_b32 s2, -2
 ; GFX6-NEXT:    v_max_i32_e32 v2, -1, v0
 ; GFX6-NEXT:    s_lshl_b32 s0, s0, 16
-; GFX6-NEXT:    s_brev_b32 s3, 1
-; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, s2, v2
+; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2
 ; GFX6-NEXT:    v_min_i32_e32 v3, -1, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, s3, v3
+; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 0x80000000, v3
 ; GFX6-NEXT:    v_max_i32_e32 v2, s0, v2
 ; GFX6-NEXT:    v_min_i32_e32 v2, v2, v3
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
 ; GFX6-NEXT:    v_max_i32_e32 v2, -1, v1
 ; GFX6-NEXT:    s_lshl_b32 s0, s1, 16
-; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, s2, v2
+; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2
 ; GFX6-NEXT:    v_min_i32_e32 v3, -1, v1
-; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, s3, v3
+; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 0x80000000, v3
 ; GFX6-NEXT:    v_max_i32_e32 v2, s0, v2
 ; GFX6-NEXT:    v_min_i32_e32 v2, v2, v3
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v2
@@ -3056,16 +3032,16 @@ define <2 x float> @v_ssubsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX6-NEXT:    s_brev_b32 s4, -2
 ; GFX6-NEXT:    v_max_i32_e32 v8, -1, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
 ; GFX6-NEXT:    s_brev_b32 s5, 1
-; GFX6-NEXT:    v_subrev_i32_e32 v8, vcc, s4, v8
+; GFX6-NEXT:    v_subrev_i32_e32 v8, vcc, 0x7fffffff, v8
 ; GFX6-NEXT:    v_min_i32_e32 v10, -1, v0
 ; GFX6-NEXT:    v_subrev_i32_e32 v10, vcc, s5, v10
 ; GFX6-NEXT:    v_max_i32_e32 v4, v8, v4
 ; GFX6-NEXT:    v_min_i32_e32 v4, v4, v10
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
+; GFX6-NEXT:    s_brev_b32 s4, -2
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 16, v5
 ; GFX6-NEXT:    v_max_i32_e32 v5, -1, v1
@@ -4135,12 +4111,12 @@ define i48 @v_ssubsat_i48(i48 %lhs, i48 %rhs) {
 ; GFX6-NEXT:    v_bfe_i32 v3, v2, 0, 16
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[0:1]
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[6:7], 0, v[2:3]
-; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 0xffff8000, v0
-; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 15, v5
+; GFX6-NEXT:    v_mov_b32_e32 v1, 0xffff8000
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 0xffff8000, v1
+; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 15, v5
 ; GFX6-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
-; GFX6-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX6-NEXT:    v_cndmask_b32_e32 v1, v6, v2, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_ssubsat_i48:
@@ -4153,12 +4129,12 @@ define i48 @v_ssubsat_i48(i48 %lhs, i48 %rhs) {
 ; GFX8-NEXT:    v_bfe_i32 v3, v2, 0, 16
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[0:1]
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[6:7], 0, v[2:3]
-; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0xffff8000, v0
-; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 15, v5
+; GFX8-NEXT:    v_mov_b32_e32 v1, 0xffff8000
+; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0xffff8000, v1
+; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 15, v5
 ; GFX8-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v2, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_ssubsat_i48:
@@ -4170,8 +4146,9 @@ define i48 @v_ssubsat_i48(i48 %lhs, i48 %rhs) {
 ; GFX9-NEXT:    v_subb_co_u32_e32 v5, vcc, v1, v3, vcc
 ; GFX9-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[0:1]
 ; GFX9-NEXT:    v_cmp_lt_i64_e64 s[6:7], 0, v[2:3]
+; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
+; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v1
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
-; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v0
 ; GFX9-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
@@ -4342,12 +4319,12 @@ define amdgpu_ps <2 x float> @ssubsat_i48_sv(i48 inreg %lhs, i48 %rhs) {
 ; GFX6-NEXT:    v_bfe_i32 v1, v0, 0, 16
 ; GFX6-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], v[2:3]
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[2:3], 0, v[0:1]
-; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
-; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 15, v3
-; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 0xffff8000, v0
+; GFX6-NEXT:    v_mov_b32_e32 v1, 0xffff8000
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 0xffff8000, v1
 ; GFX6-NEXT:    s_xor_b64 vcc, s[2:3], s[0:1]
-; GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX6-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
+; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 15, v3
+; GFX6-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX6-NEXT:    ; return to shader part epilog
 ;
@@ -4361,12 +4338,12 @@ define amdgpu_ps <2 x float> @ssubsat_i48_sv(i48 inreg %lhs, i48 %rhs) {
 ; GFX8-NEXT:    v_bfe_i32 v1, v0, 0, 16
 ; GFX8-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], v[2:3]
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[2:3], 0, v[0:1]
-; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
-; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 15, v3
-; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0xffff8000, v0
+; GFX8-NEXT:    v_mov_b32_e32 v1, 0xffff8000
+; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0xffff8000, v1
 ; GFX8-NEXT:    s_xor_b64 vcc, s[2:3], s[0:1]
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
+; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 15, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX8-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX8-NEXT:    ; return to shader part epilog
 ;
@@ -4379,8 +4356,9 @@ define amdgpu_ps <2 x float> @ssubsat_i48_sv(i48 inreg %lhs, i48 %rhs) {
 ; GFX9-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v1, vcc
 ; GFX9-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], v[2:3]
 ; GFX9-NEXT:    v_cmp_lt_i64_e64 s[2:3], 0, v[0:1]
+; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
+; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v1
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
-; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v0
 ; GFX9-NEXT:    s_xor_b64 vcc, s[2:3], s[0:1]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
@@ -4437,12 +4415,12 @@ define amdgpu_ps <2 x float> @ssubsat_i48_vs(i48 %lhs, i48 inreg %rhs) {
 ; GFX6-NEXT:    s_bfe_i64 s[0:1], s[0:1], 0x300000
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[2:3], v[2:3], v[0:1]
 ; GFX6-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], 0
-; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
-; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 15, v3
-; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 0xffff8000, v0
+; GFX6-NEXT:    v_mov_b32_e32 v1, 0xffff8000
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 0xffff8000, v1
 ; GFX6-NEXT:    s_xor_b64 vcc, s[0:1], s[2:3]
-; GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX6-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
+; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 15, v3
+; GFX6-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX6-NEXT:    ; return to shader part epilog
 ;
@@ -4456,12 +4434,12 @@ define amdgpu_ps <2 x float> @ssubsat_i48_vs(i48 %lhs, i48 inreg %rhs) {
 ; GFX8-NEXT:    s_bfe_i64 s[0:1], s[0:1], 0x300000
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[2:3], v[2:3], v[0:1]
 ; GFX8-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], 0
-; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
-; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 15, v3
-; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0xffff8000, v0
+; GFX8-NEXT:    v_mov_b32_e32 v1, 0xffff8000
+; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0xffff8000, v1
 ; GFX8-NEXT:    s_xor_b64 vcc, s[0:1], s[2:3]
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
+; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 15, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX8-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX8-NEXT:    ; return to shader part epilog
 ;
@@ -4474,8 +4452,9 @@ define amdgpu_ps <2 x float> @ssubsat_i48_vs(i48 %lhs, i48 inreg %rhs) {
 ; GFX9-NEXT:    v_subb_co_u32_e32 v3, vcc, v1, v3, vcc
 ; GFX9-NEXT:    v_cmp_lt_i64_e64 s[0:1], v[2:3], v[0:1]
 ; GFX9-NEXT:    v_cmp_gt_i64_e64 s[2:3], s[2:3], 0
+; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
+; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v1
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
-; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v0
 ; GFX9-NEXT:    s_xor_b64 vcc, s[2:3], s[0:1]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
@@ -4529,8 +4508,9 @@ define i64 @v_ssubsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX6-NEXT:    v_subb_u32_e32 v5, vcc, v1, v3, vcc
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[0:1]
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[6:7], 0, v[2:3]
+; GFX6-NEXT:    v_bfrev_b32_e32 v1, 1
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 0x80000000, v1
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
-; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 0x80000000, v0
 ; GFX6-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
@@ -4543,8 +4523,9 @@ define i64 @v_ssubsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX8-NEXT:    v_subb_u32_e32 v5, vcc, v1, v3, vcc
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[0:1]
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[6:7], 0, v[2:3]
+; GFX8-NEXT:    v_bfrev_b32_e32 v1, 1
+; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x80000000, v1
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
-; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x80000000, v0
 ; GFX8-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
@@ -4557,8 +4538,9 @@ define i64 @v_ssubsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX9-NEXT:    v_subb_co_u32_e32 v5, vcc, v1, v3, vcc
 ; GFX9-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[0:1]
 ; GFX9-NEXT:    v_cmp_lt_i64_e64 s[6:7], 0, v[2:3]
+; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
+; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v1
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
-; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v0
 ; GFX9-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
@@ -4702,8 +4684,9 @@ define amdgpu_ps <2 x float> @ssubsat_i64_sv(i64 inreg %lhs, i64 %rhs) {
 ; GFX6-NEXT:    v_subb_u32_e32 v3, vcc, v3, v1, vcc
 ; GFX6-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], v[2:3]
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[2:3], 0, v[0:1]
+; GFX6-NEXT:    v_bfrev_b32_e32 v1, 1
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 0x80000000, v1
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
-; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 0x80000000, v0
 ; GFX6-NEXT:    s_xor_b64 vcc, s[2:3], s[0:1]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
@@ -4716,8 +4699,9 @@ define amdgpu_ps <2 x float> @ssubsat_i64_sv(i64 inreg %lhs, i64 %rhs) {
 ; GFX8-NEXT:    v_subb_u32_e32 v3, vcc, v3, v1, vcc
 ; GFX8-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], v[2:3]
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[2:3], 0, v[0:1]
+; GFX8-NEXT:    v_bfrev_b32_e32 v1, 1
+; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x80000000, v1
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
-; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x80000000, v0
 ; GFX8-NEXT:    s_xor_b64 vcc, s[2:3], s[0:1]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
@@ -4730,8 +4714,9 @@ define amdgpu_ps <2 x float> @ssubsat_i64_sv(i64 inreg %lhs, i64 %rhs) {
 ; GFX9-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v1, vcc
 ; GFX9-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], v[2:3]
 ; GFX9-NEXT:    v_cmp_lt_i64_e64 s[2:3], 0, v[0:1]
+; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
+; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v1
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
-; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v0
 ; GFX9-NEXT:    s_xor_b64 vcc, s[2:3], s[0:1]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
@@ -4774,8 +4759,9 @@ define amdgpu_ps <2 x float> @ssubsat_i64_vs(i64 %lhs, i64 inreg %rhs) {
 ; GFX6-NEXT:    v_subb_u32_e32 v3, vcc, v1, v3, vcc
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[2:3], v[2:3], v[0:1]
 ; GFX6-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], 0
+; GFX6-NEXT:    v_bfrev_b32_e32 v1, 1
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 0x80000000, v1
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
-; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 0x80000000, v0
 ; GFX6-NEXT:    s_xor_b64 vcc, s[0:1], s[2:3]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
@@ -4788,8 +4774,9 @@ define amdgpu_ps <2 x float> @ssubsat_i64_vs(i64 %lhs, i64 inreg %rhs) {
 ; GFX8-NEXT:    v_subb_u32_e32 v3, vcc, v1, v3, vcc
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[2:3], v[2:3], v[0:1]
 ; GFX8-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], 0
+; GFX8-NEXT:    v_bfrev_b32_e32 v1, 1
+; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x80000000, v1
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
-; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x80000000, v0
 ; GFX8-NEXT:    s_xor_b64 vcc, s[0:1], s[2:3]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
@@ -4802,8 +4789,9 @@ define amdgpu_ps <2 x float> @ssubsat_i64_vs(i64 %lhs, i64 inreg %rhs) {
 ; GFX9-NEXT:    v_subb_co_u32_e32 v3, vcc, v1, v3, vcc
 ; GFX9-NEXT:    v_cmp_lt_i64_e64 s[2:3], v[2:3], v[0:1]
 ; GFX9-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[0:1], 0
+; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
+; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v1
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
-; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v0
 ; GFX9-NEXT:    s_xor_b64 vcc, s[0:1], s[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
@@ -4846,9 +4834,9 @@ define <2 x i64> @v_ssubsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX6-NEXT:    v_subb_u32_e32 v9, vcc, v1, v5, vcc
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[8:9], v[0:1]
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[6:7], 0, v[4:5]
+; GFX6-NEXT:    v_bfrev_b32_e32 v10, 1
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 0x80000000, v10
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v9
-; GFX6-NEXT:    v_bfrev_b32_e32 v1, 1
-; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v0, v1
 ; GFX6-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
@@ -4856,8 +4844,8 @@ define <2 x i64> @v_ssubsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX6-NEXT:    v_subb_u32_e32 v5, vcc, v3, v7, vcc
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[2:3]
 ; GFX6-NEXT:    v_cmp_lt_i64_e64 s[6:7], 0, v[6:7]
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 0x80000000, v10
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v2, 31, v5
-; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 0x80000000, v2
 ; GFX6-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
@@ -4870,9 +4858,9 @@ define <2 x i64> @v_ssubsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX8-NEXT:    v_subb_u32_e32 v9, vcc, v1, v5, vcc
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[8:9], v[0:1]
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[6:7], 0, v[4:5]
+; GFX8-NEXT:    v_bfrev_b32_e32 v10, 1
+; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x80000000, v10
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v9
-; GFX8-NEXT:    v_bfrev_b32_e32 v1, 1
-; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v0, v1
 ; GFX8-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
@@ -4880,8 +4868,8 @@ define <2 x i64> @v_ssubsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX8-NEXT:    v_subb_u32_e32 v5, vcc, v3, v7, vcc
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[2:3]
 ; GFX8-NEXT:    v_cmp_lt_i64_e64 s[6:7], 0, v[6:7]
+; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0x80000000, v10
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 31, v5
-; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0x80000000, v2
 ; GFX8-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
@@ -4894,9 +4882,9 @@ define <2 x i64> @v_ssubsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX9-NEXT:    v_subb_co_u32_e32 v9, vcc, v1, v5, vcc
 ; GFX9-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[8:9], v[0:1]
 ; GFX9-NEXT:    v_cmp_lt_i64_e64 s[6:7], 0, v[4:5]
+; GFX9-NEXT:    v_bfrev_b32_e32 v10, 1
+; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, 0x80000000, v10
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 31, v9
-; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
-; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, v0, v1
 ; GFX9-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
@@ -4904,8 +4892,8 @@ define <2 x i64> @v_ssubsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; GFX9-NEXT:    v_subb_co_u32_e32 v5, vcc, v3, v7, vcc
 ; GFX9-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[4:5], v[2:3]
 ; GFX9-NEXT:    v_cmp_lt_i64_e64 s[6:7], 0, v[6:7]
+; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, 0x80000000, v10
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v2, 31, v5
-; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, 0x80000000, v2
 ; GFX9-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
@@ -5706,6 +5694,7 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX6-NEXT:    v_subb_u32_e32 v18, vcc, v2, v10, vcc
 ; GFX6-NEXT:    v_subb_u32_e32 v19, vcc, v3, v11, vcc
 ; GFX6-NEXT:    v_cmp_lt_u64_e32 vcc, v[16:17], v[0:1]
+; GFX6-NEXT:    v_bfrev_b32_e32 v20, 1
 ; GFX6-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX6-NEXT:    v_cmp_lt_i64_e32 vcc, v[18:19], v[2:3]
 ; GFX6-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
@@ -5719,8 +5708,7 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
 ; GFX6-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v2, 31, v19
-; GFX6-NEXT:    v_bfrev_b32_e32 v1, 1
-; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v2, v1
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v2, v20
 ; GFX6-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v16, v2, vcc
@@ -5744,9 +5732,9 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[14:15]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
 ; GFX6-NEXT:    v_xor_b32_e32 v4, v5, v4
-; GFX6-NEXT:    v_ashrrev_i32_e32 v6, 31, v11
-; GFX6-NEXT:    v_add_i32_e32 v7, vcc, 0x80000000, v6
+; GFX6-NEXT:    v_add_i32_e32 v7, vcc, 0x80000000, v20
 ; GFX6-NEXT:    v_and_b32_e32 v4, 1, v4
+; GFX6-NEXT:    v_ashrrev_i32_e32 v6, 31, v11
 ; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, v8, v6, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v5, v9, v6, vcc
@@ -5762,6 +5750,7 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX8-NEXT:    v_subb_u32_e32 v18, vcc, v2, v10, vcc
 ; GFX8-NEXT:    v_subb_u32_e32 v19, vcc, v3, v11, vcc
 ; GFX8-NEXT:    v_cmp_lt_u64_e32 vcc, v[16:17], v[0:1]
+; GFX8-NEXT:    v_bfrev_b32_e32 v20, 1
 ; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, v[18:19], v[2:3]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
@@ -5775,8 +5764,7 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
 ; GFX8-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 31, v19
-; GFX8-NEXT:    v_bfrev_b32_e32 v1, 1
-; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v2, v1
+; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v2, v20
 ; GFX8-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v16, v2, vcc
@@ -5800,9 +5788,9 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[14:15]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
 ; GFX8-NEXT:    v_xor_b32_e32 v4, v5, v4
-; GFX8-NEXT:    v_ashrrev_i32_e32 v6, 31, v11
-; GFX8-NEXT:    v_add_u32_e32 v7, vcc, 0x80000000, v6
+; GFX8-NEXT:    v_add_u32_e32 v7, vcc, 0x80000000, v20
 ; GFX8-NEXT:    v_and_b32_e32 v4, 1, v4
+; GFX8-NEXT:    v_ashrrev_i32_e32 v6, 31, v11
 ; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
 ; GFX8-NEXT:    v_cndmask_b32_e32 v4, v8, v6, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v5, v9, v6, vcc
@@ -5818,6 +5806,7 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX9-NEXT:    v_subb_co_u32_e32 v18, vcc, v2, v10, vcc
 ; GFX9-NEXT:    v_subb_co_u32_e32 v19, vcc, v3, v11, vcc
 ; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, v[16:17], v[0:1]
+; GFX9-NEXT:    v_bfrev_b32_e32 v20, 1
 ; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[18:19], v[2:3]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
@@ -5831,8 +5820,7 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
 ; GFX9-NEXT:    v_xor_b32_e32 v0, v1, v0
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v2, 31, v19
-; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
-; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, v2, v1
+; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, v2, v20
 ; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v16, v2, vcc
@@ -5856,9 +5844,9 @@ define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[14:15]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
 ; GFX9-NEXT:    v_xor_b32_e32 v4, v5, v4
-; GFX9-NEXT:    v_ashrrev_i32_e32 v6, 31, v11
-; GFX9-NEXT:    v_add_co_u32_e32 v7, vcc, 0x80000000, v6
+; GFX9-NEXT:    v_add_co_u32_e32 v7, vcc, 0x80000000, v20
 ; GFX9-NEXT:    v_and_b32_e32 v4, 1, v4
+; GFX9-NEXT:    v_ashrrev_i32_e32 v6, 31, v11
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v8, v6, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v9, v6, vcc
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
index 48f05a33f03649c..482baf3249ed5bd 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
@@ -222,10 +222,10 @@ define i32 @v_urem_i32_oddk_denom(i32 %num) {
 ; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
 ; CHECK-NEXT:    v_mul_lo_u32 v1, v1, s4
 ; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
-; CHECK-NEXT:    v_subrev_i32_e32 v1, vcc, s4, v0
+; CHECK-NEXT:    v_subrev_i32_e32 v1, vcc, 0x12d8fb, v0
 ; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; CHECK-NEXT:    v_subrev_i32_e32 v1, vcc, s4, v0
+; CHECK-NEXT:    v_subrev_i32_e32 v1, vcc, 0x12d8fb, v0
 ; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
@@ -242,6 +242,7 @@ define <2 x i32> @v_urem_v2i32_oddk_denom(<2 x i32> %num) {
 ; GISEL-NEXT:    v_cvt_f32_u32_e32 v3, 0x12d8fb
 ; GISEL-NEXT:    v_mov_b32_e32 v4, 0xffed2705
 ; GISEL-NEXT:    v_rcp_iflag_f32_e32 v3, v3
+; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0x12d8fb, v2
 ; GISEL-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v3, v3
 ; GISEL-NEXT:    v_mul_lo_u32 v4, v3, v4
@@ -253,18 +254,16 @@ define <2 x i32> @v_urem_v2i32_oddk_denom(<2 x i32> %num) {
 ; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v2
 ; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
 ; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
-; GISEL-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v0
-; GISEL-NEXT:    v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1
+; GISEL-NEXT:    v_subrev_i32_e32 v3, vcc, 0x12d8fb, v0
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
-; GISEL-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v0
-; GISEL-NEXT:    v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1
+; GISEL-NEXT:    v_subrev_i32_e32 v3, vcc, 0x12d8fb, v0
 ; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_urem_v2i32_oddk_denom:
@@ -273,25 +272,26 @@ define <2 x i32> @v_urem_v2i32_oddk_denom(<2 x i32> %num) {
 ; CGP-NEXT:    s_mov_b32 s4, 0x12d8fb
 ; CGP-NEXT:    v_rcp_iflag_f32_e32 v2, 0x4996c7d8
 ; CGP-NEXT:    s_mov_b32 s5, 0xffed2705
+; CGP-NEXT:    v_mov_b32_e32 v3, 0x12d8fb
 ; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
+; CGP-NEXT:    v_sub_i32_e32 v3, vcc, 0x12d8fb, v3
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
-; CGP-NEXT:    v_mul_lo_u32 v3, v2, s5
-; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; CGP-NEXT:    v_mul_hi_u32 v3, v0, v2
+; CGP-NEXT:    v_mul_lo_u32 v4, v2, s5
+; CGP-NEXT:    v_mul_hi_u32 v4, v2, v4
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
+; CGP-NEXT:    v_mul_hi_u32 v4, v0, v2
 ; CGP-NEXT:    v_mul_hi_u32 v2, v1, v2
-; CGP-NEXT:    v_mul_lo_u32 v3, v3, s4
+; CGP-NEXT:    v_mul_lo_u32 v4, v4, s4
 ; CGP-NEXT:    v_mul_lo_u32 v2, v2, s4
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
 ; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v2
-; CGP-NEXT:    v_subrev_i32_e32 v2, vcc, s4, v0
-; CGP-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v1
+; CGP-NEXT:    v_subrev_i32_e32 v2, vcc, 0x12d8fb, v0
+; CGP-NEXT:    v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1
 ; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
-; CGP-NEXT:    v_subrev_i32_e32 v2, vcc, s4, v0
-; CGP-NEXT:    v_subrev_i32_e32 v3, vcc, 0x12d8fb, v1
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; CGP-NEXT:    v_subrev_i32_e32 v2, vcc, 0x12d8fb, v0
 ; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
index 097f6642cbc669b..00de46b168acb3d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -969,123 +969,123 @@ define i64 @v_urem_i64_oddk_denom(i64 %num) {
 ; CHECK-LABEL: v_urem_i64_oddk_denom:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    s_mov_b32 s4, 0x12d8fb
+; CHECK-NEXT:    s_mov_b32 s6, 0x12d8fb
 ; CHECK-NEXT:    v_mov_b32_e32 v2, 0x12d8fb
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, 0x12d8fb
 ; CHECK-NEXT:    v_cvt_f32_ubyte0_e32 v4, 0
-; CHECK-NEXT:    s_mov_b32 s5, 0xffed2705
+; CHECK-NEXT:    s_mov_b32 s7, 0xffed2705
 ; CHECK-NEXT:    v_mac_f32_e32 v3, 0x4f800000, v4
+; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, 0x12d8fb, v2
 ; CHECK-NEXT:    v_rcp_iflag_f32_e32 v3, v3
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[4:5]
 ; CHECK-NEXT:    v_mul_f32_e32 v3, 0x5f7ffffc, v3
-; CHECK-NEXT:    v_mul_f32_e32 v4, 0x2f800000, v3
-; CHECK-NEXT:    v_trunc_f32_e32 v4, v4
-; CHECK-NEXT:    v_mac_f32_e32 v3, 0xcf800000, v4
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; CHECK-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v3
+; CHECK-NEXT:    v_trunc_f32_e32 v6, v6
+; CHECK-NEXT:    v_mac_f32_e32 v3, 0xcf800000, v6
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v6, v6
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; CHECK-NEXT:    v_mul_lo_u32 v5, v4, s5
-; CHECK-NEXT:    v_mul_lo_u32 v6, v3, s5
-; CHECK-NEXT:    v_mul_hi_u32 v7, s5, v3
-; CHECK-NEXT:    v_sub_i32_e32 v5, vcc, v5, v3
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
-; CHECK-NEXT:    v_mul_lo_u32 v7, v4, v6
-; CHECK-NEXT:    v_mul_hi_u32 v8, v3, v6
-; CHECK-NEXT:    v_mul_hi_u32 v6, v4, v6
-; CHECK-NEXT:    v_mul_lo_u32 v9, v3, v5
-; CHECK-NEXT:    v_mul_lo_u32 v10, v4, v5
-; CHECK-NEXT:    v_mul_hi_u32 v11, v3, v5
-; CHECK-NEXT:    v_mul_hi_u32 v5, v4, v5
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
-; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v10, v6
-; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v11
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
-; CHECK-NEXT:    v_addc_u32_e32 v4, vcc, v4, v5, vcc
-; CHECK-NEXT:    v_mul_lo_u32 v5, v3, s5
-; CHECK-NEXT:    v_mul_hi_u32 v6, s5, v3
-; CHECK-NEXT:    v_mul_lo_u32 v7, v4, s5
-; CHECK-NEXT:    v_mul_lo_u32 v8, v4, v5
-; CHECK-NEXT:    v_mul_hi_u32 v9, v3, v5
-; CHECK-NEXT:    v_mul_hi_u32 v5, v4, v5
-; CHECK-NEXT:    v_sub_i32_e32 v7, vcc, v7, v3
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; CHECK-NEXT:    v_mul_lo_u32 v7, v3, v6
-; CHECK-NEXT:    v_mul_lo_u32 v10, v4, v6
-; CHECK-NEXT:    v_mul_hi_u32 v11, v3, v6
-; CHECK-NEXT:    v_mul_hi_u32 v6, v4, v6
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v10, v5
-; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v11
-; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v10, v9
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
-; CHECK-NEXT:    v_addc_u32_e32 v4, vcc, v4, v6, vcc
-; CHECK-NEXT:    v_mul_lo_u32 v5, v1, v3
-; CHECK-NEXT:    v_mul_hi_u32 v6, v0, v3
+; CHECK-NEXT:    v_mul_lo_u32 v7, v6, s7
+; CHECK-NEXT:    v_mul_lo_u32 v8, v3, s7
+; CHECK-NEXT:    v_mul_hi_u32 v9, s7, v3
+; CHECK-NEXT:    v_sub_i32_e64 v7, s[4:5], v7, v3
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v9
+; CHECK-NEXT:    v_mul_lo_u32 v9, v6, v8
+; CHECK-NEXT:    v_mul_hi_u32 v10, v3, v8
+; CHECK-NEXT:    v_mul_hi_u32 v8, v6, v8
+; CHECK-NEXT:    v_mul_lo_u32 v11, v3, v7
+; CHECK-NEXT:    v_mul_lo_u32 v12, v6, v7
+; CHECK-NEXT:    v_mul_hi_u32 v13, v3, v7
+; CHECK-NEXT:    v_mul_hi_u32 v7, v6, v7
+; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v12, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v13
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v11, v9
+; CHECK-NEXT:    v_add_i32_e64 v10, s[4:5], v12, v10
+; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v9
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v10, v9
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v9
+; CHECK-NEXT:    v_add_i32_e64 v3, s[4:5], v3, v8
+; CHECK-NEXT:    v_addc_u32_e64 v6, s[4:5], v6, v7, s[4:5]
+; CHECK-NEXT:    v_mul_lo_u32 v7, v3, s7
+; CHECK-NEXT:    v_mul_hi_u32 v8, s7, v3
+; CHECK-NEXT:    v_mul_lo_u32 v9, v6, s7
+; CHECK-NEXT:    v_mul_lo_u32 v10, v6, v7
+; CHECK-NEXT:    v_mul_hi_u32 v11, v3, v7
+; CHECK-NEXT:    v_mul_hi_u32 v7, v6, v7
+; CHECK-NEXT:    v_sub_i32_e64 v9, s[4:5], v9, v3
+; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
+; CHECK-NEXT:    v_mul_lo_u32 v9, v3, v8
+; CHECK-NEXT:    v_mul_lo_u32 v12, v6, v8
+; CHECK-NEXT:    v_mul_hi_u32 v13, v3, v8
+; CHECK-NEXT:    v_mul_hi_u32 v8, v6, v8
+; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v10, v9
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v12, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v13
+; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v10, v9
+; CHECK-NEXT:    v_add_i32_e64 v10, s[4:5], v12, v11
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v9
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v10, v9
+; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v9
+; CHECK-NEXT:    v_add_i32_e64 v3, s[4:5], v3, v7
+; CHECK-NEXT:    v_addc_u32_e64 v6, s[4:5], v6, v8, s[4:5]
+; CHECK-NEXT:    v_mul_lo_u32 v7, v1, v3
+; CHECK-NEXT:    v_mul_hi_u32 v8, v0, v3
 ; CHECK-NEXT:    v_mul_hi_u32 v3, v1, v3
-; CHECK-NEXT:    v_mul_lo_u32 v7, v0, v4
-; CHECK-NEXT:    v_mul_lo_u32 v8, v1, v4
-; CHECK-NEXT:    v_mul_hi_u32 v9, v0, v4
-; CHECK-NEXT:    v_mul_hi_u32 v4, v1, v4
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v8, v3
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v9
-; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; CHECK-NEXT:    v_mul_lo_u32 v6, v3, s4
-; CHECK-NEXT:    v_mul_hi_u32 v3, s4, v3
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; CHECK-NEXT:    v_mul_lo_u32 v4, v4, s4
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
-; CHECK-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v6
-; CHECK-NEXT:    v_subb_u32_e64 v4, vcc, v1, v3, s[4:5]
-; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
-; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, -1, vcc
-; CHECK-NEXT:    v_sub_i32_e32 v5, vcc, v0, v2
-; CHECK-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v4
+; CHECK-NEXT:    v_mul_lo_u32 v9, v0, v6
+; CHECK-NEXT:    v_mul_lo_u32 v10, v1, v6
+; CHECK-NEXT:    v_mul_hi_u32 v11, v0, v6
+; CHECK-NEXT:    v_mul_hi_u32 v6, v1, v6
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v9
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v3, s[4:5], v10, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v3, s[4:5], v3, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v9, v7
+; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v10, v8
+; CHECK-NEXT:    v_add_i32_e64 v3, s[4:5], v3, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v7
+; CHECK-NEXT:    v_mul_lo_u32 v8, v3, s6
+; CHECK-NEXT:    v_mul_hi_u32 v3, s6, v3
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v7
+; CHECK-NEXT:    v_mul_lo_u32 v6, v6, s6
+; CHECK-NEXT:    v_add_i32_e64 v3, s[4:5], v6, v3
+; CHECK-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v8
+; CHECK-NEXT:    v_subb_u32_e64 v6, s[6:7], v1, v3, s[4:5]
+; CHECK-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v3
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[6:7], v0, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, -1, s[6:7]
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v6
 ; CHECK-NEXT:    v_cndmask_b32_e64 v3, -1, v3, s[6:7]
 ; CHECK-NEXT:    v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5]
-; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v2
-; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[4:5]
 ; CHECK-NEXT:    s_mov_b64 s[4:5], vcc
-; CHECK-NEXT:    v_subrev_i32_e32 v6, vcc, 0x12d8fb, v5
+; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, 0x12d8fb, v2
 ; CHECK-NEXT:    v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5]
 ; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v1
-; CHECK-NEXT:    v_cndmask_b32_e64 v2, -1, v2, s[4:5]
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, -1, v5, s[4:5]
 ; CHECK-NEXT:    v_subbrev_u32_e32 v7, vcc, 0, v1, vcc
-; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
-; CHECK-NEXT:    v_cndmask_b32_e32 v2, v5, v6, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
+; CHECK-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
 ; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; CHECK-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %result = urem i64 %num, 1235195
   ret i64 %result
@@ -1095,217 +1095,217 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
 ; GISEL-LABEL: v_urem_v2i64_oddk_denom:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    s_mov_b32 s4, 0x12d8fb
+; GISEL-NEXT:    s_mov_b32 s6, 0x12d8fb
 ; GISEL-NEXT:    v_mov_b32_e32 v4, 0x12d8fb
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, 0x12d8fb
-; GISEL-NEXT:    v_cvt_f32_ubyte0_e32 v6, 0
-; GISEL-NEXT:    s_sub_u32 s5, 0, 0x12d8fb
-; GISEL-NEXT:    v_madmk_f32 v7, v6, 0x4f800000, v5
-; GISEL-NEXT:    s_subb_u32 s6, 0, 0
-; GISEL-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v6
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v7
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v5
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, 0x12d8fb
+; GISEL-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
 ; GISEL-NEXT:    s_sub_u32 s7, 0, 0x12d8fb
-; GISEL-NEXT:    v_mul_f32_e32 v6, 0x5f7ffffc, v6
-; GISEL-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
+; GISEL-NEXT:    v_madmk_f32 v7, v5, 0x4f800000, v6
 ; GISEL-NEXT:    s_subb_u32 s8, 0, 0
-; GISEL-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v6
-; GISEL-NEXT:    v_mul_f32_e32 v8, 0x2f800000, v5
-; GISEL-NEXT:    v_trunc_f32_e32 v7, v7
-; GISEL-NEXT:    v_trunc_f32_e32 v8, v8
-; GISEL-NEXT:    v_mac_f32_e32 v6, 0xcf800000, v7
+; GISEL-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v5
+; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0x12d8fb, v4
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v7, v7
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v6
+; GISEL-NEXT:    s_sub_u32 s9, 0, 0x12d8fb
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
+; GISEL-NEXT:    v_mul_f32_e32 v7, 0x5f7ffffc, v7
+; GISEL-NEXT:    v_mul_f32_e32 v8, 0x5f7ffffc, v8
+; GISEL-NEXT:    s_subb_u32 s10, 0, 0
+; GISEL-NEXT:    v_mul_f32_e32 v9, 0x2f800000, v7
+; GISEL-NEXT:    v_mul_f32_e32 v10, 0x2f800000, v8
+; GISEL-NEXT:    v_trunc_f32_e32 v9, v9
+; GISEL-NEXT:    v_trunc_f32_e32 v10, v10
+; GISEL-NEXT:    v_mac_f32_e32 v7, 0xcf800000, v9
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v9, v9
+; GISEL-NEXT:    v_mac_f32_e32 v8, 0xcf800000, v10
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v10, v10
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
-; GISEL-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v8
+; GISEL-NEXT:    v_mul_lo_u32 v11, s7, v9
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
-; GISEL-NEXT:    v_mul_lo_u32 v9, s5, v7
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
-; GISEL-NEXT:    v_mul_lo_u32 v10, s7, v8
-; GISEL-NEXT:    v_mul_lo_u32 v11, s5, v6
-; GISEL-NEXT:    v_mul_lo_u32 v12, s6, v6
-; GISEL-NEXT:    v_mul_hi_u32 v13, s5, v6
-; GISEL-NEXT:    v_mul_lo_u32 v14, s7, v5
-; GISEL-NEXT:    v_mul_lo_u32 v15, s8, v5
-; GISEL-NEXT:    v_mul_hi_u32 v16, s7, v5
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v12, v9
-; GISEL-NEXT:    v_mul_lo_u32 v12, v7, v11
-; GISEL-NEXT:    v_mul_hi_u32 v17, v6, v11
-; GISEL-NEXT:    v_mul_hi_u32 v11, v7, v11
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v15, v10
-; GISEL-NEXT:    v_mul_lo_u32 v15, v8, v14
-; GISEL-NEXT:    v_mul_hi_u32 v18, v5, v14
-; GISEL-NEXT:    v_mul_hi_u32 v14, v8, v14
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v13
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v16
-; GISEL-NEXT:    v_mul_lo_u32 v13, v6, v9
-; GISEL-NEXT:    v_mul_lo_u32 v16, v7, v9
-; GISEL-NEXT:    v_mul_hi_u32 v19, v6, v9
-; GISEL-NEXT:    v_mul_hi_u32 v9, v7, v9
-; GISEL-NEXT:    v_mul_lo_u32 v20, v5, v10
-; GISEL-NEXT:    v_mul_lo_u32 v21, v8, v10
-; GISEL-NEXT:    v_mul_hi_u32 v22, v5, v10
-; GISEL-NEXT:    v_mul_hi_u32 v10, v8, v10
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v20
-; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v21, v14
-; GISEL-NEXT:    v_cndmask_b32_e64 v21, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v17
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v19
-; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v22
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v16, v17
-; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v20, v15
-; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v21, v18
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v16, v15
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v11
-; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, v7, v9, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v9, s5, v6
-; GISEL-NEXT:    v_mul_lo_u32 v11, s6, v6
-; GISEL-NEXT:    v_mul_hi_u32 v12, s5, v6
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v14
-; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, v8, v10, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v10, s7, v5
-; GISEL-NEXT:    v_mul_lo_u32 v13, s8, v5
-; GISEL-NEXT:    v_mul_hi_u32 v14, s7, v5
-; GISEL-NEXT:    v_mul_lo_u32 v15, s5, v7
-; GISEL-NEXT:    v_mul_lo_u32 v16, v7, v9
-; GISEL-NEXT:    v_mul_hi_u32 v17, v6, v9
-; GISEL-NEXT:    v_mul_hi_u32 v9, v7, v9
-; GISEL-NEXT:    v_mul_lo_u32 v18, s7, v8
-; GISEL-NEXT:    v_mul_lo_u32 v19, v8, v10
-; GISEL-NEXT:    v_mul_hi_u32 v20, v5, v10
-; GISEL-NEXT:    v_mul_hi_u32 v10, v8, v10
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v15
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v18
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v14
-; GISEL-NEXT:    v_mul_lo_u32 v13, v6, v11
-; GISEL-NEXT:    v_mul_lo_u32 v14, v7, v11
-; GISEL-NEXT:    v_mul_hi_u32 v15, v6, v11
-; GISEL-NEXT:    v_mul_hi_u32 v11, v7, v11
-; GISEL-NEXT:    v_mul_lo_u32 v18, v5, v12
-; GISEL-NEXT:    v_mul_lo_u32 v21, v8, v12
-; GISEL-NEXT:    v_mul_hi_u32 v22, v5, v12
-; GISEL-NEXT:    v_mul_hi_u32 v12, v8, v12
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v16, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v18, vcc, v19, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v21, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v21, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v18, v20
-; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v22
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v16, v13
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
-; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v19, v17
-; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v21, v18
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v16, v15
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
-; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, v7, v11, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v9, v1, v6
-; GISEL-NEXT:    v_mul_hi_u32 v11, v0, v6
-; GISEL-NEXT:    v_mul_hi_u32 v6, v1, v6
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
-; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, v8, v12, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v10, v3, v5
-; GISEL-NEXT:    v_mul_hi_u32 v12, v2, v5
-; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v5
-; GISEL-NEXT:    v_mul_lo_u32 v13, v0, v7
-; GISEL-NEXT:    v_mul_lo_u32 v14, v1, v7
-; GISEL-NEXT:    v_mul_hi_u32 v15, v0, v7
+; GISEL-NEXT:    v_mul_lo_u32 v12, s9, v10
+; GISEL-NEXT:    v_mul_lo_u32 v13, s7, v7
+; GISEL-NEXT:    v_mul_lo_u32 v14, s8, v7
+; GISEL-NEXT:    v_mul_hi_u32 v15, s7, v7
+; GISEL-NEXT:    v_mul_lo_u32 v16, s9, v8
+; GISEL-NEXT:    v_mul_lo_u32 v17, s10, v8
+; GISEL-NEXT:    v_mul_hi_u32 v18, s9, v8
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v14, v11
+; GISEL-NEXT:    v_mul_lo_u32 v14, v9, v13
+; GISEL-NEXT:    v_mul_hi_u32 v19, v7, v13
+; GISEL-NEXT:    v_mul_hi_u32 v13, v9, v13
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v17, v12
+; GISEL-NEXT:    v_mul_lo_u32 v17, v10, v16
+; GISEL-NEXT:    v_mul_hi_u32 v20, v8, v16
+; GISEL-NEXT:    v_mul_hi_u32 v16, v10, v16
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v15
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v18
+; GISEL-NEXT:    v_mul_lo_u32 v15, v7, v11
+; GISEL-NEXT:    v_mul_lo_u32 v18, v9, v11
+; GISEL-NEXT:    v_mul_hi_u32 v21, v7, v11
+; GISEL-NEXT:    v_mul_hi_u32 v11, v9, v11
+; GISEL-NEXT:    v_mul_lo_u32 v22, v8, v12
+; GISEL-NEXT:    v_mul_lo_u32 v23, v10, v12
+; GISEL-NEXT:    v_mul_hi_u32 v24, v8, v12
+; GISEL-NEXT:    v_mul_hi_u32 v12, v10, v12
+; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v18, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v17, s[4:5], v17, v22
+; GISEL-NEXT:    v_cndmask_b32_e64 v22, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v16, s[4:5], v23, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v23, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v19
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v21
+; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v17, s[4:5], v17, v20
+; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v16, s[4:5], v16, v24
+; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v15, v14
+; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v18, v19
+; GISEL-NEXT:    v_add_i32_e64 v17, s[4:5], v22, v17
+; GISEL-NEXT:    v_add_i32_e64 v18, s[4:5], v23, v20
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v16, s[4:5], v16, v17
+; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v15, v14
+; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v18, v17
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v15
+; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v13
+; GISEL-NEXT:    v_addc_u32_e64 v9, s[4:5], v9, v11, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v11, s7, v7
+; GISEL-NEXT:    v_mul_lo_u32 v13, s8, v7
+; GISEL-NEXT:    v_mul_hi_u32 v14, s7, v7
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v16
+; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], v10, v12, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v12, s9, v8
+; GISEL-NEXT:    v_mul_lo_u32 v15, s10, v8
+; GISEL-NEXT:    v_mul_hi_u32 v16, s9, v8
+; GISEL-NEXT:    v_mul_lo_u32 v17, s7, v9
+; GISEL-NEXT:    v_mul_lo_u32 v18, v9, v11
+; GISEL-NEXT:    v_mul_hi_u32 v19, v7, v11
+; GISEL-NEXT:    v_mul_hi_u32 v11, v9, v11
+; GISEL-NEXT:    v_mul_lo_u32 v20, s9, v10
+; GISEL-NEXT:    v_mul_lo_u32 v21, v10, v12
+; GISEL-NEXT:    v_mul_hi_u32 v22, v8, v12
+; GISEL-NEXT:    v_mul_hi_u32 v12, v10, v12
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v17
+; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v20
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v14
+; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v15, v16
+; GISEL-NEXT:    v_mul_lo_u32 v15, v7, v13
+; GISEL-NEXT:    v_mul_lo_u32 v16, v9, v13
+; GISEL-NEXT:    v_mul_hi_u32 v17, v7, v13
+; GISEL-NEXT:    v_mul_hi_u32 v13, v9, v13
+; GISEL-NEXT:    v_mul_lo_u32 v20, v8, v14
+; GISEL-NEXT:    v_mul_lo_u32 v23, v10, v14
+; GISEL-NEXT:    v_mul_hi_u32 v24, v8, v14
+; GISEL-NEXT:    v_mul_hi_u32 v14, v10, v14
+; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v18, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v16, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v20, s[4:5], v21, v20
+; GISEL-NEXT:    v_cndmask_b32_e64 v21, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v23, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v23, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v19
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v17
+; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v19, s[4:5], v20, v22
+; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v24
+; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v18, v15
+; GISEL-NEXT:    v_add_i32_e64 v16, s[4:5], v16, v17
+; GISEL-NEXT:    v_add_i32_e64 v17, s[4:5], v21, v19
+; GISEL-NEXT:    v_add_i32_e64 v18, s[4:5], v23, v20
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v17
+; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v16, v15
+; GISEL-NEXT:    v_add_i32_e64 v16, s[4:5], v18, v17
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v15
+; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v16
+; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v11
+; GISEL-NEXT:    v_addc_u32_e64 v9, s[4:5], v9, v13, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v11, v1, v7
+; GISEL-NEXT:    v_mul_hi_u32 v13, v0, v7
 ; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
-; GISEL-NEXT:    v_mul_lo_u32 v16, v2, v8
-; GISEL-NEXT:    v_mul_lo_u32 v17, v3, v8
-; GISEL-NEXT:    v_mul_hi_u32 v18, v2, v8
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v12
+; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], v10, v14, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v12, v3, v8
+; GISEL-NEXT:    v_mul_hi_u32 v14, v2, v8
 ; GISEL-NEXT:    v_mul_hi_u32 v8, v3, v8
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v14, v6
-; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v16
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v17, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v14, v11
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v16, v10
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v17, v12
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
-; GISEL-NEXT:    v_mul_lo_u32 v11, v6, s4
-; GISEL-NEXT:    v_mul_hi_u32 v6, s4, v6
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
-; GISEL-NEXT:    v_mul_lo_u32 v12, v5, s4
-; GISEL-NEXT:    v_mul_hi_u32 v5, s4, v5
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
-; GISEL-NEXT:    v_mul_lo_u32 v7, v7, s4
-; GISEL-NEXT:    v_mul_lo_u32 v8, v8, s4
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
-; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v11
-; GISEL-NEXT:    v_subb_u32_e64 v7, vcc, v1, v6, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v2, s[6:7], v2, v12
-; GISEL-NEXT:    v_subb_u32_e64 v8, vcc, v3, v5, s[6:7]
-; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v3, v5
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, v2, v4
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[8:9], 0, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, -1, v6, s[8:9]
+; GISEL-NEXT:    v_mul_lo_u32 v15, v0, v9
+; GISEL-NEXT:    v_mul_lo_u32 v16, v1, v9
+; GISEL-NEXT:    v_mul_hi_u32 v17, v0, v9
+; GISEL-NEXT:    v_mul_hi_u32 v9, v1, v9
+; GISEL-NEXT:    v_mul_lo_u32 v18, v2, v10
+; GISEL-NEXT:    v_mul_lo_u32 v19, v3, v10
+; GISEL-NEXT:    v_mul_hi_u32 v20, v2, v10
+; GISEL-NEXT:    v_mul_hi_u32 v10, v3, v10
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v16, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v18
+; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v19, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v17
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v20
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v15, v11
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v16, v13
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v18, v12
+; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v19, v14
+; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v13, v11
+; GISEL-NEXT:    v_mul_lo_u32 v13, v7, s6
+; GISEL-NEXT:    v_mul_hi_u32 v7, s6, v7
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v14, v12
+; GISEL-NEXT:    v_mul_lo_u32 v14, v8, s6
+; GISEL-NEXT:    v_mul_hi_u32 v8, s6, v8
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v11
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v12
+; GISEL-NEXT:    v_mul_lo_u32 v9, v9, s6
+; GISEL-NEXT:    v_mul_lo_u32 v10, v10, s6
+; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v9, v7
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v10, v8
+; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v13
+; GISEL-NEXT:    v_subb_u32_e64 v9, s[6:7], v1, v7, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v7
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v0, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[6:7]
+; GISEL-NEXT:    v_sub_i32_e64 v2, s[6:7], v2, v14
+; GISEL-NEXT:    v_subb_u32_e64 v10, s[8:9], v3, v8, s[6:7]
+; GISEL-NEXT:    v_sub_i32_e64 v3, s[8:9], v3, v8
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[8:9], v2, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[8:9]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[8:9], 0, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, -1, v7, s[8:9]
 ; GISEL-NEXT:    v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, -1, v5, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, -1, v8, s[4:5]
 ; GISEL-NEXT:    v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[6:7]
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
 ; GISEL-NEXT:    s_mov_b64 s[4:5], vcc
-; GISEL-NEXT:    v_subrev_i32_e32 v11, vcc, 0x12d8fb, v9
+; GISEL-NEXT:    v_sub_i32_e32 v11, vcc, 0x12d8fb, v4
 ; GISEL-NEXT:    v_sub_i32_e64 v12, s[6:7], v0, v4
 ; GISEL-NEXT:    v_subbrev_u32_e64 v1, s[6:7], 0, v1, s[6:7]
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v12, v4
@@ -1316,231 +1316,231 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_sub_i32_e64 v4, s[4:5], v12, v4
 ; GISEL-NEXT:    v_subbrev_u32_e64 v14, s[4:5], 0, v1, s[4:5]
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, -1, v10, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, -1, v6, s[4:5]
 ; GISEL-NEXT:    v_subbrev_u32_e32 v15, vcc, 0, v3, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v13
 ; GISEL-NEXT:    v_cndmask_b32_e32 v4, v12, v4, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, v9, v11, s[4:5]
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[4:5]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v14, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v3, v3, v15, s[4:5]
-; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, v9, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v8, v3, s[4:5]
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, v5, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, v10, v3, s[4:5]
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_urem_v2i64_oddk_denom:
 ; CGP:       ; %bb.0:
 ; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT:    s_mov_b32 s4, 0x12d8fb
+; CGP-NEXT:    s_mov_b32 s6, 0x12d8fb
 ; CGP-NEXT:    v_mov_b32_e32 v4, 0x12d8fb
-; CGP-NEXT:    v_cvt_f32_u32_e32 v5, 0x12d8fb
-; CGP-NEXT:    v_cvt_f32_ubyte0_e32 v6, 0
-; CGP-NEXT:    s_mov_b32 s5, 0xffed2705
+; CGP-NEXT:    v_cvt_f32_u32_e32 v6, 0x12d8fb
+; CGP-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
+; CGP-NEXT:    s_mov_b32 s7, 0xffed2705
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v7, 0x12d8fb
 ; CGP-NEXT:    v_cvt_f32_ubyte0_e32 v8, 0
-; CGP-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v6
+; CGP-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v5
 ; CGP-NEXT:    v_mac_f32_e32 v7, 0x4f800000, v8
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v5, v5
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v6, v7
-; CGP-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
-; CGP-NEXT:    v_mul_f32_e32 v6, 0x5f7ffffc, v6
-; CGP-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v5
-; CGP-NEXT:    v_mul_f32_e32 v8, 0x2f800000, v6
-; CGP-NEXT:    v_trunc_f32_e32 v7, v7
-; CGP-NEXT:    v_trunc_f32_e32 v8, v8
-; CGP-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v7
-; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
-; CGP-NEXT:    v_mac_f32_e32 v6, 0xcf800000, v8
+; CGP-NEXT:    v_sub_i32_e32 v5, vcc, 0x12d8fb, v4
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v8, v6
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v7, v7
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
+; CGP-NEXT:    v_mul_f32_e32 v8, 0x5f7ffffc, v8
+; CGP-NEXT:    v_mul_f32_e32 v7, 0x5f7ffffc, v7
+; CGP-NEXT:    v_mul_f32_e32 v9, 0x2f800000, v8
+; CGP-NEXT:    v_mul_f32_e32 v10, 0x2f800000, v7
+; CGP-NEXT:    v_trunc_f32_e32 v9, v9
+; CGP-NEXT:    v_trunc_f32_e32 v10, v10
+; CGP-NEXT:    v_mac_f32_e32 v8, 0xcf800000, v9
+; CGP-NEXT:    v_cvt_u32_f32_e32 v9, v9
+; CGP-NEXT:    v_mac_f32_e32 v7, 0xcf800000, v10
+; CGP-NEXT:    v_cvt_u32_f32_e32 v10, v10
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v8, v8
-; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
-; CGP-NEXT:    v_mul_lo_u32 v9, v7, s5
-; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
-; CGP-NEXT:    v_mul_lo_u32 v10, v8, s5
-; CGP-NEXT:    v_mul_lo_u32 v11, v5, s5
-; CGP-NEXT:    v_mul_hi_u32 v12, s5, v5
-; CGP-NEXT:    v_sub_i32_e32 v9, vcc, v9, v5
-; CGP-NEXT:    v_mul_lo_u32 v13, v6, s5
-; CGP-NEXT:    v_mul_hi_u32 v14, s5, v6
-; CGP-NEXT:    v_sub_i32_e32 v10, vcc, v10, v6
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
-; CGP-NEXT:    v_mul_lo_u32 v12, v7, v11
-; CGP-NEXT:    v_mul_hi_u32 v15, v5, v11
-; CGP-NEXT:    v_mul_hi_u32 v11, v7, v11
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v14
-; CGP-NEXT:    v_mul_lo_u32 v14, v8, v13
-; CGP-NEXT:    v_mul_hi_u32 v16, v6, v13
-; CGP-NEXT:    v_mul_hi_u32 v13, v8, v13
-; CGP-NEXT:    v_mul_lo_u32 v17, v5, v9
-; CGP-NEXT:    v_mul_lo_u32 v18, v7, v9
-; CGP-NEXT:    v_mul_hi_u32 v19, v5, v9
-; CGP-NEXT:    v_mul_hi_u32 v9, v7, v9
-; CGP-NEXT:    v_mul_lo_u32 v20, v6, v10
-; CGP-NEXT:    v_mul_lo_u32 v21, v8, v10
-; CGP-NEXT:    v_mul_hi_u32 v22, v6, v10
-; CGP-NEXT:    v_mul_hi_u32 v10, v8, v10
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v17
-; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v18, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v20
-; CGP-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v21, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v21, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v19
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v22
-; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v17, v12
-; CGP-NEXT:    v_add_i32_e32 v15, vcc, v18, v15
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v20, v14
-; CGP-NEXT:    v_add_i32_e32 v16, vcc, v21, v16
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v15, v12
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v16, v14
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v14
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v11
-; CGP-NEXT:    v_addc_u32_e32 v7, vcc, v7, v9, vcc
-; CGP-NEXT:    v_mul_lo_u32 v9, v5, s5
-; CGP-NEXT:    v_mul_hi_u32 v11, s5, v5
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v13
-; CGP-NEXT:    v_addc_u32_e32 v8, vcc, v8, v10, vcc
-; CGP-NEXT:    v_mul_lo_u32 v10, v6, s5
-; CGP-NEXT:    v_mul_hi_u32 v12, s5, v6
-; CGP-NEXT:    v_mul_lo_u32 v13, v7, s5
-; CGP-NEXT:    v_mul_lo_u32 v14, v7, v9
-; CGP-NEXT:    v_mul_hi_u32 v15, v5, v9
-; CGP-NEXT:    v_mul_hi_u32 v9, v7, v9
-; CGP-NEXT:    v_mul_lo_u32 v16, v8, s5
-; CGP-NEXT:    v_mul_lo_u32 v17, v8, v10
-; CGP-NEXT:    v_mul_hi_u32 v18, v6, v10
-; CGP-NEXT:    v_mul_hi_u32 v10, v8, v10
-; CGP-NEXT:    v_sub_i32_e32 v13, vcc, v13, v5
-; CGP-NEXT:    v_sub_i32_e32 v16, vcc, v16, v6
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v13, v11
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v16, v12
-; CGP-NEXT:    v_mul_lo_u32 v13, v5, v11
-; CGP-NEXT:    v_mul_lo_u32 v16, v7, v11
-; CGP-NEXT:    v_mul_hi_u32 v19, v5, v11
-; CGP-NEXT:    v_mul_hi_u32 v11, v7, v11
-; CGP-NEXT:    v_mul_lo_u32 v20, v6, v12
-; CGP-NEXT:    v_mul_lo_u32 v21, v8, v12
-; CGP-NEXT:    v_mul_hi_u32 v22, v6, v12
-; CGP-NEXT:    v_mul_hi_u32 v12, v8, v12
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v16, v9
-; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v17, vcc, v17, v20
-; CGP-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v21, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v21, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v19
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v17, vcc, v17, v18
-; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v22
-; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v16, v15
-; CGP-NEXT:    v_add_i32_e32 v15, vcc, v20, v17
-; CGP-NEXT:    v_add_i32_e32 v16, vcc, v21, v18
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v15
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v16, v15
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
-; CGP-NEXT:    v_addc_u32_e32 v7, vcc, v7, v11, vcc
-; CGP-NEXT:    v_mul_lo_u32 v9, v1, v5
-; CGP-NEXT:    v_mul_hi_u32 v11, v0, v5
-; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v10
-; CGP-NEXT:    v_addc_u32_e32 v8, vcc, v8, v12, vcc
-; CGP-NEXT:    v_mul_lo_u32 v10, v3, v6
-; CGP-NEXT:    v_mul_hi_u32 v12, v2, v6
-; CGP-NEXT:    v_mul_hi_u32 v6, v3, v6
-; CGP-NEXT:    v_mul_lo_u32 v13, v0, v7
-; CGP-NEXT:    v_mul_lo_u32 v14, v1, v7
-; CGP-NEXT:    v_mul_hi_u32 v15, v0, v7
-; CGP-NEXT:    v_mul_hi_u32 v7, v1, v7
-; CGP-NEXT:    v_mul_lo_u32 v16, v2, v8
-; CGP-NEXT:    v_mul_lo_u32 v17, v3, v8
-; CGP-NEXT:    v_mul_hi_u32 v18, v2, v8
-; CGP-NEXT:    v_mul_hi_u32 v8, v3, v8
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v14, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v16
-; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v17, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v15
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v18
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v14, v11
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v16, v10
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v17, v12
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
-; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
-; CGP-NEXT:    v_mul_lo_u32 v11, v5, s4
-; CGP-NEXT:    v_mul_hi_u32 v5, s4, v5
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
-; CGP-NEXT:    v_mul_lo_u32 v12, v6, s4
-; CGP-NEXT:    v_mul_hi_u32 v6, s4, v6
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
-; CGP-NEXT:    v_mul_lo_u32 v7, v7, s4
-; CGP-NEXT:    v_mul_lo_u32 v8, v8, s4
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
-; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v11
-; CGP-NEXT:    v_subb_u32_e64 v7, vcc, v1, v5, s[4:5]
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
-; CGP-NEXT:    v_sub_i32_e64 v2, s[6:7], v2, v12
-; CGP-NEXT:    v_subb_u32_e64 v8, vcc, v3, v6, s[6:7]
-; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v3, v6
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
-; CGP-NEXT:    v_sub_i32_e32 v9, vcc, v2, v4
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[8:9], 0, v7
-; CGP-NEXT:    v_cndmask_b32_e64 v5, -1, v5, s[8:9]
+; CGP-NEXT:    v_mul_lo_u32 v11, v9, s7
+; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; CGP-NEXT:    v_mul_lo_u32 v12, v10, s7
+; CGP-NEXT:    v_mul_lo_u32 v13, v8, s7
+; CGP-NEXT:    v_mul_hi_u32 v14, s7, v8
+; CGP-NEXT:    v_sub_i32_e64 v11, s[4:5], v11, v8
+; CGP-NEXT:    v_mul_lo_u32 v15, v7, s7
+; CGP-NEXT:    v_mul_hi_u32 v16, s7, v7
+; CGP-NEXT:    v_sub_i32_e64 v12, s[4:5], v12, v7
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
+; CGP-NEXT:    v_mul_lo_u32 v14, v9, v13
+; CGP-NEXT:    v_mul_hi_u32 v17, v8, v13
+; CGP-NEXT:    v_mul_hi_u32 v13, v9, v13
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v16
+; CGP-NEXT:    v_mul_lo_u32 v16, v10, v15
+; CGP-NEXT:    v_mul_hi_u32 v18, v7, v15
+; CGP-NEXT:    v_mul_hi_u32 v15, v10, v15
+; CGP-NEXT:    v_mul_lo_u32 v19, v8, v11
+; CGP-NEXT:    v_mul_lo_u32 v20, v9, v11
+; CGP-NEXT:    v_mul_hi_u32 v21, v8, v11
+; CGP-NEXT:    v_mul_hi_u32 v11, v9, v11
+; CGP-NEXT:    v_mul_lo_u32 v22, v7, v12
+; CGP-NEXT:    v_mul_lo_u32 v23, v10, v12
+; CGP-NEXT:    v_mul_hi_u32 v24, v7, v12
+; CGP-NEXT:    v_mul_hi_u32 v12, v10, v12
+; CGP-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v19
+; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v20, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v20, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v16, s[4:5], v16, v22
+; CGP-NEXT:    v_cndmask_b32_e64 v22, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v15, s[4:5], v23, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v23, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v17
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v21
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v16, s[4:5], v16, v18
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v24
+; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v14, s[4:5], v19, v14
+; CGP-NEXT:    v_add_i32_e64 v17, s[4:5], v20, v17
+; CGP-NEXT:    v_add_i32_e64 v16, s[4:5], v22, v16
+; CGP-NEXT:    v_add_i32_e64 v18, s[4:5], v23, v18
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v14, s[4:5], v17, v14
+; CGP-NEXT:    v_add_i32_e64 v16, s[4:5], v18, v16
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v16
+; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v13
+; CGP-NEXT:    v_addc_u32_e64 v9, s[4:5], v9, v11, s[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v11, v8, s7
+; CGP-NEXT:    v_mul_hi_u32 v13, s7, v8
+; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v15
+; CGP-NEXT:    v_addc_u32_e64 v10, s[4:5], v10, v12, s[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v12, v7, s7
+; CGP-NEXT:    v_mul_hi_u32 v14, s7, v7
+; CGP-NEXT:    v_mul_lo_u32 v15, v9, s7
+; CGP-NEXT:    v_mul_lo_u32 v16, v9, v11
+; CGP-NEXT:    v_mul_hi_u32 v17, v8, v11
+; CGP-NEXT:    v_mul_hi_u32 v11, v9, v11
+; CGP-NEXT:    v_mul_lo_u32 v18, v10, s7
+; CGP-NEXT:    v_mul_lo_u32 v19, v10, v12
+; CGP-NEXT:    v_mul_hi_u32 v20, v7, v12
+; CGP-NEXT:    v_mul_hi_u32 v12, v10, v12
+; CGP-NEXT:    v_sub_i32_e64 v15, s[4:5], v15, v8
+; CGP-NEXT:    v_sub_i32_e64 v18, s[4:5], v18, v7
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v15, v13
+; CGP-NEXT:    v_add_i32_e64 v14, s[4:5], v18, v14
+; CGP-NEXT:    v_mul_lo_u32 v15, v8, v13
+; CGP-NEXT:    v_mul_lo_u32 v18, v9, v13
+; CGP-NEXT:    v_mul_hi_u32 v21, v8, v13
+; CGP-NEXT:    v_mul_hi_u32 v13, v9, v13
+; CGP-NEXT:    v_mul_lo_u32 v22, v7, v14
+; CGP-NEXT:    v_mul_lo_u32 v23, v10, v14
+; CGP-NEXT:    v_mul_hi_u32 v24, v7, v14
+; CGP-NEXT:    v_mul_hi_u32 v14, v10, v14
+; CGP-NEXT:    v_add_i32_e64 v15, s[4:5], v16, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v18, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v19, s[4:5], v19, v22
+; CGP-NEXT:    v_cndmask_b32_e64 v22, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v23, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v23, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v17
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v21
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v19, s[4:5], v19, v20
+; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v24
+; CGP-NEXT:    v_cndmask_b32_e64 v20, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v15, s[4:5], v16, v15
+; CGP-NEXT:    v_add_i32_e64 v16, s[4:5], v18, v17
+; CGP-NEXT:    v_add_i32_e64 v17, s[4:5], v22, v19
+; CGP-NEXT:    v_add_i32_e64 v18, s[4:5], v23, v20
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v17
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v15, s[4:5], v16, v15
+; CGP-NEXT:    v_add_i32_e64 v16, s[4:5], v18, v17
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v15
+; CGP-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v16
+; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
+; CGP-NEXT:    v_addc_u32_e64 v9, s[4:5], v9, v13, s[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v11, v1, v8
+; CGP-NEXT:    v_mul_hi_u32 v13, v0, v8
+; CGP-NEXT:    v_mul_hi_u32 v8, v1, v8
+; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v12
+; CGP-NEXT:    v_addc_u32_e64 v10, s[4:5], v10, v14, s[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v12, v3, v7
+; CGP-NEXT:    v_mul_hi_u32 v14, v2, v7
+; CGP-NEXT:    v_mul_hi_u32 v7, v3, v7
+; CGP-NEXT:    v_mul_lo_u32 v15, v0, v9
+; CGP-NEXT:    v_mul_lo_u32 v16, v1, v9
+; CGP-NEXT:    v_mul_hi_u32 v17, v0, v9
+; CGP-NEXT:    v_mul_hi_u32 v9, v1, v9
+; CGP-NEXT:    v_mul_lo_u32 v18, v2, v10
+; CGP-NEXT:    v_mul_lo_u32 v19, v3, v10
+; CGP-NEXT:    v_mul_hi_u32 v20, v2, v10
+; CGP-NEXT:    v_mul_hi_u32 v10, v3, v10
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v16, v8
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v18
+; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v19, v7
+; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v17
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v20
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v15, v11
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v16, v13
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v18, v12
+; CGP-NEXT:    v_add_i32_e64 v14, s[4:5], v19, v14
+; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v13, v11
+; CGP-NEXT:    v_mul_lo_u32 v13, v8, s6
+; CGP-NEXT:    v_mul_hi_u32 v8, s6, v8
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v14, v12
+; CGP-NEXT:    v_mul_lo_u32 v14, v7, s6
+; CGP-NEXT:    v_mul_hi_u32 v7, s6, v7
+; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v11
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v12
+; CGP-NEXT:    v_mul_lo_u32 v9, v9, s6
+; CGP-NEXT:    v_mul_lo_u32 v10, v10, s6
+; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
+; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v10, v7
+; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v13
+; CGP-NEXT:    v_subb_u32_e64 v9, s[6:7], v1, v8, s[4:5]
+; CGP-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v8
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[6:7], v0, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[6:7]
+; CGP-NEXT:    v_sub_i32_e64 v2, s[6:7], v2, v14
+; CGP-NEXT:    v_subb_u32_e64 v10, s[8:9], v3, v7, s[6:7]
+; CGP-NEXT:    v_sub_i32_e64 v3, s[8:9], v3, v7
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[8:9], v2, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[8:9]
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[8:9], 0, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v8, -1, v8, s[8:9]
 ; CGP-NEXT:    v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5]
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v8
-; CGP-NEXT:    v_cndmask_b32_e64 v6, -1, v6, s[4:5]
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v7, -1, v7, s[4:5]
 ; CGP-NEXT:    v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[6:7]
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
 ; CGP-NEXT:    s_mov_b64 s[4:5], vcc
-; CGP-NEXT:    v_subrev_i32_e32 v11, vcc, 0x12d8fb, v9
+; CGP-NEXT:    v_sub_i32_e32 v11, vcc, 0x12d8fb, v4
 ; CGP-NEXT:    v_sub_i32_e64 v12, s[6:7], v0, v4
 ; CGP-NEXT:    v_subbrev_u32_e64 v1, s[6:7], 0, v1, s[6:7]
 ; CGP-NEXT:    v_cmp_ge_u32_e64 s[6:7], v12, v4
@@ -1551,20 +1551,20 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_sub_i32_e64 v4, s[4:5], v12, v4
 ; CGP-NEXT:    v_subbrev_u32_e64 v14, s[4:5], 0, v1, s[4:5]
 ; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v10, -1, v10, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e64 v6, -1, v6, s[4:5]
 ; CGP-NEXT:    v_subbrev_u32_e32 v15, vcc, 0, v3, vcc
 ; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v13
 ; CGP-NEXT:    v_cndmask_b32_e32 v4, v12, v4, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v9, v9, v11, s[4:5]
+; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[4:5]
 ; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v14, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; CGP-NEXT:    v_cndmask_b32_e64 v3, v3, v15, s[4:5]
-; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v2, v2, v9, s[4:5]
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
-; CGP-NEXT:    v_cndmask_b32_e64 v3, v8, v3, s[4:5]
+; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v7
+; CGP-NEXT:    v_cndmask_b32_e64 v2, v2, v5, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v3, v10, v3, s[4:5]
 ; CGP-NEXT:    s_setpc_b64 s[30:31]
   %result = urem <2 x i64> %num, <i64 1235195, i64 1235195>
   ret <2 x i64> %result
diff --git a/llvm/test/CodeGen/AMDGPU/ds-combine-large-stride.ll b/llvm/test/CodeGen/AMDGPU/ds-combine-large-stride.ll
index aa1d44c31606b8f..be3762844d9eaad 100644
--- a/llvm/test/CodeGen/AMDGPU/ds-combine-large-stride.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds-combine-large-stride.ll
@@ -5,9 +5,9 @@
 ; GCN:     s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
 ; GCN:     v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
 
-; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
-; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
-; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
+; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 0x200, [[BASE]]
+; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, 0x400, [[BASE]]
+; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, 0x800, [[BASE]]
 
 ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x200, [[BASE]]
 ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x400, [[BASE]]
@@ -50,8 +50,8 @@ bb:
 ; GCN:     s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
 ; GCN:     v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
 
-; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
-; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
+; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 0x400, [[BASE]]
+; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, 0x800, [[BASE]]
 
 ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x400, [[BASE]]
 ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x800, [[BASE]]
@@ -94,9 +94,9 @@ bb:
 ; GCN:     s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
 ; GCN:     v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
 
-; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
+; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 0x800, [[BASE]]
 ; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
-; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
+; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, 0x200, [[BASE]]
 
 ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x800, [[BASE]]
 ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x400, [[BASE]]
@@ -209,7 +209,7 @@ bb:
 ; GCN:     s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
 ; GCN:     v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
 
-; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
+; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 0x800, [[BASE]]
 ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x800, [[BASE]]
 
 ; GCN-DAG: ds_read2_b64  v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:50
@@ -283,9 +283,9 @@ bb:
 ; GCN:     s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
 ; GCN:     v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
 
-; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
-; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
-; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
+; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 0x200, [[BASE]]
+; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, 0x400, [[BASE]]
+; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, 0x800, [[BASE]]
 
 ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x200, [[BASE]]
 ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x400, [[BASE]]
@@ -319,9 +319,9 @@ bb:
 ; GCN:     s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
 ; GCN:     v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
 
-; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
-; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
-; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
+; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 0x800, [[BASE]]
+; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, 0x400, [[BASE]]
+; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, 0x200, [[BASE]]
 
 ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x800, [[BASE]]
 ; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x400, [[BASE]]
@@ -409,7 +409,7 @@ bb:
 ; GCN:     s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
 ; GCN:     v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
 
-; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
+; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 0x800, [[BASE]]
 ; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x800, [[BASE]]
 
 ; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:50
diff --git a/llvm/test/CodeGen/AMDGPU/fold-fi-operand-shrink.mir b/llvm/test/CodeGen/AMDGPU/fold-fi-operand-shrink.mir
index 2b5ec86244ec2a2..c872857df440b56 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-fi-operand-shrink.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-fi-operand-shrink.mir
@@ -15,9 +15,8 @@ body:             |
     ; GCN-LABEL: name: shrink_vgpr_fi_vgpr_v_add_i32_e64_no_carry_out_use
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
     ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[V_MOV_B32_e32_]], [[COPY]], implicit-def $vcc, implicit $exec
+    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %stack.0, [[COPY]], implicit-def $vcc, implicit $exec
     ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]]
     %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
     %1:vgpr_32 = COPY $vgpr0
@@ -40,9 +39,8 @@ body:             |
     ; GCN-LABEL: name: shrink_vgpr_vgpr_fi_v_add_i32_e64_no_carry_out_use
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
-    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[COPY]], [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec
+    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %stack.0, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec
     ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
@@ -115,9 +113,8 @@ body:             |
     ; GCN-LABEL: name: shrink_sgpr_fi_vgpr_v_add_i32_e64_no_carry_out_use
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 %stack.0
     ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[COPY]], implicit-def $vcc, implicit $exec
+    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %stack.0, [[COPY]], implicit-def $vcc, implicit $exec
     ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]]
     %0:sreg_32_xm0 = S_MOV_B32 %stack.0
     %1:vgpr_32 = COPY $vgpr0
@@ -141,8 +138,7 @@ body:             |
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 %stack.0
-    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[COPY]], implicit-def $vcc, implicit $exec
+    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %stack.0, [[COPY]], implicit-def $vcc, implicit $exec
     ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]]
     %0:vgpr_32 = COPY $vgpr0
     %1:sreg_32_xm0 = S_MOV_B32 %stack.0
@@ -162,8 +158,8 @@ body:             |
   bb.0:
 
     ; GCN-LABEL: name: shrink_vgpr_imm_fi_vgpr_v_add_i32_e64_no_carry_out_use
-    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
-    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 16, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
+    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %stack.0, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec
     ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]]
     %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
     %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
@@ -204,8 +200,8 @@ body:             |
   bb.0:
 
     ; GCN-LABEL: name: shrink_vgpr_k_fi_vgpr_v_add_i32_e64_no_carry_out_use
-    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
-    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 1234, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1234, implicit $exec
+    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %stack.0, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec
     ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]]
     %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
     %1:vgpr_32 = V_MOV_B32_e32 1234, implicit $exec
@@ -225,8 +221,8 @@ body:             |
   bb.0:
 
     ; GCN-LABEL: name: shrink_vgpr_k_vgpr_fi_v_add_i32_e64_no_carry_out_use
-    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1234, implicit $exec
-    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %stack.0, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 1234, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec
     ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]]
     %0:vgpr_32 = V_MOV_B32_e32 1234, implicit $exec
     %1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir b/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir
index fc2d4807f72d42e..da8c98501b1d1cb 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir
@@ -11,9 +11,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_other_carry_out_use
-    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
-    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 12345, [[DEF]], implicit-def $vcc, implicit $exec
     ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY killed $vcc
     ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
     %0:sreg_32_xm0 = S_MOV_B32 12345
@@ -33,12 +32,11 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: shrink_scalar_imm_multi_use_with_used_carry
-    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
-    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
     ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 12345, [[DEF]], implicit-def $vcc, implicit $exec
     ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY killed $vcc
-    ; GCN-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[DEF1]], implicit-def $vcc, implicit $exec
+    ; GCN-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 12345, [[DEF1]], implicit-def $vcc, implicit $exec
     ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]], implicit [[V_ADD_CO_U32_e32_1]]
     %0:sreg_32_xm0 = S_MOV_B32 12345
     %1:vgpr_32 = IMPLICIT_DEF
@@ -61,9 +59,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_dbg_only_carry_out_use
-    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
-    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 12345, [[DEF]], implicit-def $vcc, implicit $exec
     ; GCN-NEXT: DBG_VALUE %5:sreg_64_xexec, $noreg
     ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]]
     %0:sreg_32_xm0 = S_MOV_B32 12345
@@ -87,11 +84,10 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_carry_out_use
-    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
-    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
     ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
     ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 12345, [[DEF]], implicit-def $vcc, implicit $exec
     ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY killed $vcc
     ; GCN-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[DEF1]], [[DEF2]], [[COPY]], 0, implicit $exec
     ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADDC_U32_e64_]]
diff --git a/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir b/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir
index 2bf0ceaa568185a..ed966efaf06c44f 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir
@@ -9,9 +9,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_no_carry_out_use
-    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
-    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 12345, [[DEF]], implicit-def $vcc, implicit $exec
     ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]]
     %0:sreg_32_xm0 = S_MOV_B32 12345
     %1:vgpr_32 = IMPLICIT_DEF
@@ -29,8 +28,7 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: shrink_vgpr_scalar_imm_v_add_i32_e64_no_carry_out_use
     ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-    ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
-    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 12345, [[DEF]], implicit-def $vcc, implicit $exec
     ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]]
     %0:vgpr_32 = IMPLICIT_DEF
     %1:sreg_32_xm0 = S_MOV_B32 12345
@@ -46,9 +44,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_carry_out_use
-    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
-    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 12345, [[DEF]], implicit-def $vcc, implicit $exec
     ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]]
     %0:sreg_32_xm0 = S_MOV_B32 12345
     %1:vgpr_32 = IMPLICIT_DEF
@@ -263,9 +260,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_sub_i32_e64_no_carry_out_use
-    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
-    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-    ; GCN-NEXT: [[V_SUB_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[V_SUB_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_CO_U32_e32 12345, [[DEF]], implicit-def $vcc, implicit $exec
     ; GCN-NEXT: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e32_]]
     %0:sreg_32_xm0 = S_MOV_B32 12345
     %1:vgpr_32 = IMPLICIT_DEF
@@ -283,8 +279,7 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: shrink_vgpr_scalar_imm_v_sub_i32_e64_no_carry_out_use
     ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-    ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
-    ; GCN-NEXT: [[V_SUBREV_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+    ; GCN-NEXT: [[V_SUBREV_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_CO_U32_e32 12345, [[DEF]], implicit-def $vcc, implicit $exec
     ; GCN-NEXT: S_ENDPGM 0, implicit [[V_SUBREV_CO_U32_e32_]]
     %0:vgpr_32 = IMPLICIT_DEF
     %1:sreg_32_xm0 = S_MOV_B32 12345
@@ -301,9 +296,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_subrev_i32_e64_no_carry_out_use
-    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
-    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-    ; GCN-NEXT: [[V_SUBREV_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[V_SUBREV_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_CO_U32_e32 12345, [[DEF]], implicit-def $vcc, implicit $exec
     ; GCN-NEXT: S_ENDPGM 0, implicit [[V_SUBREV_CO_U32_e32_]]
     %0:sreg_32_xm0 = S_MOV_B32 12345
     %1:vgpr_32 = IMPLICIT_DEF
@@ -321,8 +315,7 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: shrink_vgpr_scalar_imm_v_subrev_i32_e64_no_carry_out_use
     ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-    ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
-    ; GCN-NEXT: [[V_SUB_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+    ; GCN-NEXT: [[V_SUB_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_CO_U32_e32 12345, [[DEF]], implicit-def $vcc, implicit $exec
     ; GCN-NEXT: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e32_]]
     %0:vgpr_32 = IMPLICIT_DEF
     %1:sreg_32_xm0 = S_MOV_B32 12345
@@ -373,9 +366,8 @@ body:             |
   ; GCN-NEXT:   S_NOP 0
   ; GCN-NEXT:   S_NOP 0
   ; GCN-NEXT:   S_NOP 0
-  ; GCN-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-  ; GCN-NEXT:   [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+  ; GCN-NEXT:   [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 12345, [[DEF]], implicit-def $vcc, implicit $exec
   ; GCN-NEXT:   S_NOP 0
   ; GCN-NEXT:   S_NOP 0
   ; GCN-NEXT: {{  $}}
@@ -439,9 +431,8 @@ body:             |
   ; GCN-NEXT:   successors: %bb.1(0x80000000)
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   S_NOP 0, implicit-def $vcc
-  ; GCN-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-  ; GCN-NEXT:   [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+  ; GCN-NEXT:   [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 12345, [[DEF]], implicit-def $vcc, implicit $exec
   ; GCN-NEXT:   S_NOP 0
   ; GCN-NEXT:   S_NOP 0
   ; GCN-NEXT: {{  $}}
@@ -472,8 +463,7 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: vcc_liveness_dbg_value_search_before
-    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
-    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
     ; GCN-NEXT: DBG_VALUE $noreg, 0
     ; GCN-NEXT: DBG_VALUE $noreg, 0
     ; GCN-NEXT: DBG_VALUE $noreg, 0
@@ -502,7 +492,7 @@ body:             |
     ; GCN-NEXT: DBG_VALUE $noreg, 0
     ; GCN-NEXT: DBG_VALUE $noreg, 0
     ; GCN-NEXT: DBG_VALUE $noreg, 0
-    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 12345, [[DEF]], implicit-def $vcc, implicit $exec
     ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]]
     %0:sreg_32_xm0 = S_MOV_B32 12345
     %1:vgpr_32 = IMPLICIT_DEF
@@ -549,8 +539,7 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: vcc_liveness_dbg_value_search_after
-    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
-    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
     ; GCN-NEXT: S_NOP 0
     ; GCN-NEXT: S_NOP 0
     ; GCN-NEXT: S_NOP 0
@@ -579,7 +568,7 @@ body:             |
     ; GCN-NEXT: S_NOP 0
     ; GCN-NEXT: S_NOP 0
     ; GCN-NEXT: S_NOP 0
-    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 12345, [[DEF]], implicit-def $vcc, implicit $exec
     ; GCN-NEXT: DBG_VALUE $noreg, 0
     ; GCN-NEXT: DBG_VALUE $noreg, 0
     ; GCN-NEXT: DBG_VALUE $noreg, 0
@@ -683,8 +672,7 @@ body:             |
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec
-    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 killed [[V_MOV_B32_e32_]], [[COPY]], implicit-def $vcc, implicit $exec
+    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 518144, [[COPY]], implicit-def $vcc, implicit $exec
     ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec
@@ -702,8 +690,7 @@ body:             |
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec
-    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[V_MOV_B32_e32_]], killed [[COPY]], implicit-def $vcc, implicit $exec
+    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 518144, killed [[COPY]], implicit-def $vcc, implicit $exec
     ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]]
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll b/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
index 61017e809c86365..70a7f67f5b8d0d6 100644
--- a/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
+++ b/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
@@ -3273,9 +3273,8 @@ define <2 x i8> @clpeak_imad_pat_v2i8(<2 x i8> %x, <2 x i8> %y) {
 ; GFX67-SDAG-NEXT:    v_lshlrev_b32_e32 v4, 8, v6
 ; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v5, v0, v2
 ; GFX67-SDAG-NEXT:    v_or_b32_e32 v3, v4, v3
-; GFX67-SDAG-NEXT:    s_movk_i32 s4, 0x100
 ; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v0, v2, 1
-; GFX67-SDAG-NEXT:    v_add_i32_e32 v3, vcc, s4, v3
+; GFX67-SDAG-NEXT:    v_add_i32_e32 v3, vcc, 0x100, v3
 ; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xff, v0
 ; GFX67-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v1
 ; GFX67-SDAG-NEXT:    v_or_b32_e32 v0, v2, v0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
index 671ead6127308dd..440a0666aef8c73 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
@@ -397,9 +397,10 @@ define i1 @posnormal_f16(half %x) nounwind {
 ; GFX7GLISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v1
-; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v0, v2
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v1
+; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v0, v1
+; GFX7GLISEL-NEXT:    v_mov_b32_e32 v0, 0x400
+; GFX7GLISEL-NEXT:    v_sub_i32_e32 v0, vcc, 0x400, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7800
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v0, v1
@@ -461,9 +462,10 @@ define i1 @negnormal_f16(half %x) nounwind {
 ; GFX7GLISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v1
-; GFX7GLISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], v0, v2
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v1
+; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX7GLISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], v0, v1
+; GFX7GLISEL-NEXT:    v_mov_b32_e32 v0, 0x400
+; GFX7GLISEL-NEXT:    v_sub_i32_e32 v0, vcc, 0x400, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7800
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v0, v1
@@ -1537,12 +1539,13 @@ define i1 @not_issubnormal_or_zero_f16(half %x) {
 ; GFX7GLISEL:       ; %bb.0: ; %entry
 ; GFX7GLISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v0
+; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    s_movk_i32 s4, 0x7c00
-; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v1
-; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e64 s[4:5], s4, v1
+; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v0
+; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e64 s[4:5], s4, v0
+; GFX7GLISEL-NEXT:    v_mov_b32_e32 v0, 0x400
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v0
+; GFX7GLISEL-NEXT:    v_sub_i32_e32 v0, vcc, 0x400, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7800
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v0, v1
@@ -1600,8 +1603,8 @@ define i1 @isnormal_f16(half %x) {
 ; GFX7GLISEL-LABEL: isnormal_f16:
 ; GFX7GLISEL:       ; %bb.0:
 ; GFX7GLISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v0
+; GFX7GLISEL-NEXT:    v_mov_b32_e32 v0, 0x400
+; GFX7GLISEL-NEXT:    v_sub_i32_e32 v0, vcc, 0x400, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7800
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v0, v1
@@ -1725,18 +1728,19 @@ define i1 @not_is_plus_normal_f16(half %x) {
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0x7c00, v0
-; GFX7GLISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v1
+; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    s_movk_i32 s8, 0x7c00
 ; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], s8, v3
+; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], s8, v1
 ; GFX7GLISEL-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
-; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, s8, v3
+; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, s8, v1
+; GFX7GLISEL-NEXT:    v_mov_b32_e32 v0, 0x400
 ; GFX7GLISEL-NEXT:    s_or_b64 s[6:7], s[6:7], vcc
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v1
+; GFX7GLISEL-NEXT:    v_sub_i32_e32 v0, vcc, 0x400, v0
+; GFX7GLISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], v2, v1
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7800
-; GFX7GLISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], v2, v3
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v0, v1
 ; GFX7GLISEL-NEXT:    s_and_b64 s[4:5], vcc, s[4:5]
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], s[6:7], s[4:5]
@@ -1798,18 +1802,19 @@ define i1 @not_is_neg_normal_f16(half %x) {
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0x7c00, v0
-; GFX7GLISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v1
+; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    s_movk_i32 s8, 0x7c00
 ; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], s8, v3
+; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], s8, v1
 ; GFX7GLISEL-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
-; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, s8, v3
+; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, s8, v1
+; GFX7GLISEL-NEXT:    v_mov_b32_e32 v0, 0x400
 ; GFX7GLISEL-NEXT:    s_or_b64 s[6:7], s[6:7], vcc
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v1
+; GFX7GLISEL-NEXT:    v_sub_i32_e32 v0, vcc, 0x400, v0
+; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v2, v1
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7800
-; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v2, v3
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v0, v1
 ; GFX7GLISEL-NEXT:    s_and_b64 s[4:5], vcc, s[4:5]
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], s[6:7], s[4:5]
@@ -1921,14 +1926,15 @@ define i1 @not_issubnormal_f16(half %x) {
 ; GFX7GLISEL:       ; %bb.0:
 ; GFX7GLISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v0
+; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    s_movk_i32 s6, 0x7c00
-; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v1
+; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v0
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v1
+; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v0
+; GFX7GLISEL-NEXT:    v_mov_b32_e32 v0, 0x400
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v0
+; GFX7GLISEL-NEXT:    v_sub_i32_e32 v0, vcc, 0x400, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7800
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v0, v1
@@ -2038,14 +2044,15 @@ define i1 @not_iszero_f16(half %x) {
 ; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v1, vcc, 1, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x3ff
-; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v1, v2
-; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v0
+; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    s_movk_i32 s6, 0x7c00
-; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v1
+; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v1, v2
+; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], s6, v0
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v1
+; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v0
+; GFX7GLISEL-NEXT:    v_mov_b32_e32 v0, 0x400
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v0
+; GFX7GLISEL-NEXT:    v_sub_i32_e32 v0, vcc, 0x400, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7800
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v0, v1
@@ -2532,11 +2539,12 @@ define i1 @not_iszero_or_nan_f16(half %x) {
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x3ff
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v1, v2
-; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v0
-; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x7c00
-; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v2
+; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7c00
+; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v0, v1
+; GFX7GLISEL-NEXT:    v_mov_b32_e32 v0, 0x400
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v0
+; GFX7GLISEL-NEXT:    v_sub_i32_e32 v0, vcc, 0x400, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7800
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v0, v1
@@ -2599,11 +2607,12 @@ define i1 @not_iszero_or_nan_f_daz(half %x) #0 {
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x3ff
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v1, v2
-; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v0
-; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x7c00
-; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v2
+; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7c00
+; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v0, v1
+; GFX7GLISEL-NEXT:    v_mov_b32_e32 v0, 0x400
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v0
+; GFX7GLISEL-NEXT:    v_sub_i32_e32 v0, vcc, 0x400, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7800
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v0, v1
@@ -2666,11 +2675,12 @@ define i1 @not_iszero_or_nan_f_maybe_daz(half %x) #1 {
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x3ff
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v1, v2
-; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v0
-; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x7c00
-; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v2
+; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7c00
+; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v0, v1
+; GFX7GLISEL-NEXT:    v_mov_b32_e32 v0, 0x400
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v0
+; GFX7GLISEL-NEXT:    v_sub_i32_e32 v0, vcc, 0x400, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7800
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v0, v1
@@ -2868,16 +2878,17 @@ define i1 @not_iszero_or_qnan_f16(half %x) {
 ; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v1, vcc, 1, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x3ff
-; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v1, v2
-; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v0
+; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    s_movk_i32 s8, 0x7c00
-; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], s8, v1
-; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x7e00
+; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v1, v2
+; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], s8, v0
+; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7e00
 ; GFX7GLISEL-NEXT:    s_or_b64 s[6:7], vcc, s[4:5]
-; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, s8, v1
-; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e64 s[4:5], v1, v2
+; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, s8, v0
+; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e64 s[4:5], v0, v1
+; GFX7GLISEL-NEXT:    v_mov_b32_e32 v0, 0x400
 ; GFX7GLISEL-NEXT:    s_and_b64 s[4:5], vcc, s[4:5]
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v0
+; GFX7GLISEL-NEXT:    v_sub_i32_e32 v0, vcc, 0x400, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7800
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], s[6:7], s[4:5]
@@ -2951,14 +2962,15 @@ define i1 @not_iszero_or_snan_f16(half %x) {
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x3ff
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v1, v2
-; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v0
-; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x7c00
-; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v2
-; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x7e00
+; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7c00
+; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v0, v1
+; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7e00
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7GLISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
+; GFX7GLISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
+; GFX7GLISEL-NEXT:    v_mov_b32_e32 v0, 0x400
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v0
+; GFX7GLISEL-NEXT:    v_sub_i32_e32 v0, vcc, 0x400, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7800
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v0, v1
diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
index a462c19ce645d4a..c2b10c160bf586a 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
@@ -54,13 +54,11 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1)  %buffer) {
 ; GFX8-NEXT:    s_movk_i32 s0, 0x2000
 ; GFX8-NEXT:    v_add_u32_e32 v13, vcc, s0, v0
 ; GFX8-NEXT:    v_addc_u32_e32 v14, vcc, 0, v1, vcc
-; GFX8-NEXT:    s_movk_i32 s0, 0x2800
-; GFX8-NEXT:    v_add_u32_e32 v15, vcc, s0, v0
+; GFX8-NEXT:    v_add_u32_e32 v15, vcc, 0x2800, v0
 ; GFX8-NEXT:    v_addc_u32_e32 v16, vcc, 0, v1, vcc
 ; GFX8-NEXT:    flat_load_dwordx2 v[13:14], v[13:14]
 ; GFX8-NEXT:    flat_load_dwordx2 v[15:16], v[15:16]
-; GFX8-NEXT:    s_movk_i32 s0, 0x3000
-; GFX8-NEXT:    v_add_u32_e32 v17, vcc, s0, v0
+; GFX8-NEXT:    v_add_u32_e32 v17, vcc, 0x3000, v0
 ; GFX8-NEXT:    v_addc_u32_e32 v18, vcc, 0, v1, vcc
 ; GFX8-NEXT:    flat_load_dwordx2 v[17:18], v[17:18]
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0x3800, v0
@@ -132,8 +130,7 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1)  %buffer) {
 ; GFX900-NEXT:    global_load_dwordx2 v[12:13], v[10:11], off offset:2048
 ; GFX900-NEXT:    global_load_dwordx2 v[14:15], v[6:7], off
 ; GFX900-NEXT:    global_load_dwordx2 v[16:17], v[6:7], off offset:2048
-; GFX900-NEXT:    s_movk_i32 s0, 0x3000
-; GFX900-NEXT:    v_add_co_u32_e32 v0, vcc, s0, v0
+; GFX900-NEXT:    v_add_co_u32_e32 v0, vcc, 0x3000, v0
 ; GFX900-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX900-NEXT:    global_load_dwordx2 v[6:7], v[0:1], off
 ; GFX900-NEXT:    global_load_dwordx2 v[10:11], v[0:1], off offset:2048
@@ -276,8 +273,7 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1)  %buffer) {
 ; GFX90A-NEXT:    global_load_dwordx2 v[12:13], v[10:11], off offset:2048
 ; GFX90A-NEXT:    global_load_dwordx2 v[14:15], v[6:7], off
 ; GFX90A-NEXT:    global_load_dwordx2 v[16:17], v[6:7], off offset:2048
-; GFX90A-NEXT:    s_movk_i32 s0, 0x3000
-; GFX90A-NEXT:    v_add_co_u32_e32 v0, vcc, s0, v0
+; GFX90A-NEXT:    v_add_co_u32_e32 v0, vcc, 0x3000, v0
 ; GFX90A-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX90A-NEXT:    global_load_dwordx2 v[6:7], v[0:1], off
 ; GFX90A-NEXT:    global_load_dwordx2 v[10:11], v[0:1], off offset:2048
@@ -573,21 +569,17 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1)  %buffer) {
 ; GFX900-NEXT:    v_mov_b32_e32 v3, s35
 ; GFX900-NEXT:    v_add_co_u32_e32 v1, vcc, s34, v1
 ; GFX900-NEXT:    v_addc_co_u32_e32 v2, vcc, v2, v3, vcc
-; GFX900-NEXT:    s_movk_i32 s0, 0x5000
-; GFX900-NEXT:    v_add_co_u32_e32 v1, vcc, s0, v1
+; GFX900-NEXT:    v_add_co_u32_e32 v1, vcc, 0x5000, v1
 ; GFX900-NEXT:    v_mov_b32_e32 v3, 0
 ; GFX900-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
-; GFX900-NEXT:    s_movk_i32 s2, 0x7f
 ; GFX900-NEXT:    v_mov_b32_e32 v4, 0
-; GFX900-NEXT:    s_movk_i32 s0, 0xd000
-; GFX900-NEXT:    s_movk_i32 s1, 0xe000
-; GFX900-NEXT:    s_movk_i32 s3, 0xf000
+; GFX900-NEXT:    s_movk_i32 s0, 0x7f
 ; GFX900-NEXT:  .LBB1_1: ; %for.cond.preheader
 ; GFX900-NEXT:    ; =>This Loop Header: Depth=1
 ; GFX900-NEXT:    ; Child Loop BB1_2 Depth 2
 ; GFX900-NEXT:    v_mov_b32_e32 v6, v2
 ; GFX900-NEXT:    v_mov_b32_e32 v5, v1
-; GFX900-NEXT:    s_mov_b32 s4, 0
+; GFX900-NEXT:    s_mov_b32 s1, 0
 ; GFX900-NEXT:  .LBB1_2: ; %for.body
 ; GFX900-NEXT:    ; Parent Loop BB1_1 Depth=1
 ; GFX900-NEXT:    ; => This Inner Loop Header: Depth=2
@@ -599,23 +591,23 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1)  %buffer) {
 ; GFX900-NEXT:    global_load_dwordx2 v[7:8], v[7:8], off
 ; GFX900-NEXT:    v_addc_co_u32_e32 v14, vcc, -1, v6, vcc
 ; GFX900-NEXT:    global_load_dwordx2 v[17:18], v[13:14], off offset:-2048
-; GFX900-NEXT:    v_add_co_u32_e32 v15, vcc, s0, v5
+; GFX900-NEXT:    v_add_co_u32_e32 v15, vcc, 0xffffd000, v5
 ; GFX900-NEXT:    v_addc_co_u32_e32 v16, vcc, -1, v6, vcc
 ; GFX900-NEXT:    global_load_dwordx2 v[15:16], v[15:16], off offset:-2048
-; GFX900-NEXT:    v_add_co_u32_e32 v19, vcc, s1, v5
+; GFX900-NEXT:    v_add_co_u32_e32 v19, vcc, 0xffffe000, v5
 ; GFX900-NEXT:    global_load_dwordx2 v[13:14], v[13:14], off
 ; GFX900-NEXT:    v_addc_co_u32_e32 v20, vcc, -1, v6, vcc
 ; GFX900-NEXT:    global_load_dwordx2 v[23:24], v[19:20], off offset:-4096
 ; GFX900-NEXT:    global_load_dwordx2 v[25:26], v[19:20], off offset:-2048
 ; GFX900-NEXT:    global_load_dwordx2 v[27:28], v[19:20], off
-; GFX900-NEXT:    v_add_co_u32_e32 v21, vcc, s3, v5
+; GFX900-NEXT:    v_add_co_u32_e32 v21, vcc, 0xfffff000, v5
 ; GFX900-NEXT:    v_addc_co_u32_e32 v22, vcc, -1, v6, vcc
 ; GFX900-NEXT:    global_load_dwordx2 v[19:20], v[21:22], off offset:-2048
 ; GFX900-NEXT:    global_load_dwordx2 v[29:30], v[5:6], off
 ; GFX900-NEXT:    v_add_co_u32_e32 v5, vcc, 0x10000, v5
 ; GFX900-NEXT:    v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
-; GFX900-NEXT:    s_addk_i32 s4, 0x2000
-; GFX900-NEXT:    s_cmp_gt_u32 s4, 0x3fffff
+; GFX900-NEXT:    s_addk_i32 s1, 0x2000
+; GFX900-NEXT:    s_cmp_gt_u32 s1, 0x3fffff
 ; GFX900-NEXT:    s_waitcnt vmcnt(8)
 ; GFX900-NEXT:    v_add_co_u32_e32 v3, vcc, v7, v3
 ; GFX900-NEXT:    v_addc_co_u32_e32 v4, vcc, v8, v4, vcc
@@ -649,11 +641,11 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1)  %buffer) {
 ; GFX900-NEXT:    s_cbranch_scc0 .LBB1_2
 ; GFX900-NEXT:  ; %bb.3: ; %while.cond.loopexit
 ; GFX900-NEXT:    ; in Loop: Header=BB1_1 Depth=1
-; GFX900-NEXT:    s_add_i32 s4, s2, -1
-; GFX900-NEXT:    s_cmp_eq_u32 s2, 0
+; GFX900-NEXT:    s_add_i32 s1, s0, -1
+; GFX900-NEXT:    s_cmp_eq_u32 s0, 0
 ; GFX900-NEXT:    s_cbranch_scc1 .LBB1_5
 ; GFX900-NEXT:  ; %bb.4: ; in Loop: Header=BB1_1 Depth=1
-; GFX900-NEXT:    s_mov_b32 s2, s4
+; GFX900-NEXT:    s_mov_b32 s0, s1
 ; GFX900-NEXT:    s_branch .LBB1_1
 ; GFX900-NEXT:  .LBB1_5: ; %while.end
 ; GFX900-NEXT:    v_mov_b32_e32 v1, s35
@@ -805,19 +797,15 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1)  %buffer) {
 ; GFX90A-NEXT:    v_mov_b32_e32 v2, s35
 ; GFX90A-NEXT:    v_add_co_u32_e32 v1, vcc, s34, v1
 ; GFX90A-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v2, vcc
-; GFX90A-NEXT:    s_movk_i32 s0, 0x5000
-; GFX90A-NEXT:    v_add_co_u32_e32 v2, vcc, s0, v1
+; GFX90A-NEXT:    v_add_co_u32_e32 v2, vcc, 0x5000, v1
 ; GFX90A-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
-; GFX90A-NEXT:    s_movk_i32 s2, 0x7f
+; GFX90A-NEXT:    s_movk_i32 s0, 0x7f
 ; GFX90A-NEXT:    v_pk_mov_b32 v[4:5], 0, 0
-; GFX90A-NEXT:    s_movk_i32 s0, 0xd000
-; GFX90A-NEXT:    s_movk_i32 s1, 0xe000
-; GFX90A-NEXT:    s_movk_i32 s3, 0xf000
 ; GFX90A-NEXT:  .LBB1_1: ; %for.cond.preheader
 ; GFX90A-NEXT:    ; =>This Loop Header: Depth=1
 ; GFX90A-NEXT:    ; Child Loop BB1_2 Depth 2
 ; GFX90A-NEXT:    v_pk_mov_b32 v[6:7], v[2:3], v[2:3] op_sel:[0,1]
-; GFX90A-NEXT:    s_mov_b32 s4, 0
+; GFX90A-NEXT:    s_mov_b32 s1, 0
 ; GFX90A-NEXT:  .LBB1_2: ; %for.body
 ; GFX90A-NEXT:    ; Parent Loop BB1_1 Depth=1
 ; GFX90A-NEXT:    ; => This Inner Loop Header: Depth=2
@@ -829,23 +817,23 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1)  %buffer) {
 ; GFX90A-NEXT:    global_load_dwordx2 v[12:13], v[12:13], off
 ; GFX90A-NEXT:    v_addc_co_u32_e32 v15, vcc, -1, v7, vcc
 ; GFX90A-NEXT:    global_load_dwordx2 v[18:19], v[14:15], off offset:-2048
-; GFX90A-NEXT:    v_add_co_u32_e32 v16, vcc, s0, v6
+; GFX90A-NEXT:    v_add_co_u32_e32 v16, vcc, 0xffffd000, v6
 ; GFX90A-NEXT:    v_addc_co_u32_e32 v17, vcc, -1, v7, vcc
 ; GFX90A-NEXT:    global_load_dwordx2 v[16:17], v[16:17], off offset:-2048
-; GFX90A-NEXT:    v_add_co_u32_e32 v20, vcc, s1, v6
+; GFX90A-NEXT:    v_add_co_u32_e32 v20, vcc, 0xffffe000, v6
 ; GFX90A-NEXT:    global_load_dwordx2 v[14:15], v[14:15], off
 ; GFX90A-NEXT:    v_addc_co_u32_e32 v21, vcc, -1, v7, vcc
 ; GFX90A-NEXT:    global_load_dwordx2 v[24:25], v[20:21], off offset:-4096
 ; GFX90A-NEXT:    global_load_dwordx2 v[26:27], v[20:21], off offset:-2048
 ; GFX90A-NEXT:    global_load_dwordx2 v[28:29], v[20:21], off
-; GFX90A-NEXT:    v_add_co_u32_e32 v22, vcc, s3, v6
+; GFX90A-NEXT:    v_add_co_u32_e32 v22, vcc, 0xfffff000, v6
 ; GFX90A-NEXT:    v_addc_co_u32_e32 v23, vcc, -1, v7, vcc
 ; GFX90A-NEXT:    global_load_dwordx2 v[20:21], v[22:23], off offset:-2048
 ; GFX90A-NEXT:    global_load_dwordx2 v[30:31], v[6:7], off
 ; GFX90A-NEXT:    v_add_co_u32_e32 v6, vcc, 0x10000, v6
 ; GFX90A-NEXT:    v_addc_co_u32_e32 v7, vcc, 0, v7, vcc
-; GFX90A-NEXT:    s_addk_i32 s4, 0x2000
-; GFX90A-NEXT:    s_cmp_gt_u32 s4, 0x3fffff
+; GFX90A-NEXT:    s_addk_i32 s1, 0x2000
+; GFX90A-NEXT:    s_cmp_gt_u32 s1, 0x3fffff
 ; GFX90A-NEXT:    s_waitcnt vmcnt(8)
 ; GFX90A-NEXT:    v_add_co_u32_e32 v1, vcc, v12, v4
 ; GFX90A-NEXT:    v_addc_co_u32_e32 v4, vcc, v13, v5, vcc
@@ -879,11 +867,11 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1)  %buffer) {
 ; GFX90A-NEXT:    s_cbranch_scc0 .LBB1_2
 ; GFX90A-NEXT:  ; %bb.3: ; %while.cond.loopexit
 ; GFX90A-NEXT:    ; in Loop: Header=BB1_1 Depth=1
-; GFX90A-NEXT:    s_add_i32 s4, s2, -1
-; GFX90A-NEXT:    s_cmp_eq_u32 s2, 0
+; GFX90A-NEXT:    s_add_i32 s1, s0, -1
+; GFX90A-NEXT:    s_cmp_eq_u32 s0, 0
 ; GFX90A-NEXT:    s_cbranch_scc1 .LBB1_5
 ; GFX90A-NEXT:  ; %bb.4: ; in Loop: Header=BB1_1 Depth=1
-; GFX90A-NEXT:    s_mov_b32 s2, s4
+; GFX90A-NEXT:    s_mov_b32 s0, s1
 ; GFX90A-NEXT:    s_branch .LBB1_1
 ; GFX90A-NEXT:  .LBB1_5: ; %while.end
 ; GFX90A-NEXT:    v_mov_b32_e32 v1, s35
@@ -1163,10 +1151,8 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) {
 ; GFX8-NEXT:    s_movk_i32 s0, 0x1800
 ; GFX8-NEXT:    v_add_u32_e32 v15, vcc, s0, v0
 ; GFX8-NEXT:    v_addc_u32_e32 v16, vcc, 0, v1, vcc
-; GFX8-NEXT:    s_movk_i32 s0, 0x1c00
-; GFX8-NEXT:    v_add_u32_e32 v17, vcc, s0, v0
+; GFX8-NEXT:    v_add_u32_e32 v17, vcc, 0x1c00, v0
 ; GFX8-NEXT:    v_addc_u32_e32 v18, vcc, 0, v1, vcc
-; GFX8-NEXT:    s_movk_i32 s0, 0x2000
 ; GFX8-NEXT:    flat_load_dword v2, v[0:1]
 ; GFX8-NEXT:    flat_load_dword v19, v[5:6]
 ; GFX8-NEXT:    flat_load_dword v7, v[7:8]
@@ -1175,7 +1161,7 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) {
 ; GFX8-NEXT:    flat_load_dword v10, v[13:14]
 ; GFX8-NEXT:    flat_load_dword v11, v[15:16]
 ; GFX8-NEXT:    flat_load_dword v12, v[17:18]
-; GFX8-NEXT:    v_add_u32_e32 v5, vcc, s0, v0
+; GFX8-NEXT:    v_add_u32_e32 v5, vcc, 0x2000, v0
 ; GFX8-NEXT:    v_addc_u32_e32 v6, vcc, 0, v1, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0x2400, v0
 ; GFX8-NEXT:    flat_load_dword v5, v[5:6]
@@ -1230,10 +1216,9 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) {
 ; GFX900-NEXT:    v_add_co_u32_e32 v3, vcc, s34, v4
 ; GFX900-NEXT:    v_addc_co_u32_e32 v5, vcc, 0, v0, vcc
 ; GFX900-NEXT:    v_lshlrev_b64 v[0:1], 2, v[1:2]
-; GFX900-NEXT:    s_movk_i32 s0, 0x1000
 ; GFX900-NEXT:    v_add_co_u32_e32 v0, vcc, v3, v0
 ; GFX900-NEXT:    v_addc_co_u32_e32 v1, vcc, v5, v1, vcc
-; GFX900-NEXT:    v_add_co_u32_e32 v2, vcc, s0, v0
+; GFX900-NEXT:    v_add_co_u32_e32 v2, vcc, 0x1000, v0
 ; GFX900-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v1, vcc
 ; GFX900-NEXT:    global_load_dword v5, v[0:1], off
 ; GFX900-NEXT:    global_load_dword v6, v[0:1], off offset:1024
@@ -1357,8 +1342,7 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) {
 ; GFX90A-NEXT:    v_lshlrev_b64 v[0:1], 2, v[2:3]
 ; GFX90A-NEXT:    v_add_co_u32_e32 v0, vcc, v5, v0
 ; GFX90A-NEXT:    v_addc_co_u32_e32 v1, vcc, v6, v1, vcc
-; GFX90A-NEXT:    s_movk_i32 s0, 0x1000
-; GFX90A-NEXT:    v_add_co_u32_e32 v2, vcc, s0, v0
+; GFX90A-NEXT:    v_add_co_u32_e32 v2, vcc, 0x1000, v0
 ; GFX90A-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v1, vcc
 ; GFX90A-NEXT:    global_load_dword v5, v[0:1], off
 ; GFX90A-NEXT:    global_load_dword v6, v[0:1], off offset:1024
@@ -1526,10 +1510,9 @@ define amdgpu_kernel void @Offset64(ptr addrspace(1)  %buffer) {
 ; GFX8-NEXT:    s_movk_i32 s0, 0xf000
 ; GFX8-NEXT:    v_add_u32_e32 v5, vcc, s0, v0
 ; GFX8-NEXT:    v_addc_u32_e32 v6, vcc, 0, v1, vcc
-; GFX8-NEXT:    s_movk_i32 s0, 0xf800
 ; GFX8-NEXT:    flat_load_dwordx2 v[7:8], v[0:1]
 ; GFX8-NEXT:    flat_load_dwordx2 v[5:6], v[5:6]
-; GFX8-NEXT:    v_add_u32_e32 v9, vcc, s0, v0
+; GFX8-NEXT:    v_add_u32_e32 v9, vcc, 0xfffff800, v0
 ; GFX8-NEXT:    v_addc_u32_e32 v10, vcc, 0, v1, vcc
 ; GFX8-NEXT:    flat_load_dwordx2 v[9:10], v[9:10]
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0, v0
@@ -1804,11 +1787,9 @@ define amdgpu_kernel void @p32Offset64(ptr addrspace(1)  %buffer) {
 ; GFX8-NEXT:    v_addc_u32_e32 v4, vcc, 0, v4, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v3, v0
 ; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v4, v1, vcc
-; GFX8-NEXT:    s_mov_b32 s0, 0x7ffff800
-; GFX8-NEXT:    v_add_u32_e32 v5, vcc, s0, v0
+; GFX8-NEXT:    v_add_u32_e32 v5, vcc, 0x7ffff800, v0
 ; GFX8-NEXT:    v_addc_u32_e32 v6, vcc, 0, v1, vcc
-; GFX8-NEXT:    s_mov_b32 s0, 0x7ffffc00
-; GFX8-NEXT:    v_add_u32_e32 v7, vcc, s0, v0
+; GFX8-NEXT:    v_add_u32_e32 v7, vcc, 0x7ffffc00, v0
 ; GFX8-NEXT:    v_addc_u32_e32 v8, vcc, 0, v1, vcc
 ; GFX8-NEXT:    flat_load_dword v2, v[0:1]
 ; GFX8-NEXT:    flat_load_dword v5, v[5:6]
@@ -2348,13 +2329,11 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) {
 ; GFX8-NEXT:    s_movk_i32 s0, 0x2000
 ; GFX8-NEXT:    v_add_u32_e32 v13, vcc, s0, v0
 ; GFX8-NEXT:    v_addc_u32_e32 v14, vcc, 0, v1, vcc
-; GFX8-NEXT:    s_movk_i32 s0, 0x1800
-; GFX8-NEXT:    v_add_u32_e32 v15, vcc, s0, v0
+; GFX8-NEXT:    v_add_u32_e32 v15, vcc, 0x1800, v0
 ; GFX8-NEXT:    v_addc_u32_e32 v16, vcc, 0, v1, vcc
 ; GFX8-NEXT:    flat_load_dwordx2 v[13:14], v[13:14]
 ; GFX8-NEXT:    flat_load_dwordx2 v[15:16], v[15:16]
-; GFX8-NEXT:    s_movk_i32 s0, 0x1000
-; GFX8-NEXT:    v_add_u32_e32 v17, vcc, s0, v0
+; GFX8-NEXT:    v_add_u32_e32 v17, vcc, 0x1000, v0
 ; GFX8-NEXT:    v_addc_u32_e32 v18, vcc, 0, v1, vcc
 ; GFX8-NEXT:    flat_load_dwordx2 v[17:18], v[17:18]
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0x800, v0
@@ -2424,8 +2403,7 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) {
 ; GFX900-NEXT:    v_add_co_u32_e32 v4, vcc, s0, v0
 ; GFX900-NEXT:    v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
 ; GFX900-NEXT:    global_load_dwordx2 v[10:11], v[4:5], off offset:2048
-; GFX900-NEXT:    s_movk_i32 s0, 0x1000
-; GFX900-NEXT:    v_add_co_u32_e32 v12, vcc, s0, v0
+; GFX900-NEXT:    v_add_co_u32_e32 v12, vcc, 0x1000, v0
 ; GFX900-NEXT:    v_addc_co_u32_e32 v13, vcc, 0, v1, vcc
 ; GFX900-NEXT:    global_load_dwordx2 v[14:15], v[12:13], off
 ; GFX900-NEXT:    global_load_dwordx2 v[16:17], v[4:5], off
@@ -2571,8 +2549,7 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) {
 ; GFX90A-NEXT:    v_add_co_u32_e32 v4, vcc, s0, v0
 ; GFX90A-NEXT:    v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
 ; GFX90A-NEXT:    global_load_dwordx2 v[10:11], v[4:5], off offset:2048
-; GFX90A-NEXT:    s_movk_i32 s0, 0x1000
-; GFX90A-NEXT:    v_add_co_u32_e32 v12, vcc, s0, v0
+; GFX90A-NEXT:    v_add_co_u32_e32 v12, vcc, 0x1000, v0
 ; GFX90A-NEXT:    v_addc_co_u32_e32 v13, vcc, 0, v1, vcc
 ; GFX90A-NEXT:    global_load_dwordx2 v[14:15], v[12:13], off
 ; GFX90A-NEXT:    global_load_dwordx2 v[16:17], v[4:5], off
@@ -2743,8 +2720,7 @@ define hidden amdgpu_kernel void @negativeoffset(ptr addrspace(1) nocapture %buf
 ; GFX8-NEXT:    v_addc_u32_e32 v4, vcc, 0, v4, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v3, v0
 ; GFX8-NEXT:    v_addc_u32_e32 v6, vcc, v4, v1, vcc
-; GFX8-NEXT:    s_movk_i32 s0, 0x800
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
+; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0x800, v2
 ; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, -1, v6, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v5, vcc, 0, v2
 ; GFX8-NEXT:    v_addc_u32_e32 v6, vcc, -1, v6, vcc
@@ -2784,10 +2760,9 @@ define hidden amdgpu_kernel void @negativeoffset(ptr addrspace(1) nocapture %buf
 ; GFX900-NEXT:    v_add_co_u32_e32 v3, vcc, s34, v8
 ; GFX900-NEXT:    v_addc_co_u32_e32 v4, vcc, 0, v0, vcc
 ; GFX900-NEXT:    v_lshlrev_b64 v[0:1], 3, v[1:2]
-; GFX900-NEXT:    s_movk_i32 s0, 0x1000
 ; GFX900-NEXT:    v_add_co_u32_e32 v2, vcc, v3, v0
 ; GFX900-NEXT:    v_addc_co_u32_e32 v3, vcc, v4, v1, vcc
-; GFX900-NEXT:    v_add_co_u32_e32 v0, vcc, s0, v2
+; GFX900-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v2
 ; GFX900-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v3, vcc
 ; GFX900-NEXT:    v_add_co_u32_e32 v2, vcc, 0, v2
 ; GFX900-NEXT:    v_addc_co_u32_e32 v3, vcc, -1, v3, vcc
@@ -2871,8 +2846,7 @@ define hidden amdgpu_kernel void @negativeoffset(ptr addrspace(1) nocapture %buf
 ; GFX90A-NEXT:    v_lshlrev_b64 v[0:1], 3, v[2:3]
 ; GFX90A-NEXT:    v_add_co_u32_e32 v2, vcc, v4, v0
 ; GFX90A-NEXT:    v_addc_co_u32_e32 v3, vcc, v5, v1, vcc
-; GFX90A-NEXT:    s_movk_i32 s0, 0x1000
-; GFX90A-NEXT:    v_add_co_u32_e32 v0, vcc, s0, v2
+; GFX90A-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v2
 ; GFX90A-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v3, vcc
 ; GFX90A-NEXT:    v_add_co_u32_e32 v2, vcc, 0, v2
 ; GFX90A-NEXT:    v_addc_co_u32_e32 v3, vcc, -1, v3, vcc
diff --git a/llvm/test/CodeGen/AMDGPU/sdiv64.ll b/llvm/test/CodeGen/AMDGPU/sdiv64.ll
index 4f2fd3f50494c94..d79e4553f7574b1 100644
--- a/llvm/test/CodeGen/AMDGPU/sdiv64.ll
+++ b/llvm/test/CodeGen/AMDGPU/sdiv64.ll
@@ -1394,8 +1394,7 @@ define i64 @v_test_sdiv_k_num_i64(i64 %x) {
 ; GCN-IR-NEXT:    v_add_i32_e32 v4, vcc, 32, v4
 ; GCN-IR-NEXT:    v_ffbh_u32_e32 v5, v1
 ; GCN-IR-NEXT:    v_min_u32_e32 v8, v4, v5
-; GCN-IR-NEXT:    s_movk_i32 s6, 0xffc5
-; GCN-IR-NEXT:    v_add_i32_e32 v5, vcc, s6, v8
+; GCN-IR-NEXT:    v_add_i32_e32 v5, vcc, 0xffffffc5, v8
 ; GCN-IR-NEXT:    v_addc_u32_e64 v6, s[6:7], 0, -1, vcc
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
 ; GCN-IR-NEXT:    v_cmp_lt_u64_e32 vcc, 63, v[5:6]
@@ -1587,8 +1586,7 @@ define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) {
 ; GCN-IR-NEXT:    v_add_i32_e32 v4, vcc, 32, v4
 ; GCN-IR-NEXT:    v_ffbh_u32_e32 v5, v1
 ; GCN-IR-NEXT:    v_min_u32_e32 v8, v4, v5
-; GCN-IR-NEXT:    s_movk_i32 s6, 0xffd0
-; GCN-IR-NEXT:    v_add_i32_e32 v5, vcc, s6, v8
+; GCN-IR-NEXT:    v_add_i32_e32 v5, vcc, 0xffffffd0, v8
 ; GCN-IR-NEXT:    v_addc_u32_e64 v6, s[6:7], 0, -1, vcc
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
 ; GCN-IR-NEXT:    v_cmp_lt_u64_e32 vcc, 63, v[5:6]
@@ -1722,13 +1720,12 @@ define i64 @v_test_sdiv_pow2_k_den_i64(i64 %x) {
 ; GCN-IR-NEXT:    s_mov_b64 s[10:11], 0
 ; GCN-IR-NEXT:    v_mov_b32_e32 v12, 0
 ; GCN-IR-NEXT:    v_mov_b32_e32 v6, 0
-; GCN-IR-NEXT:    s_movk_i32 s12, 0x7fff
 ; GCN-IR-NEXT:  .LBB13_3: ; %udiv-do-while
 ; GCN-IR-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN-IR-NEXT:    v_lshl_b64 v[9:10], v[9:10], 1
 ; GCN-IR-NEXT:    v_lshrrev_b32_e32 v0, 31, v4
 ; GCN-IR-NEXT:    v_or_b32_e32 v0, v9, v0
-; GCN-IR-NEXT:    v_sub_i32_e32 v5, vcc, s12, v0
+; GCN-IR-NEXT:    v_sub_i32_e32 v5, vcc, 0x7fff, v0
 ; GCN-IR-NEXT:    v_subb_u32_e32 v5, vcc, 0, v10, vcc
 ; GCN-IR-NEXT:    v_add_i32_e32 v7, vcc, 1, v7
 ; GCN-IR-NEXT:    v_lshl_b64 v[3:4], v[3:4], 1
diff --git a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
index 8da720d7f991cdd..2be03b9083412bb 100644
--- a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
+++ b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
@@ -419,14 +419,11 @@ define amdgpu_kernel void @v_test_i32_x_sub_65(ptr addrspace(1) %out, ptr addrsp
 ; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
 ; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; SI-GISEL-NEXT:    s_mov_b32 s6, 0
-; SI-GISEL-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
-; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
-; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, 0x41, v2
-; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
+; SI-GISEL-NEXT:    s_mov_b32 s2, 0
+; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x41
+; SI-GISEL-NEXT:    v_sub_i32_e32 v2, vcc, 0x41, v2
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
 ;
@@ -452,17 +449,12 @@ define amdgpu_kernel void @v_test_i32_x_sub_65(ptr addrspace(1) %out, ptr addrsp
 ; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
 ; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
 ; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
-; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1]
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
+; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x41
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    v_subrev_u32_e32 v2, vcc, 0x41, v3
+; VI-GISEL-NEXT:    v_sub_u32_e32 v2, vcc, 0x41, v2
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
@@ -941,14 +933,11 @@ define amdgpu_kernel void @v_test_i32_x_sub_neg17(ptr addrspace(1) %out, ptr add
 ; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
 ; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; SI-GISEL-NEXT:    s_mov_b32 s6, 0
-; SI-GISEL-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
-; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
-; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, 0xffffffef, v2
-; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
+; SI-GISEL-NEXT:    s_mov_b32 s2, 0
+; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xffffffef
+; SI-GISEL-NEXT:    v_sub_i32_e32 v2, vcc, 0xffffffef, v2
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
 ;
@@ -974,17 +963,12 @@ define amdgpu_kernel void @v_test_i32_x_sub_neg17(ptr addrspace(1) %out, ptr add
 ; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
 ; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
 ; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
-; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1]
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
+; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xffffffef
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    v_subrev_u32_e32 v2, vcc, 0xffffffef, v3
+; VI-GISEL-NEXT:    v_sub_u32_e32 v2, vcc, 0xffffffef, v2
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
@@ -1931,12 +1915,12 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_64_123(ptr addrspace(1) %out, ptr
 ; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
 ; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
+; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7b
+; SI-GISEL-NEXT:    v_sub_i32_e32 v3, vcc, 0x7b, v3
+; SI-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
 ; SI-GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, 64, v2
-; SI-GISEL-NEXT:    v_subrev_i32_e32 v3, vcc, 0x7b, v3
 ; SI-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; SI-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
 ; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
 ; SI-GISEL-NEXT:    v_or_b32_e32 v2, v2, v3
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
@@ -2317,11 +2301,11 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_0_1_0(ptr addrspace(1) %out, ptr a
 ; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
 ; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
+; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0xffffc400
+; SI-GISEL-NEXT:    v_sub_i32_e32 v3, vcc, 0xffffc400, v3
+; SI-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; SI-GISEL-NEXT:    v_subrev_i32_e32 v3, vcc, 0xffffc400, v3
 ; SI-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; SI-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
 ; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
 ; SI-GISEL-NEXT:    v_or_b32_e32 v2, v2, v3
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
@@ -2452,11 +2436,11 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_0_neg1_0(ptr addrspace(1) %out, pt
 ; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
 ; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
+; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x4400
+; SI-GISEL-NEXT:    v_sub_i32_e32 v3, vcc, 0x4400, v3
+; SI-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; SI-GISEL-NEXT:    v_subrev_i32_e32 v3, vcc, 0x4400, v3
 ; SI-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; SI-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
 ; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
 ; SI-GISEL-NEXT:    v_or_b32_e32 v2, v2, v3
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
@@ -2591,11 +2575,10 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg32_neg32(ptr addrspace(1) %out,
 ; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
 ; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
-; SI-GISEL-NEXT:    s_movk_i32 s2, 0xffe0
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; SI-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; SI-GISEL-NEXT:    v_add_i32_e32 v2, vcc, s2, v2
-; SI-GISEL-NEXT:    v_add_i32_e32 v3, vcc, s2, v3
+; SI-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 0xffffffe0, v2
+; SI-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 0xffffffe0, v3
 ; SI-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
 ; SI-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
 ; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
@@ -2753,11 +2736,11 @@ define amdgpu_kernel void @v_test_v2i16_x_add_0_neg32(ptr addrspace(1) %out, ptr
 ; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
 ; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
-; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0xffffffe0
 ; SI-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 0xffffffe0, v3
-; SI-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
 ; SI-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
+; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; SI-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
 ; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
 ; SI-GISEL-NEXT:    v_or_b32_e32 v2, v2, v3
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
@@ -2913,12 +2896,13 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg32_0(ptr addrspace(1) %out, ptr
 ; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
 ; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
+; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0xffffffe0
+; SI-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 0xffffffe0, v3
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; SI-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 0xffffffe0, v2
-; SI-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; SI-GISEL-NEXT:    v_or_b32_e32 v2, v2, v3
+; SI-GISEL-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; SI-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
+; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; SI-GISEL-NEXT:    v_or_b32_e32 v2, v3, v2
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
@@ -3554,11 +3538,10 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg_fpone(ptr addrspace(1) %out, p
 ; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
 ; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
-; SI-GISEL-NEXT:    s_movk_i32 s2, 0xc400
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; SI-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; SI-GISEL-NEXT:    v_add_i32_e32 v2, vcc, s2, v2
-; SI-GISEL-NEXT:    v_add_i32_e32 v3, vcc, s2, v3
+; SI-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 0xffffc400, v2
+; SI-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 0xffffc400, v3
 ; SI-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
 ; SI-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
 ; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
@@ -3720,11 +3703,10 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg_negfpone(ptr addrspace(1) %out
 ; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
 ; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
-; SI-GISEL-NEXT:    s_movk_i32 s2, 0x4400
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; SI-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; SI-GISEL-NEXT:    v_add_i32_e32 v2, vcc, s2, v2
-; SI-GISEL-NEXT:    v_add_i32_e32 v3, vcc, s2, v3
+; SI-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 0x4400, v2
+; SI-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 0x4400, v3
 ; SI-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
 ; SI-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
 ; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
@@ -3886,11 +3868,10 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg_fptwo(ptr addrspace(1) %out, p
 ; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
 ; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
-; SI-GISEL-NEXT:    s_movk_i32 s2, 0x4000
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; SI-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; SI-GISEL-NEXT:    v_add_i32_e32 v2, vcc, s2, v2
-; SI-GISEL-NEXT:    v_add_i32_e32 v3, vcc, s2, v3
+; SI-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 0x4000, v2
+; SI-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 0x4000, v3
 ; SI-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
 ; SI-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
 ; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
@@ -4052,11 +4033,10 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg_negfptwo(ptr addrspace(1) %out
 ; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
 ; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
-; SI-GISEL-NEXT:    s_movk_i32 s2, 0xc000
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; SI-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; SI-GISEL-NEXT:    v_add_i32_e32 v2, vcc, s2, v2
-; SI-GISEL-NEXT:    v_add_i32_e32 v3, vcc, s2, v3
+; SI-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 0xffffc000, v2
+; SI-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 0xffffc000, v3
 ; SI-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
 ; SI-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
 ; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
@@ -4211,17 +4191,13 @@ define amdgpu_kernel void @v_test_v2i16_x_add_undef_neg32(ptr addrspace(1) %out,
 ; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
 ; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; SI-GISEL-NEXT:    s_mov_b32 s6, 0
-; SI-GISEL-NEXT:    s_mov_b32 s7, 0xf000
+; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xffffffe0
 ; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
-; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
-; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; SI-GISEL-NEXT:    s_mov_b32 s2, 0
+; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
 ; SI-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 0xffffffe0, v2
 ; SI-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
 ; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
 ;
@@ -4368,15 +4344,12 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg32_undef(ptr addrspace(1) %out,
 ; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
 ; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; SI-GISEL-NEXT:    s_mov_b32 s6, 0
-; SI-GISEL-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
-; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
-; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; SI-GISEL-NEXT:    s_mov_b32 s2, 0
+; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xffffffe0
 ; SI-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 0xffffffe0, v2
 ; SI-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
 ;
diff --git a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
index 08db1e7fee259d6..dd4eb0ae2a09ed6 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
@@ -5000,22 +5000,14 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) {
 ; GFX9-FLATSCR-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
 ; GFX9-FLATSCR-NEXT:    v_mbcnt_hi_u32_b32 v0, -1, v0
 ; GFX9-FLATSCR-NEXT:    v_lshlrev_b32_e32 v5, 13, v0
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x80
+; GFX9-FLATSCR-NEXT:    s_mov_b32 s4, 4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v2, vcc, s2, v5
 ; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v0, s3
 ; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v0, vcc
-; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s4, v2
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, 0x80, v2
 ; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3968
-; GFX9-FLATSCR-NEXT:    s_mov_b32 s4, 4
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s5, 0x84
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s6, 0x104
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s7, 0x184
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s8, 0x204
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s9, 0x284
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s10, 0x304
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x384
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3984
@@ -5043,1268 +5035,1268 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) {
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4080
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, 0x100, v2
 ; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x74
+; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x100
-; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s4, v2
-; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x84
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3984
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s5, 0x94
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x94
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4000
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s5, 0xa4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xa4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4016
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s5, 0xb4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xb4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4032
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s5, 0xc4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xc4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4048
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s5, 0xd4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xd4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4064
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s5, 0xe4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xe4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4080
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s5, 0xf4
-; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s5, 0x180
-; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s5, v2
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, 0x180, v2
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xf4
 ; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x104
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3984
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s6, 0x114
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x114
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4000
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s6, 0x124
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x124
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4016
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s6, 0x134
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x134
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4032
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s6, 0x144
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x144
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4048
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s6, 0x154
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x154
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4064
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s6, 0x164
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x164
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4080
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s6, 0x174
-; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s6, 0x200
-; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s6, v2
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, 0x200, v2
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x174
 ; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x184
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3984
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s7, 0x194
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x194
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4000
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s7, 0x1a4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1a4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4016
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s7, 0x1b4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1b4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4032
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s7, 0x1c4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1c4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4048
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s7, 0x1d4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1d4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4064
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s7, 0x1e4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1e4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4080
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s7, 0x1f4
-; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s7, 0x280
-; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s7, v2
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, 0x280, v2
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1f4
 ; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x204
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3984
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s8, 0x214
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x214
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4000
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s8, 0x224
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x224
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4016
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s8, 0x234
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x234
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4032
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s8, 0x244
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x244
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4048
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s8, 0x254
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x254
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4064
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s8, 0x264
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x264
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4080
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s8, 0x274
-; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s8, 0x300
-; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s8, v2
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, 0x300, v2
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x274
 ; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x284
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3984
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s9, 0x294
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x294
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4000
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s9, 0x2a4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x2a4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4016
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s9, 0x2b4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x2b4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4032
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s9, 0x2c4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x2c4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4048
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s9, 0x2d4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x2d4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4064
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s9, 0x2e4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x2e4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4080
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s9, 0x2f4
-; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s9, 0x380
-; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s9, v2
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, 0x380, v2
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x2f4
 ; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x304
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3984
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s10, 0x314
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x314
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4000
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s10, 0x324
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x324
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4016
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s10, 0x334
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x334
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4032
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s10, 0x344
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x344
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4048
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s10, 0x354
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x354
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4064
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s10, 0x364
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x364
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4080
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s10, 0x374
-; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s10, 0x400
-; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s10, v2
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, 0x400, v2
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x374
 ; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3968
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x384
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:3984
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x394
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x394
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4000
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x3a4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x3a4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4016
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x3b4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x3b4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4032
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x3c4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x3c4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4048
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x3d4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x3d4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[6:9], v[0:1], off offset:4064
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:4080
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x3e4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x3e4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x3f4
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x3f4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3]
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x404
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x404
 ; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v6, s1
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:16
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x414
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x414
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:32
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x424
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x424
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:48
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x434
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x434
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:64
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x444
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x444
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:80
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x454
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x454
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:96
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x464
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x464
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:112
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x474
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x474
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:128
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x484
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x484
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:144
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x494
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x494
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:160
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x4a4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x4a4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:176
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x4b4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x4b4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:192
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x4c4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x4c4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:208
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x4d4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x4d4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:224
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x4e4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x4e4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:240
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x4f4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x4f4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:256
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x504
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x504
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:272
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x514
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x514
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:288
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x524
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x524
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:304
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x534
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x534
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:320
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x544
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x544
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:336
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x554
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x554
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:352
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x564
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x564
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:368
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x574
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x574
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:384
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x584
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x584
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:400
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x594
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x594
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:416
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x5a4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x5a4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:432
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x5b4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x5b4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:448
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x5c4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x5c4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:464
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x5d4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x5d4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:480
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x5e4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x5e4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:496
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x5f4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x5f4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:512
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x604
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x604
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:528
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x614
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x614
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:544
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x624
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x624
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:560
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x634
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x634
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:576
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x644
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x644
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:592
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x654
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x654
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:608
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x664
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x664
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:624
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x674
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x674
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:640
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x684
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x684
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:656
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x694
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x694
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:672
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x6a4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x6a4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:688
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x6b4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x6b4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:704
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x6c4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x6c4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:720
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x6d4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x6d4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:736
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x6e4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x6e4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:752
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x6f4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x6f4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:768
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x704
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x704
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:784
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x714
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x714
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:800
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x724
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x724
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:816
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x734
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x734
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:832
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x744
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x744
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:848
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x754
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x754
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:864
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x764
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x764
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:880
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x774
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x774
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:896
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x784
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x784
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:912
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x794
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x794
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:928
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x7a4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x7a4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:944
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x7b4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x7b4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:960
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x7c4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x7c4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:976
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x7d4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x7d4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:992
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x7e4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x7e4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1008
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x7f4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x7f4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1024
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x804
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x804
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1040
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x814
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x814
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1056
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x824
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x824
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1072
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x834
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x834
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1088
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x844
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x844
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1104
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x854
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x854
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1120
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x864
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x864
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1136
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x874
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x874
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1152
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x884
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x884
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1168
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x894
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x894
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1184
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x8a4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x8a4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1200
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x8b4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x8b4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1216
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x8c4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x8c4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1232
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x8d4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x8d4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1248
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x8e4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x8e4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1264
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x8f4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x8f4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1280
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x904
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x904
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1296
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x914
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x914
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1312
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x924
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x924
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1328
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x934
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x934
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1344
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x944
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x944
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1360
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x954
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x954
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1376
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x964
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x964
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1392
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x974
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x974
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1408
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x984
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x984
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1424
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x994
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x994
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1440
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x9a4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x9a4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1456
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x9b4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x9b4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1472
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x9c4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x9c4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1488
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x9d4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x9d4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1504
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x9e4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x9e4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1520
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x9f4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x9f4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1536
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xa04
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xa04
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1552
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xa14
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xa14
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1568
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xa24
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xa24
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1584
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xa34
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xa34
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1600
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xa44
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xa44
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1616
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xa54
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xa54
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1632
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xa64
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xa64
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1648
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xa74
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xa74
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1664
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xa84
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xa84
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1680
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xa94
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xa94
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1696
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xaa4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xaa4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1712
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xab4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xab4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1728
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xac4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xac4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1744
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xad4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xad4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1760
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xae4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xae4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1776
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xaf4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xaf4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1792
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xb04
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xb04
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1808
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xb14
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xb14
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1824
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xb24
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xb24
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1840
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xb34
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xb34
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1856
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xb44
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xb44
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1872
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xb54
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xb54
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1888
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xb64
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xb64
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1904
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xb74
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xb74
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1920
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xb84
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xb84
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1936
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xb94
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xb94
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1952
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xba4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xba4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1968
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xbb4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xbb4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:1984
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xbc4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xbc4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2000
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xbd4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xbd4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2016
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xbe4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xbe4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2032
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xbf4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xbf4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2048
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xc04
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xc04
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2064
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xc14
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xc14
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2080
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xc24
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xc24
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2096
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xc34
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xc34
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2112
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xc44
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xc44
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2128
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xc54
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xc54
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2144
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xc64
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xc64
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2160
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xc74
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xc74
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2176
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xc84
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xc84
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2192
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xc94
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xc94
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2208
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xca4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xca4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2224
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xcb4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xcb4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2240
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xcc4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xcc4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2256
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xcd4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xcd4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2272
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xce4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xce4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2288
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xcf4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xcf4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2304
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xd04
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xd04
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2320
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xd14
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xd14
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2336
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xd24
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xd24
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2352
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xd34
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xd34
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2368
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xd44
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xd44
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2384
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xd54
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xd54
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2400
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xd64
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xd64
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2416
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xd74
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xd74
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2432
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xd84
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xd84
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2448
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xd94
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xd94
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2464
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xda4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xda4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2480
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xdb4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xdb4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2496
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xdc4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xdc4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2512
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xdd4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xdd4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2528
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xde4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xde4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2544
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xdf4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xdf4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2560
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xe04
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xe04
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2576
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xe14
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xe14
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2592
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xe24
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xe24
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2608
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xe34
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xe34
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2624
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xe44
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xe44
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2640
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xe54
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xe54
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2656
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xe64
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xe64
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2672
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xe74
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xe74
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2688
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xe84
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xe84
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2704
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xe94
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xe94
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2720
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xea4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xea4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2736
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xeb4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xeb4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2752
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xec4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xec4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2768
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xed4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xed4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2784
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xee4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xee4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2800
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xef4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xef4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2816
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xf04
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xf04
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2832
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xf14
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xf14
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2848
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xf24
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xf24
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2864
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xf34
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xf34
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2880
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xf44
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xf44
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2896
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xf54
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xf54
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2912
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xf64
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xf64
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2928
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xf74
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xf74
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2944
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xf84
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xf84
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2960
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xf94
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xf94
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2976
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xfa4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xfa4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:2992
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xfb4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xfb4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3008
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xfc4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xfc4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3024
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xfd4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xfd4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3040
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xfe4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xfe4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3056
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0xff4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0xff4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3072
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1004
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1004
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3088
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1014
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1014
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3104
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1024
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1024
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3120
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1034
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1034
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3136
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1044
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1044
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3152
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1054
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1054
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3168
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1064
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1064
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3184
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1074
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1074
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3200
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1084
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1084
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3216
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1094
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1094
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3232
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x10a4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x10a4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3248
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x10b4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x10b4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3264
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x10c4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x10c4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3280
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x10d4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x10d4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3296
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x10e4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x10e4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3312
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x10f4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x10f4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3328
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1104
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1104
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3344
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1114
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1114
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3360
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1124
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1124
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3376
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1134
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1134
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3392
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1144
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1144
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3408
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1154
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1154
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3424
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1164
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1164
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3440
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1174
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1174
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3456
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1184
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1184
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3472
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1194
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1194
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3488
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x11a4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x11a4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3504
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x11b4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x11b4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3520
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x11c4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x11c4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3536
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x11d4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x11d4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3552
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x11e4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x11e4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3568
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x11f4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x11f4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3584
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1204
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1204
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3600
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1214
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1214
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3616
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1224
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1224
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3632
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1234
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1234
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3648
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1244
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1244
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3664
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1254
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1254
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3680
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1264
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1264
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3696
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1274
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1274
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3712
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1284
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1284
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3728
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1294
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1294
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3744
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x12a4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x12a4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3760
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x12b4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x12b4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3776
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x12c4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x12c4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3792
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x12d4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x12d4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3808
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x12e4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x12e4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3824
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x12f4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x12f4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3840
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1304
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1304
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3856
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1314
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1314
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3872
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1324
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1324
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3888
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1334
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1334
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3904
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1344
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1344
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3920
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1354
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1354
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3936
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1364
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1364
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3952
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1374
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1374
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3968
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1384
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1384
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:3984
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x1394
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x1394
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:4000
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x13a4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x13a4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:4016
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x13b4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x13b4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:4032
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x13c4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x13c4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:4048
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x13d4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x13d4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:4064
-; GFX9-FLATSCR-NEXT:    s_movk_i32 s11, 0x13e4
+; GFX9-FLATSCR-NEXT:    s_movk_i32 s4, 0x13e4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill
+; GFX9-FLATSCR-NEXT:    scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill
 ; GFX9-FLATSCR-NEXT:    global_load_dwordx4 v[0:3], v5, s[2:3] offset:4080
 ; GFX9-FLATSCR-NEXT:    s_movk_i32 s2, 0x13e4
 ; GFX9-FLATSCR-NEXT:    ;;#ASMSTART
@@ -7346,7 +7338,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) {
 ; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1]
 ; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x3f4
 ; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
-; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s10, v4
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, 0x400, v4
 ; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
 ; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x3e4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
@@ -7380,7 +7372,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) {
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:3968
 ; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
-; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s9, v4
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, 0x380, v4
 ; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
 ; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x364
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
@@ -7414,7 +7406,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) {
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:3968
 ; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
-; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s8, v4
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, 0x300, v4
 ; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
 ; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x2e4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
@@ -7448,7 +7440,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) {
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:3968
 ; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
-; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s7, v4
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, 0x280, v4
 ; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
 ; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x264
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
@@ -7482,7 +7474,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) {
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:3968
 ; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
-; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s6, v4
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, 0x200, v4
 ; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
 ; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x1e4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
@@ -7516,7 +7508,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) {
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:3968
 ; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
-; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s5, v4
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, 0x180, v4
 ; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
 ; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0x164
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
@@ -7550,7 +7542,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) {
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-FLATSCR-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:3968
 ; GFX9-FLATSCR-NEXT:    scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload
-; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, s4, v4
+; GFX9-FLATSCR-NEXT:    v_add_co_u32_e32 v0, vcc, 0x100, v4
 ; GFX9-FLATSCR-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
 ; GFX9-FLATSCR-NEXT:    s_movk_i32 s0, 0xe4
 ; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/srem64.ll b/llvm/test/CodeGen/AMDGPU/srem64.ll
index 24319a639da4472..4df9f7831e499ad 100644
--- a/llvm/test/CodeGen/AMDGPU/srem64.ll
+++ b/llvm/test/CodeGen/AMDGPU/srem64.ll
@@ -1583,8 +1583,7 @@ define i64 @v_test_srem_k_num_i64(i64 %x) {
 ; GCN-IR-NEXT:    v_add_i32_e32 v2, vcc, 32, v2
 ; GCN-IR-NEXT:    v_ffbh_u32_e32 v3, v1
 ; GCN-IR-NEXT:    v_min_u32_e32 v6, v2, v3
-; GCN-IR-NEXT:    s_movk_i32 s6, 0xffc5
-; GCN-IR-NEXT:    v_add_i32_e32 v3, vcc, s6, v6
+; GCN-IR-NEXT:    v_add_i32_e32 v3, vcc, 0xffffffc5, v6
 ; GCN-IR-NEXT:    v_addc_u32_e64 v4, s[6:7], 0, -1, vcc
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
 ; GCN-IR-NEXT:    v_cmp_lt_u64_e32 vcc, 63, v[3:4]
@@ -1774,8 +1773,7 @@ define i64 @v_test_srem_pow2_k_num_i64(i64 %x) {
 ; GCN-IR-NEXT:    v_add_i32_e32 v2, vcc, 32, v2
 ; GCN-IR-NEXT:    v_ffbh_u32_e32 v3, v1
 ; GCN-IR-NEXT:    v_min_u32_e32 v6, v2, v3
-; GCN-IR-NEXT:    s_movk_i32 s6, 0xffd0
-; GCN-IR-NEXT:    v_add_i32_e32 v3, vcc, s6, v6
+; GCN-IR-NEXT:    v_add_i32_e32 v3, vcc, 0xffffffd0, v6
 ; GCN-IR-NEXT:    v_addc_u32_e64 v4, s[6:7], 0, -1, vcc
 ; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
 ; GCN-IR-NEXT:    v_cmp_lt_u64_e32 vcc, 63, v[3:4]
@@ -1914,13 +1912,12 @@ define i64 @v_test_srem_pow2_k_den_i64(i64 %x) {
 ; GCN-IR-NEXT:    s_mov_b64 s[10:11], 0
 ; GCN-IR-NEXT:    v_mov_b32_e32 v13, 0
 ; GCN-IR-NEXT:    v_mov_b32_e32 v7, 0
-; GCN-IR-NEXT:    s_movk_i32 s12, 0x7fff
 ; GCN-IR-NEXT:  .LBB13_3: ; %udiv-do-while
 ; GCN-IR-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN-IR-NEXT:    v_lshl_b64 v[10:11], v[10:11], 1
 ; GCN-IR-NEXT:    v_lshrrev_b32_e32 v6, 31, v5
 ; GCN-IR-NEXT:    v_or_b32_e32 v10, v10, v6
-; GCN-IR-NEXT:    v_sub_i32_e32 v6, vcc, s12, v10
+; GCN-IR-NEXT:    v_sub_i32_e32 v6, vcc, 0x7fff, v10
 ; GCN-IR-NEXT:    v_lshl_b64 v[4:5], v[4:5], 1
 ; GCN-IR-NEXT:    v_subb_u32_e32 v6, vcc, 0, v11, vcc
 ; GCN-IR-NEXT:    v_add_i32_e32 v8, vcc, 1, v8
diff --git a/llvm/test/CodeGen/AMDGPU/udiv64.ll b/llvm/test/CodeGen/AMDGPU/udiv64.ll
index e23f3cfad89bc88..3d684be885f2971 100644
--- a/llvm/test/CodeGen/AMDGPU/udiv64.ll
+++ b/llvm/test/CodeGen/AMDGPU/udiv64.ll
@@ -1283,13 +1283,12 @@ define i64 @v_test_udiv_pow2_k_den_i64(i64 %x) {
 ; GCN-IR-NEXT:    s_mov_b64 s[10:11], 0
 ; GCN-IR-NEXT:    v_mov_b32_e32 v10, 0
 ; GCN-IR-NEXT:    v_mov_b32_e32 v5, 0
-; GCN-IR-NEXT:    s_movk_i32 s12, 0x7fff
 ; GCN-IR-NEXT:  .LBB10_3: ; %udiv-do-while
 ; GCN-IR-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN-IR-NEXT:    v_lshl_b64 v[7:8], v[7:8], 1
 ; GCN-IR-NEXT:    v_lshrrev_b32_e32 v4, 31, v3
 ; GCN-IR-NEXT:    v_or_b32_e32 v6, v7, v4
-; GCN-IR-NEXT:    v_sub_i32_e32 v4, vcc, s12, v6
+; GCN-IR-NEXT:    v_sub_i32_e32 v4, vcc, 0x7fff, v6
 ; GCN-IR-NEXT:    v_subb_u32_e32 v4, vcc, 0, v8, vcc
 ; GCN-IR-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
 ; GCN-IR-NEXT:    v_lshl_b64 v[2:3], v[2:3], 1
diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll b/llvm/test/CodeGen/AMDGPU/urem64.ll
index f68d14a32b929a5..2bd7560f91a5b7e 100644
--- a/llvm/test/CodeGen/AMDGPU/urem64.ll
+++ b/llvm/test/CodeGen/AMDGPU/urem64.ll
@@ -1307,13 +1307,12 @@ define i64 @v_test_urem_pow2_k_den_i64(i64 %x) {
 ; GCN-IR-NEXT:    s_mov_b64 s[10:11], 0
 ; GCN-IR-NEXT:    v_mov_b32_e32 v11, 0
 ; GCN-IR-NEXT:    v_mov_b32_e32 v5, 0
-; GCN-IR-NEXT:    s_movk_i32 s12, 0x7fff
 ; GCN-IR-NEXT:  .LBB9_3: ; %udiv-do-while
 ; GCN-IR-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN-IR-NEXT:    v_lshl_b64 v[8:9], v[8:9], 1
 ; GCN-IR-NEXT:    v_lshrrev_b32_e32 v4, 31, v3
 ; GCN-IR-NEXT:    v_or_b32_e32 v8, v8, v4
-; GCN-IR-NEXT:    v_sub_i32_e32 v4, vcc, s12, v8
+; GCN-IR-NEXT:    v_sub_i32_e32 v4, vcc, 0x7fff, v8
 ; GCN-IR-NEXT:    v_lshl_b64 v[2:3], v[2:3], 1
 ; GCN-IR-NEXT:    v_subb_u32_e32 v4, vcc, 0, v9, vcc
 ; GCN-IR-NEXT:    v_add_i32_e32 v6, vcc, 1, v6



More information about the llvm-commits mailing list