[llvm] [GISel][AArch64][AMDGPU][RISCV] Canonicalize (sub X, C) -> (add X, -C) (PR #114309)

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 4 11:47:01 PST 2024


https://github.com/topperc updated https://github.com/llvm/llvm-project/pull/114309

>From 13c2599e3991d82bfce685af13777af7b0ff8d1a Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 30 Oct 2024 14:49:55 -0700
Subject: [PATCH 1/5] [GISel][AArch64][AMDGPU][RISCV] Canonicalize (sub X, C)
 -> (add X, -C)

This matches InstCombine and DAGCombine.

RISC-V only has an ADDI instruction (there is no SUBI), so without this
canonicalization we need additional isel patterns to do the conversion.

Some of the AMDGPU tests look like possible regressions.
---
 .../llvm/CodeGen/GlobalISel/CombinerHelper.h  |    3 +
 .../include/llvm/Target/GlobalISel/Combine.td |   12 +-
 .../lib/CodeGen/GlobalISel/CombinerHelper.cpp |   22 +
 llvm/lib/Target/RISCV/RISCVGISel.td           |    9 -
 .../AArch64/GlobalISel/combine-integer.mir    |    4 +-
 .../GlobalISel/combine-narrow-binop.mir       |    8 +-
 ...ercombiner-extending-loads-cornercases.mir |    4 +-
 .../prelegalizercombiner-trivial-arith.mir    |    4 +-
 llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll   |   10 +-
 .../AMDGPU/GlobalISel/cvt_f32_ubyte.ll        |    4 +-
 .../CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll     |    7 +-
 llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll   |  560 ++++-----
 llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll   |  627 +++++-----
 llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll   |   18 +-
 .../CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll     |   70 +-
 llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll    |   10 +-
 .../CodeGen/AMDGPU/GlobalISel/srem.i32.ll     |   96 +-
 .../test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll | 1066 +++++++++--------
 .../CodeGen/AMDGPU/GlobalISel/sub.v2i16.ll    |   34 +-
 .../CodeGen/AMDGPU/GlobalISel/urem.i32.ll     |   48 +-
 llvm/test/CodeGen/AMDGPU/ctlz.ll              |    8 +-
 llvm/test/CodeGen/AMDGPU/div_i128.ll          |   28 +-
 llvm/test/CodeGen/AMDGPU/div_v2i128.ll        |  280 ++---
 llvm/test/CodeGen/AMDGPU/fptoi.i128.ll        |   72 +-
 llvm/test/CodeGen/AMDGPU/itofp.i128.ll        |  144 +--
 .../CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll   |    2 +-
 .../CodeGen/AMDGPU/llvm.is.fpclass.f16.ll     |   44 +-
 .../CodeGen/AMDGPU/shrink-add-sub-constant.ll | 1018 +++++++++-------
 .../CodeGen/RISCV/GlobalISel/alu-roundtrip.ll |   18 +-
 .../instruction-select/alu-rv32.mir           |    5 +-
 .../instruction-select/alu-rv64.mir           |   10 +-
 .../jump-table-brjt-medium-rv64.mir           |    4 +-
 .../jump-table-brjt-pic-rv32.mir              |    4 +-
 .../jump-table-brjt-pic-rv64.mir              |    4 +-
 .../jump-table-brjt-rv32.mir                  |    4 +-
 .../jump-table-brjt-small-rv64.mir            |    4 +-
 36 files changed, 2214 insertions(+), 2051 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 9240a3c3127eb4..b09981eaef506e 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -321,6 +321,9 @@ class CombinerHelper {
   bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal);
   void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal);
 
+  // Transform a G_SUB with constant on the RHS to G_ADD.
+  bool matchCombineSubToAdd(MachineInstr &MI, BuildFnTy &MatchInfo);
+
   // Transform a G_SHL with an extended source into a narrower shift if
   // possible.
   bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index ead4149fc11068..9891db5ceb6fa9 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -335,6 +335,13 @@ def mul_to_shl : GICombineRule<
          [{ return Helper.matchCombineMulToShl(*${mi}, ${matchinfo}); }]),
   (apply [{ Helper.applyCombineMulToShl(*${mi}, ${matchinfo}); }])>;
 
+// (sub x, C) -> (add x, -C)
+def sub_to_add : GICombineRule<
+  (defs root:$d, build_fn_matchinfo:$matchinfo),
+  (match (G_SUB $d, $op1, $op2):$mi,
+         [{ return Helper.matchCombineSubToAdd(*${mi}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFnNoErase(*${mi}, ${matchinfo}); }])>;
+
 // shl ([asz]ext x), y => zext (shl x, y), if shift does not overflow int
 def reduce_shl_of_extend_matchdata : GIDefMatchData<"RegisterImmPair">;
 def reduce_shl_of_extend : GICombineRule<
@@ -1903,8 +1910,9 @@ def bitreverse_shift : GICombineGroup<[bitreverse_shl, bitreverse_lshr]>;
 def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp,
                                       select_to_iminmax, match_selects]>;
 
-def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd,
-                                       mul_by_neg_one, idempotent_prop]>;
+def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, sub_to_add,
+                                       add_p2i_to_ptradd, mul_by_neg_one,
+                                       idempotent_prop]>;
 
 def fma_combines : GICombineGroup<[combine_fadd_fmul_to_fmad_or_fma,
   combine_fadd_fpext_fmul_to_fmad_or_fma, combine_fadd_fma_fmul_to_fmad_or_fma,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index b7ddf9f479ef8e..91e5af9dfd8e25 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2041,6 +2041,28 @@ void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
   Observer.changedInstr(MI);
 }
 
+bool CombinerHelper::matchCombineSubToAdd(MachineInstr &MI,
+                                          BuildFnTy &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_SUB && "Expected a G_SUB");
+  auto MaybeImmVal =
+      getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+  if (!MaybeImmVal)
+    return false;
+
+  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+
+  APInt NegImm = -MaybeImmVal->Value;
+  MatchInfo = [=, &MI](MachineIRBuilder &B) {
+    auto NegCst = B.buildConstant(Ty, NegImm);
+    Observer.changingInstr(MI);
+    MI.setDesc(B.getTII().get(TargetOpcode::G_ADD));
+    MI.getOperand(2).setReg(NegCst.getReg(0));
+    MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
+    Observer.changedInstr(MI);
+  };
+  return true;
+}
+
 // shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
 bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
                                              RegisterImmPair &MatchData) {
diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td
index 67e93b812421b4..40aae220fbd47e 100644
--- a/llvm/lib/Target/RISCV/RISCVGISel.td
+++ b/llvm/lib/Target/RISCV/RISCVGISel.td
@@ -96,15 +96,6 @@ def gi_sh2add_uw_op : GIComplexOperandMatcher<s32, "selectSHXADD_UWOp<2>">,
 def gi_sh3add_uw_op : GIComplexOperandMatcher<s32, "selectSHXADD_UWOp<3>">,
                       GIComplexPatternEquiv<sh3add_uw_op>;
 
-// FIXME: Canonicalize (sub X, C) -> (add X, -C) earlier.
-def : Pat<(XLenVT (sub GPR:$rs1, simm12Plus1:$imm)),
-          (ADDI GPR:$rs1, (NegImm simm12Plus1:$imm))>;
-
-let Predicates = [IsRV64] in {
-def : Pat<(i32 (sub GPR:$rs1, simm12Plus1i32:$imm)),
-          (ADDIW GPR:$rs1, (i64 (NegImm $imm)))>;
-}
-
 // Ptr type used in patterns with GlobalISelEmitter
 def PtrVT : PtrValueTypeByHwMode<XLenVT, 0>;
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-integer.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-integer.mir
index 2f10a497fa74cb..5cbff0f0c74cb7 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-integer.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-integer.mir
@@ -308,8 +308,8 @@ body:             |
     ; CHECK: liveins: $w0, $w1
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %a:_(s64) = COPY $x0
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 71
-    ; CHECK-NEXT: %sub:_(s64) = G_SUB %a, [[C]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -71
+    ; CHECK-NEXT: %sub:_(s64) = G_ADD %a, [[C]]
     ; CHECK-NEXT: $x0 = COPY %sub(s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %a:_(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-narrow-binop.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-narrow-binop.mir
index f207e9c149a476..e9d4af7da5d06f 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-narrow-binop.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-narrow-binop.mir
@@ -88,8 +88,8 @@ body:             |
     ; CHECK-LABEL: name: test_combine_trunc_sub_i128
     ; CHECK: %lhs:_(s128) = COPY $q0
     ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %lhs(s128)
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; CHECK-NEXT: %small:_(s32) = G_SUB [[TRUNC]], [[C]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -5
+    ; CHECK-NEXT: %small:_(s32) = G_ADD [[TRUNC]], [[C]]
     ; CHECK-NEXT: $w0 = COPY %small(s32)
     %lhs:_(s128) = COPY $q0
     %rhs:_(s128) = G_CONSTANT i128 5
@@ -103,8 +103,8 @@ body:             |
   bb.1:
     ; CHECK-LABEL: name: test_combine_trunc_sub_i128_multi_use
     ; CHECK: %lhs:_(s128) = COPY $q0
-    ; CHECK-NEXT: %rhs:_(s128) = G_CONSTANT i128 5
-    ; CHECK-NEXT: %res:_(s128) = G_SUB %lhs, %rhs
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 -5
+    ; CHECK-NEXT: %res:_(s128) = G_ADD %lhs, [[C]]
     ; CHECK-NEXT: %small:_(s32) = G_TRUNC %res(s128)
     ; CHECK-NEXT: $q0 = COPY %res(s128)
     ; CHECK-NEXT: $w0 = COPY %small(s32)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir
index 04968dab3a37ce..591b6a17928cb1 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir
@@ -95,7 +95,7 @@ body: |
     %11:_(s8) = G_CONSTANT i8 1
     ; CHECK: [[T3:%[0-9]+]]:_(s8) = G_TRUNC [[T0]](s32)
     %7:_(s8) = G_SUB %2, %11
-    ; CHECK: [[T4:%[0-9]+]]:_(s8) = G_SUB [[T3]], {{.*}}
+    ; CHECK: [[T4:%[0-9]+]]:_(s8) = G_ADD [[T3]], {{.*}}
     G_BR %bb.3.exit
   bb.3.exit:
   ; CHECK: bb.3.exit:
@@ -197,7 +197,7 @@ body: |
     %7:_(s8) = G_CONSTANT i8 1
     ; CHECK: [[T3:%[0-9]+]]:_(s8) = G_TRUNC [[T0]](s32)
     %8:_(s8) = G_SUB %2, %7
-    ; CHECK: [[T4:%[0-9]+]]:_(s8) = G_SUB [[T3]], {{.*}}
+    ; CHECK: [[T4:%[0-9]+]]:_(s8) = G_ADD [[T3]], {{.*}}
     G_BR %bb.3.exit
   bb.3.exit:
   ; CHECK: bb.3.exit:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir
index 0900dd4267a2e4..bc3be691bd25a0 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir
@@ -289,8 +289,8 @@ body:             |
     ; CHECK: liveins: $w0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %x:_(s32) = COPY $w0
-    ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: %op:_(s32) = G_SUB %x, %cst
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: %op:_(s32) = G_ADD %x, [[C]]
     ; CHECK-NEXT: $w0 = COPY %op(s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
     %x:_(s32) = COPY $w0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
index 63f5464371cc62..493e8cef638902 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
@@ -1669,7 +1669,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
 ; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, 64, v3
 ; GFX6-NEXT:    v_lshr_b64 v[6:7], v[0:1], v3
 ; GFX6-NEXT:    v_lshl_b64 v[8:9], v[4:5], v8
-; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, 64, v3
+; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 0xffffffc0, v3
 ; GFX6-NEXT:    v_ashr_i64 v[10:11], v[4:5], v3
 ; GFX6-NEXT:    v_or_b32_e32 v6, v6, v8
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v8, 31, v5
@@ -1692,7 +1692,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
 ; GFX8-NEXT:    v_sub_u32_e32 v8, vcc, 64, v3
 ; GFX8-NEXT:    v_lshrrev_b64 v[6:7], v3, v[0:1]
 ; GFX8-NEXT:    v_lshlrev_b64 v[8:9], v8, v[4:5]
-; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, 64, v3
+; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0xffffffc0, v3
 ; GFX8-NEXT:    v_ashrrev_i64 v[10:11], v3, v[4:5]
 ; GFX8-NEXT:    v_or_b32_e32 v6, v6, v8
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v8, 31, v5
@@ -1715,7 +1715,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
 ; GFX9-NEXT:    v_sub_u32_e32 v8, 64, v3
 ; GFX9-NEXT:    v_lshrrev_b64 v[6:7], v3, v[0:1]
 ; GFX9-NEXT:    v_lshlrev_b64 v[8:9], v8, v[4:5]
-; GFX9-NEXT:    v_subrev_u32_e32 v2, 64, v3
+; GFX9-NEXT:    v_add_u32_e32 v2, 0xffffffc0, v3
 ; GFX9-NEXT:    v_ashrrev_i64 v[10:11], v3, v[4:5]
 ; GFX9-NEXT:    v_or_b32_e32 v6, v6, v8
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v8, 31, v5
@@ -1735,7 +1735,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_bfe_i32 v4, v2, 0, 1
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v2, 64, v3
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v10, 64, v3
+; GFX10-NEXT:    v_add_nc_u32_e32 v10, 0xffffffc0, v3
 ; GFX10-NEXT:    v_lshrrev_b64 v[6:7], v3, v[0:1]
 ; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v3
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
@@ -1758,7 +1758,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_bfe_i32 v4, v2, 0, 1
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v2, 64, v3
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v10, 64, v3
+; GFX11-NEXT:    v_add_nc_u32_e32 v10, 0xffffffc0, v3
 ; GFX11-NEXT:    v_lshrrev_b64 v[6:7], v3, v[0:1]
 ; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v3
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
index 405b1e8f3a250f..46d6b86789c778 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
@@ -1438,7 +1438,7 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) {
 ; SI-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
 ; SI-NEXT:    v_ffbh_i32_e32 v3, 0
 ; SI-NEXT:    v_add_i32_e32 v2, vcc, 32, v2
-; SI-NEXT:    v_subrev_i32_e32 v3, vcc, 1, v3
+; SI-NEXT:    v_add_i32_e32 v3, vcc, -1, v3
 ; SI-NEXT:    v_mov_b32_e32 v1, 0
 ; SI-NEXT:    v_min_u32_e32 v2, v3, v2
 ; SI-NEXT:    v_lshl_b64 v[0:1], v[0:1], v2
@@ -1456,7 +1456,7 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) {
 ; VI-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
 ; VI-NEXT:    v_ffbh_i32_e32 v3, 0
 ; VI-NEXT:    v_add_u32_e32 v2, vcc, 32, v2
-; VI-NEXT:    v_subrev_u32_e32 v3, vcc, 1, v3
+; VI-NEXT:    v_add_u32_e32 v3, vcc, -1, v3
 ; VI-NEXT:    v_mov_b32_e32 v1, 0
 ; VI-NEXT:    v_min_u32_e32 v2, v3, v2
 ; VI-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
index 146f344930a4ee..6e55d7fdb5e957 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
@@ -4101,7 +4101,7 @@ define float @v_fdiv_f32_constrhs0_dynamic_25ulp(float %x) #0 {
 ; GFX10-NEXT:    v_rcp_f32_e32 v1, 0x3f40e400
 ; GFX10-NEXT:    v_frexp_mant_f32_e32 v2, v0
 ; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v0, 14, v0
+; GFX10-NEXT:    v_add_nc_u32_e32 v0, -14, v0
 ; GFX10-NEXT:    v_mul_f32_e32 v1, v2, v1
 ; GFX10-NEXT:    v_ldexp_f32 v0, v1, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
@@ -4112,10 +4112,9 @@ define float @v_fdiv_f32_constrhs0_dynamic_25ulp(float %x) #0 {
 ; GFX11-NEXT:    v_rcp_f32_e32 v1, 0x3f40e400
 ; GFX11-NEXT:    v_frexp_mant_f32_e32 v2, v0
 ; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v0, 14, v0
 ; GFX11-NEXT:    s_waitcnt_depctr 0xfff
-; GFX11-NEXT:    v_mul_f32_e32 v1, v2, v1
+; GFX11-NEXT:    v_dual_mul_f32 v1, v2, v1 :: v_dual_add_nc_u32 v0, -14, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-NEXT:    v_ldexp_f32 v0, v1, v0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ; EG-LABEL: v_fdiv_f32_constrhs0_dynamic_25ulp:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
index 3bd3486ec261d4..5d76b542fad894 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
@@ -20,10 +20,10 @@ define amdgpu_ps i7 @s_fshl_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
 ; GFX6-NEXT:    v_mul_hi_u32 v0, s2, v0
 ; GFX6-NEXT:    v_mul_lo_u32 v0, v0, 7
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s2, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v1, vcc, 7, v0
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, -7, v0
 ; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX6-NEXT:    v_subrev_i32_e32 v1, vcc, 7, v0
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, -7, v0
 ; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 6, v0
@@ -51,10 +51,10 @@ define amdgpu_ps i7 @s_fshl_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
 ; GFX8-NEXT:    v_mul_hi_u32 v0, s2, v0
 ; GFX8-NEXT:    v_mul_lo_u32 v0, v0, 7
 ; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, s2, v0
-; GFX8-NEXT:    v_subrev_u32_e32 v1, vcc, 7, v0
+; GFX8-NEXT:    v_add_u32_e32 v1, vcc, -7, v0
 ; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX8-NEXT:    v_subrev_u32_e32 v1, vcc, 7, v0
+; GFX8-NEXT:    v_add_u32_e32 v1, vcc, -7, v0
 ; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX8-NEXT:    v_sub_u16_e32 v1, 6, v0
@@ -82,10 +82,10 @@ define amdgpu_ps i7 @s_fshl_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
 ; GFX9-NEXT:    v_mul_hi_u32 v0, s2, v0
 ; GFX9-NEXT:    v_mul_lo_u32 v0, v0, 7
 ; GFX9-NEXT:    v_sub_u32_e32 v0, s2, v0
-; GFX9-NEXT:    v_subrev_u32_e32 v1, 7, v0
+; GFX9-NEXT:    v_add_u32_e32 v1, -7, v0
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX9-NEXT:    v_subrev_u32_e32 v1, 7, v0
+; GFX9-NEXT:    v_add_u32_e32 v1, -7, v0
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX9-NEXT:    v_sub_u16_e32 v1, 6, v0
@@ -113,10 +113,10 @@ define amdgpu_ps i7 @s_fshl_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
 ; GFX10-NEXT:    v_mul_hi_u32 v0, s2, v0
 ; GFX10-NEXT:    v_mul_lo_u32 v0, v0, 7
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v0, s2, v0
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v1, 7, v0
+; GFX10-NEXT:    v_add_nc_u32_e32 v1, -7, v0
 ; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v1, 7, v0
+; GFX10-NEXT:    v_add_nc_u32_e32 v1, -7, v0
 ; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX10-NEXT:    v_sub_nc_u16 v1, 6, v0
@@ -150,11 +150,11 @@ define amdgpu_ps i7 @s_fshl_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
 ; GFX11-NEXT:    v_mul_lo_u32 v0, v0, 7
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v0, s2, v0
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v1, 7, v0
+; GFX11-NEXT:    v_add_nc_u32_e32 v1, -7, v0
 ; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v1, 7, v0
+; GFX11-NEXT:    v_add_nc_u32_e32 v1, -7, v0
 ; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -189,10 +189,10 @@ define i7 @v_fshl_i7(i7 %lhs, i7 %rhs, i7 %amt) {
 ; GFX6-NEXT:    v_mul_hi_u32 v3, v2, v3
 ; GFX6-NEXT:    v_mul_lo_u32 v3, v3, 7
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v3
-; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 7, v2
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, -7, v2
 ; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 7, v2
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, -7, v2
 ; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 6, v2
@@ -219,10 +219,10 @@ define i7 @v_fshl_i7(i7 %lhs, i7 %rhs, i7 %amt) {
 ; GFX8-NEXT:    v_mul_hi_u32 v3, v2, v3
 ; GFX8-NEXT:    v_mul_lo_u32 v3, v3, 7
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v2, v3
-; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, 7, v2
+; GFX8-NEXT:    v_add_u32_e32 v3, vcc, -7, v2
 ; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, 7, v2
+; GFX8-NEXT:    v_add_u32_e32 v3, vcc, -7, v2
 ; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX8-NEXT:    v_sub_u16_e32 v3, 6, v2
@@ -249,10 +249,10 @@ define i7 @v_fshl_i7(i7 %lhs, i7 %rhs, i7 %amt) {
 ; GFX9-NEXT:    v_mul_hi_u32 v3, v2, v3
 ; GFX9-NEXT:    v_mul_lo_u32 v3, v3, 7
 ; GFX9-NEXT:    v_sub_u32_e32 v2, v2, v3
-; GFX9-NEXT:    v_subrev_u32_e32 v3, 7, v2
+; GFX9-NEXT:    v_add_u32_e32 v3, -7, v2
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX9-NEXT:    v_subrev_u32_e32 v3, 7, v2
+; GFX9-NEXT:    v_add_u32_e32 v3, -7, v2
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX9-NEXT:    v_sub_u16_e32 v3, 6, v2
@@ -279,10 +279,10 @@ define i7 @v_fshl_i7(i7 %lhs, i7 %rhs, i7 %amt) {
 ; GFX10-NEXT:    v_mul_hi_u32 v3, v2, v3
 ; GFX10-NEXT:    v_mul_lo_u32 v3, v3, 7
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v2, v2, v3
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, 7, v2
+; GFX10-NEXT:    v_add_nc_u32_e32 v3, -7, v2
 ; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v2
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, 7, v2
+; GFX10-NEXT:    v_add_nc_u32_e32 v3, -7, v2
 ; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v2
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
 ; GFX10-NEXT:    v_sub_nc_u16 v3, 6, v2
@@ -315,11 +315,11 @@ define i7 @v_fshl_i7(i7 %lhs, i7 %rhs, i7 %amt) {
 ; GFX11-NEXT:    v_mul_lo_u32 v3, v3, 7
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v2, v2, v3
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v3, 7, v2
+; GFX11-NEXT:    v_add_nc_u32_e32 v3, -7, v2
 ; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v2
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v3, 7, v2
+; GFX11-NEXT:    v_add_nc_u32_e32 v3, -7, v2
 ; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v2
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
@@ -1550,16 +1550,16 @@ define amdgpu_ps i24 @s_fshl_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
 ; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x170001
 ; GFX6-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX6-NEXT:    v_mul_lo_u32 v1, v0, v1
-; GFX6-NEXT:    v_mul_hi_u32 v1, v0, v1
-; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
+; GFX6-NEXT:    v_mul_lo_u32 v2, v0, v1
+; GFX6-NEXT:    v_mul_hi_u32 v2, v0, v2
+; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
 ; GFX6-NEXT:    v_mul_hi_u32 v0, s2, v0
 ; GFX6-NEXT:    v_mul_lo_u32 v0, v0, 24
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s2, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v1, vcc, 24, v0
+; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v0, v1
 ; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
-; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX6-NEXT:    v_subrev_i32_e32 v1, vcc, 24, v0
+; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v0, v1
 ; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 23, v0
@@ -1580,16 +1580,16 @@ define amdgpu_ps i24 @s_fshl_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
 ; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x170001
 ; GFX8-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX8-NEXT:    v_mul_lo_u32 v1, v0, v1
-; GFX8-NEXT:    v_mul_hi_u32 v1, v0, v1
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
+; GFX8-NEXT:    v_mul_lo_u32 v2, v0, v1
+; GFX8-NEXT:    v_mul_hi_u32 v2, v0, v2
+; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
 ; GFX8-NEXT:    v_mul_hi_u32 v0, s2, v0
 ; GFX8-NEXT:    v_mul_lo_u32 v0, v0, 24
 ; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, s2, v0
-; GFX8-NEXT:    v_subrev_u32_e32 v1, vcc, 24, v0
+; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v0, v1
 ; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX8-NEXT:    v_subrev_u32_e32 v1, vcc, 24, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v0, v1
 ; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, 23, v0
@@ -1616,10 +1616,10 @@ define amdgpu_ps i24 @s_fshl_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
 ; GFX9-NEXT:    v_mul_hi_u32 v0, s2, v0
 ; GFX9-NEXT:    v_mul_lo_u32 v0, v0, 24
 ; GFX9-NEXT:    v_sub_u32_e32 v0, s2, v0
-; GFX9-NEXT:    v_subrev_u32_e32 v1, 24, v0
+; GFX9-NEXT:    v_add_u32_e32 v1, 0xffffffe8, v0
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX9-NEXT:    v_subrev_u32_e32 v1, 24, v0
+; GFX9-NEXT:    v_add_u32_e32 v1, 0xffffffe8, v0
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX9-NEXT:    v_sub_u32_e32 v1, 23, v0
@@ -1644,10 +1644,10 @@ define amdgpu_ps i24 @s_fshl_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
 ; GFX10-NEXT:    v_mul_hi_u32 v0, s2, v0
 ; GFX10-NEXT:    v_mul_lo_u32 v0, v0, 24
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v0, s2, v0
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v1, 24, v0
+; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0xffffffe8, v0
 ; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v1, 24, v0
+; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0xffffffe8, v0
 ; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v1, 23, v0
@@ -1678,11 +1678,11 @@ define amdgpu_ps i24 @s_fshl_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
 ; GFX11-NEXT:    v_mul_lo_u32 v0, v0, 24
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v0, s2, v0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v1, 24, v0
+; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0xffffffe8, v0
 ; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v1, 24, v0
+; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0xffffffe8, v0
 ; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
@@ -1710,16 +1710,16 @@ define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) {
 ; GFX6-NEXT:    v_bfe_u32 v1, v1, 1, 23
 ; GFX6-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; GFX6-NEXT:    v_mul_lo_u32 v4, v3, v4
-; GFX6-NEXT:    v_mul_hi_u32 v4, v3, v4
-; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
+; GFX6-NEXT:    v_mul_lo_u32 v5, v3, v4
+; GFX6-NEXT:    v_mul_hi_u32 v5, v3, v5
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
 ; GFX6-NEXT:    v_mul_hi_u32 v3, v2, v3
 ; GFX6-NEXT:    v_mul_lo_u32 v3, v3, 24
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v3
-; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 24, v2
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v2, v4
 ; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 24, v2
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 0xffffffe8, v2
 ; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 23, v2
@@ -1740,16 +1740,16 @@ define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) {
 ; GFX8-NEXT:    v_bfe_u32 v1, v1, 1, 23
 ; GFX8-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; GFX8-NEXT:    v_mul_lo_u32 v4, v3, v4
-; GFX8-NEXT:    v_mul_hi_u32 v4, v3, v4
-; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v4
+; GFX8-NEXT:    v_mul_lo_u32 v5, v3, v4
+; GFX8-NEXT:    v_mul_hi_u32 v5, v3, v5
+; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v5
 ; GFX8-NEXT:    v_mul_hi_u32 v3, v2, v3
 ; GFX8-NEXT:    v_mul_lo_u32 v3, v3, 24
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v2, v3
-; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, 24, v2
+; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v2, v4
 ; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, 24, v2
+; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0xffffffe8, v2
 ; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, 23, v2
@@ -1776,10 +1776,10 @@ define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) {
 ; GFX9-NEXT:    v_mul_hi_u32 v3, v2, v3
 ; GFX9-NEXT:    v_mul_lo_u32 v3, v3, 24
 ; GFX9-NEXT:    v_sub_u32_e32 v2, v2, v3
-; GFX9-NEXT:    v_subrev_u32_e32 v3, 24, v2
+; GFX9-NEXT:    v_add_u32_e32 v3, 0xffffffe8, v2
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX9-NEXT:    v_subrev_u32_e32 v3, 24, v2
+; GFX9-NEXT:    v_add_u32_e32 v3, 0xffffffe8, v2
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX9-NEXT:    v_sub_u32_e32 v3, 23, v2
@@ -1804,10 +1804,10 @@ define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) {
 ; GFX10-NEXT:    v_mul_hi_u32 v3, v2, v3
 ; GFX10-NEXT:    v_mul_lo_u32 v3, v3, 24
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v2, v2, v3
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, 24, v2
+; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v2
 ; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, 24, v2
+; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v2
 ; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v3, 23, v2
@@ -1838,11 +1838,11 @@ define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) {
 ; GFX11-NEXT:    v_mul_lo_u32 v3, v3, 24
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v2, v2, v3
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v3, 24, v2
+; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v2
 ; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
 ; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v3, 24, v2
+; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v2
 ; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
 ; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
@@ -1887,7 +1887,7 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX6-NEXT:    s_lshr_b32 s0, s2, 16
 ; GFX6-NEXT:    s_lshr_b32 s1, s3, 8
 ; GFX6-NEXT:    s_bfe_u32 s8, s2, 0x80008
-; GFX6-NEXT:    v_mul_lo_u32 v3, v2, v3
+; GFX6-NEXT:    v_mul_lo_u32 v4, v2, v3
 ; GFX6-NEXT:    s_and_b32 s7, s2, 0xff
 ; GFX6-NEXT:    s_lshl_b32 s8, s8, 8
 ; GFX6-NEXT:    s_and_b32 s0, s0, 0xff
@@ -1906,7 +1906,7 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX6-NEXT:    v_or_b32_e32 v1, s1, v1
 ; GFX6-NEXT:    s_lshr_b32 s1, s4, 16
 ; GFX6-NEXT:    s_bfe_u32 s7, s4, 0x80008
-; GFX6-NEXT:    v_mul_hi_u32 v3, v2, v3
+; GFX6-NEXT:    v_mul_hi_u32 v4, v2, v4
 ; GFX6-NEXT:    s_and_b32 s3, s4, 0xff
 ; GFX6-NEXT:    s_lshl_b32 s7, s7, 8
 ; GFX6-NEXT:    s_and_b32 s1, s1, 0xff
@@ -1915,53 +1915,53 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX6-NEXT:    s_and_b32 s3, 0xffff, s3
 ; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
 ; GFX6-NEXT:    s_or_b32 s1, s3, s1
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; GFX6-NEXT:    v_mul_hi_u32 v3, s1, v2
+; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
+; GFX6-NEXT:    v_mul_hi_u32 v4, s1, v2
 ; GFX6-NEXT:    s_lshr_b32 s2, s5, 8
 ; GFX6-NEXT:    s_and_b32 s3, s5, 0xff
-; GFX6-NEXT:    v_mov_b32_e32 v4, s4
+; GFX6-NEXT:    v_mov_b32_e32 v5, s4
 ; GFX6-NEXT:    s_and_b32 s2, s2, 0xff
-; GFX6-NEXT:    v_alignbit_b32 v4, s3, v4, 24
+; GFX6-NEXT:    v_alignbit_b32 v5, s3, v5, 24
 ; GFX6-NEXT:    s_and_b32 s2, 0xffff, s2
-; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v4
-; GFX6-NEXT:    v_mul_lo_u32 v3, v3, 24
+; GFX6-NEXT:    v_and_b32_e32 v5, 0xffff, v5
+; GFX6-NEXT:    v_mul_lo_u32 v4, v4, 24
 ; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
-; GFX6-NEXT:    v_or_b32_e32 v4, s2, v4
-; GFX6-NEXT:    v_mul_hi_u32 v2, v4, v2
-; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, s1, v3
-; GFX6-NEXT:    v_subrev_i32_e32 v5, vcc, 24, v3
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v3
+; GFX6-NEXT:    v_or_b32_e32 v5, s2, v5
+; GFX6-NEXT:    v_mul_hi_u32 v2, v5, v2
+; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, s1, v4
+; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v4, v3
+; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
 ; GFX6-NEXT:    v_mul_lo_u32 v2, v2, 24
-; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
-; GFX6-NEXT:    v_subrev_i32_e32 v5, vcc, 24, v3
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v3
-; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
-; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v4, v2
-; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, 23, v3
-; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, 24, v2
+; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
+; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v4, v3
+; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
+; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
+; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v5, v2
+; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, 23, v4
+; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v2, v3
 ; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
-; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
-; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, 24, v2
+; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v2, v3
 ; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
-; GFX6-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
+; GFX6-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
 ; GFX6-NEXT:    s_lshr_b32 s0, s0, 1
-; GFX6-NEXT:    v_and_b32_e32 v5, 0xffffff, v5
-; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
-; GFX6-NEXT:    v_lshl_b32_e32 v3, s6, v3
-; GFX6-NEXT:    v_lshr_b32_e32 v5, s0, v5
-; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 23, v2
+; GFX6-NEXT:    v_and_b32_e32 v6, 0xffffff, v6
+; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX6-NEXT:    v_lshl_b32_e32 v4, s6, v4
+; GFX6-NEXT:    v_lshr_b32_e32 v6, s0, v6
+; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 23, v2
 ; GFX6-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
-; GFX6-NEXT:    v_or_b32_e32 v3, v3, v5
+; GFX6-NEXT:    v_or_b32_e32 v4, v4, v6
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v2, v0
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 1, v1
-; GFX6-NEXT:    v_and_b32_e32 v2, 0xffffff, v4
+; GFX6-NEXT:    v_and_b32_e32 v2, 0xffffff, v3
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
-; GFX6-NEXT:    v_bfe_u32 v2, v3, 8, 8
+; GFX6-NEXT:    v_bfe_u32 v2, v4, 8, 8
 ; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
-; GFX6-NEXT:    v_and_b32_e32 v1, 0xff, v3
+; GFX6-NEXT:    v_and_b32_e32 v1, 0xff, v4
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
 ; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
-; GFX6-NEXT:    v_bfe_u32 v2, v3, 16, 8
+; GFX6-NEXT:    v_bfe_u32 v2, v4, 16, 8
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
 ; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
 ; GFX6-NEXT:    v_and_b32_e32 v2, 0xff, v0
@@ -2021,7 +2021,7 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX8-NEXT:    v_not_b32_e32 v1, 23
 ; GFX8-NEXT:    s_or_b32 s3, s8, s3
 ; GFX8-NEXT:    s_and_b32 s6, 0xffff, s6
-; GFX8-NEXT:    v_mul_lo_u32 v1, v0, v1
+; GFX8-NEXT:    v_mul_lo_u32 v2, v0, v1
 ; GFX8-NEXT:    s_and_b32 s3, 0xffff, s3
 ; GFX8-NEXT:    s_lshl_b32 s6, s6, 16
 ; GFX8-NEXT:    s_or_b32 s3, s3, s6
@@ -2031,67 +2031,67 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX8-NEXT:    s_lshr_b32 s8, s4, 24
 ; GFX8-NEXT:    s_and_b32 s4, s4, 0xff
 ; GFX8-NEXT:    s_lshl_b32 s6, s6, 8
-; GFX8-NEXT:    v_mul_hi_u32 v1, v0, v1
+; GFX8-NEXT:    v_mul_hi_u32 v2, v0, v2
 ; GFX8-NEXT:    s_or_b32 s4, s4, s6
 ; GFX8-NEXT:    s_and_b32 s6, s7, 0xff
 ; GFX8-NEXT:    s_and_b32 s6, 0xffff, s6
 ; GFX8-NEXT:    s_and_b32 s4, 0xffff, s4
 ; GFX8-NEXT:    s_lshl_b32 s6, s6, 16
 ; GFX8-NEXT:    s_or_b32 s4, s4, s6
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
-; GFX8-NEXT:    v_mul_hi_u32 v1, s4, v0
+; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
+; GFX8-NEXT:    v_mul_hi_u32 v2, s4, v0
 ; GFX8-NEXT:    s_lshr_b32 s9, s5, 8
 ; GFX8-NEXT:    s_and_b32 s5, s5, 0xff
 ; GFX8-NEXT:    s_lshl_b32 s5, s5, 8
-; GFX8-NEXT:    v_mul_lo_u32 v1, v1, 24
+; GFX8-NEXT:    v_mul_lo_u32 v2, v2, 24
 ; GFX8-NEXT:    s_and_b32 s6, s9, 0xff
 ; GFX8-NEXT:    s_or_b32 s5, s8, s5
 ; GFX8-NEXT:    s_and_b32 s6, 0xffff, s6
 ; GFX8-NEXT:    s_and_b32 s5, 0xffff, s5
 ; GFX8-NEXT:    s_lshl_b32 s6, s6, 16
 ; GFX8-NEXT:    s_or_b32 s5, s5, s6
-; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, s4, v1
-; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, 24, v1
+; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, s4, v2
+; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v2, v1
 ; GFX8-NEXT:    v_mul_hi_u32 v0, s5, v0
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
-; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, 24, v1
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v1
+; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v2, v1
+; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
 ; GFX8-NEXT:    v_mul_lo_u32 v0, v0, 24
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
-; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 23, v1
-; GFX8-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
-; GFX8-NEXT:    v_lshlrev_b32_e64 v1, v1, s0
-; GFX8-NEXT:    s_lshr_b32 s0, s2, 1
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, 23, v2
 ; GFX8-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
-; GFX8-NEXT:    v_lshrrev_b32_e64 v2, v2, s0
+; GFX8-NEXT:    v_lshlrev_b32_e64 v2, v2, s0
+; GFX8-NEXT:    s_lshr_b32 s0, s2, 1
+; GFX8-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
+; GFX8-NEXT:    v_lshrrev_b32_e64 v3, v3, s0
 ; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, s5, v0
-; GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
-; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, 24, v0
+; GFX8-NEXT:    v_or_b32_e32 v2, v2, v3
+; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v0, v1
 ; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, 24, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v0, v1
 ; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 23, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, 23, v0
 ; GFX8-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
 ; GFX8-NEXT:    s_lshr_b32 s0, s3, 1
-; GFX8-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
+; GFX8-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
 ; GFX8-NEXT:    v_lshlrev_b32_e64 v0, v0, s1
-; GFX8-NEXT:    v_lshrrev_b32_e64 v2, v2, s0
-; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
-; GFX8-NEXT:    v_mov_b32_e32 v2, 8
-; GFX8-NEXT:    v_lshlrev_b32_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
+; GFX8-NEXT:    v_lshrrev_b32_e64 v1, v1, s0
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
+; GFX8-NEXT:    v_mov_b32_e32 v1, 8
+; GFX8-NEXT:    v_lshlrev_b32_sdwa v3, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
 ; GFX8-NEXT:    v_mov_b32_e32 v4, 16
-; GFX8-NEXT:    v_or_b32_sdwa v3, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
-; GFX8-NEXT:    v_or_b32_e32 v1, v3, v1
+; GFX8-NEXT:    v_or_b32_sdwa v3, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
+; GFX8-NEXT:    v_or_b32_e32 v2, v3, v2
 ; GFX8-NEXT:    v_and_b32_e32 v3, 0xff, v0
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
-; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
-; GFX8-NEXT:    v_or_b32_e32 v1, v1, v3
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
-; GFX8-NEXT:    v_readfirstlane_b32 s0, v1
+; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
+; GFX8-NEXT:    v_or_b32_e32 v2, v2, v3
+; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
+; GFX8-NEXT:    v_readfirstlane_b32 s0, v2
 ; GFX8-NEXT:    v_readfirstlane_b32 s1, v0
 ; GFX8-NEXT:    ; return to shader part epilog
 ;
@@ -2172,10 +2172,10 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX9-NEXT:    s_or_b32 s5, s5, s6
 ; GFX9-NEXT:    v_mul_hi_u32 v0, s5, v0
 ; GFX9-NEXT:    v_sub_u32_e32 v1, s4, v1
-; GFX9-NEXT:    v_subrev_u32_e32 v2, 24, v1
+; GFX9-NEXT:    v_add_u32_e32 v2, 0xffffffe8, v1
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
-; GFX9-NEXT:    v_subrev_u32_e32 v2, 24, v1
+; GFX9-NEXT:    v_add_u32_e32 v2, 0xffffffe8, v1
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v1
 ; GFX9-NEXT:    v_mul_lo_u32 v0, v0, 24
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
@@ -2186,10 +2186,10 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX9-NEXT:    v_lshrrev_b32_e64 v2, v2, s2
 ; GFX9-NEXT:    v_sub_u32_e32 v0, s5, v0
 ; GFX9-NEXT:    v_lshl_or_b32 v1, s0, v1, v2
-; GFX9-NEXT:    v_subrev_u32_e32 v2, 24, v0
+; GFX9-NEXT:    v_add_u32_e32 v2, 0xffffffe8, v0
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX9-NEXT:    v_subrev_u32_e32 v2, 24, v0
+; GFX9-NEXT:    v_add_u32_e32 v2, 0xffffffe8, v0
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX9-NEXT:    v_sub_u32_e32 v2, 23, v0
@@ -2282,9 +2282,9 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX10-NEXT:    s_lshr_b32 s4, s3, 8
 ; GFX10-NEXT:    s_and_b32 s5, s9, 0xff
 ; GFX10-NEXT:    s_and_b32 s3, s3, 0xff
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v2, 24, v1
+; GFX10-NEXT:    v_add_nc_u32_e32 v2, 0xffffffe8, v1
 ; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v1
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, 24, v0
+; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v0
 ; GFX10-NEXT:    s_and_b32 s5, 0xffff, s5
 ; GFX10-NEXT:    s_lshl_b32 s3, s3, 8
 ; GFX10-NEXT:    s_and_b32 s4, s4, 0xff
@@ -2293,13 +2293,13 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX10-NEXT:    s_lshl_b32 s5, s5, 16
 ; GFX10-NEXT:    s_or_b32 s3, s10, s3
 ; GFX10-NEXT:    s_and_b32 s4, 0xffff, s4
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v2, 24, v1
+; GFX10-NEXT:    v_add_nc_u32_e32 v2, 0xffffffe8, v1
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
 ; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v1
 ; GFX10-NEXT:    s_or_b32 s2, s2, s5
 ; GFX10-NEXT:    s_and_b32 s3, 0xffff, s3
 ; GFX10-NEXT:    s_lshl_b32 s4, s4, 16
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, 24, v0
+; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc_lo
 ; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
 ; GFX10-NEXT:    s_or_b32 s3, s3, s4
@@ -2399,9 +2399,9 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX11-NEXT:    s_and_b32 s5, s8, 0xff
 ; GFX11-NEXT:    s_lshr_b32 s4, s3, 8
 ; GFX11-NEXT:    s_and_b32 s5, 0xffff, s5
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v2, 24, v1
+; GFX11-NEXT:    v_add_nc_u32_e32 v2, 0xffffffe8, v1
 ; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v1
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v3, 24, v0
+; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v0
 ; GFX11-NEXT:    s_and_b32 s3, s3, 0xff
 ; GFX11-NEXT:    s_lshl_b32 s5, s5, 16
 ; GFX11-NEXT:    s_lshl_b32 s3, s3, 8
@@ -2410,7 +2410,7 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX11-NEXT:    s_and_b32 s4, s4, 0xff
 ; GFX11-NEXT:    s_or_b32 s2, s2, s5
 ; GFX11-NEXT:    s_or_b32 s3, s9, s3
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v2, 24, v1
+; GFX11-NEXT:    v_add_nc_u32_e32 v2, 0xffffffe8, v1
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
 ; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v1
 ; GFX11-NEXT:    s_and_b32 s4, 0xffff, s4
@@ -2423,7 +2423,7 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX11-NEXT:    s_lshr_b32 s3, s3, 1
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v2, 23, v1
 ; GFX11-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v3, 24, v0
+; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v0
 ; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
 ; GFX11-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
@@ -2479,31 +2479,31 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
 ; GFX6-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v6, v6
 ; GFX6-NEXT:    v_bfe_u32 v2, v2, 1, 23
-; GFX6-NEXT:    v_mul_lo_u32 v7, v6, v7
-; GFX6-NEXT:    v_mul_hi_u32 v7, v6, v7
-; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
-; GFX6-NEXT:    v_mul_hi_u32 v7, v4, v6
+; GFX6-NEXT:    v_mul_lo_u32 v8, v6, v7
+; GFX6-NEXT:    v_mul_hi_u32 v8, v6, v8
+; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; GFX6-NEXT:    v_mul_hi_u32 v8, v4, v6
 ; GFX6-NEXT:    v_mul_hi_u32 v6, v5, v6
-; GFX6-NEXT:    v_mul_lo_u32 v7, v7, 24
+; GFX6-NEXT:    v_mul_lo_u32 v8, v8, 24
 ; GFX6-NEXT:    v_mul_lo_u32 v6, v6, 24
-; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, v4, v7
-; GFX6-NEXT:    v_subrev_i32_e32 v7, vcc, 24, v4
+; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, v4, v8
+; GFX6-NEXT:    v_add_i32_e32 v8, vcc, v4, v7
 ; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
-; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
-; GFX6-NEXT:    v_subrev_i32_e32 v7, vcc, 24, v4
+; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
+; GFX6-NEXT:    v_add_i32_e32 v8, vcc, v4, v7
 ; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
-; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
-; GFX6-NEXT:    v_sub_i32_e32 v7, vcc, 23, v4
+; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
+; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, 23, v4
 ; GFX6-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v4, v0
-; GFX6-NEXT:    v_and_b32_e32 v4, 0xffffff, v7
+; GFX6-NEXT:    v_and_b32_e32 v4, 0xffffff, v8
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
 ; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v5, v6
-; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, 24, v2
+; GFX6-NEXT:    v_add_i32_e32 v4, vcc, v2, v7
 ; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
-; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, 24, v2
+; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 0xffffffe8, v2
 ; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 23, v2
@@ -2526,31 +2526,31 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
 ; GFX8-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v6, v6
 ; GFX8-NEXT:    v_bfe_u32 v2, v2, 1, 23
-; GFX8-NEXT:    v_mul_lo_u32 v7, v6, v7
-; GFX8-NEXT:    v_mul_hi_u32 v7, v6, v7
-; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v7
-; GFX8-NEXT:    v_mul_hi_u32 v7, v4, v6
+; GFX8-NEXT:    v_mul_lo_u32 v8, v6, v7
+; GFX8-NEXT:    v_mul_hi_u32 v8, v6, v8
+; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v8
+; GFX8-NEXT:    v_mul_hi_u32 v8, v4, v6
 ; GFX8-NEXT:    v_mul_hi_u32 v6, v5, v6
-; GFX8-NEXT:    v_mul_lo_u32 v7, v7, 24
+; GFX8-NEXT:    v_mul_lo_u32 v8, v8, 24
 ; GFX8-NEXT:    v_mul_lo_u32 v6, v6, 24
-; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, v4, v7
-; GFX8-NEXT:    v_subrev_u32_e32 v7, vcc, 24, v4
+; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, v4, v8
+; GFX8-NEXT:    v_add_u32_e32 v8, vcc, v4, v7
 ; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
-; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
-; GFX8-NEXT:    v_subrev_u32_e32 v7, vcc, 24, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
+; GFX8-NEXT:    v_add_u32_e32 v8, vcc, v4, v7
 ; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
-; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
-; GFX8-NEXT:    v_sub_u32_e32 v7, vcc, 23, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
+; GFX8-NEXT:    v_sub_u32_e32 v8, vcc, 23, v4
 ; GFX8-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v0, v4, v0
-; GFX8-NEXT:    v_and_b32_e32 v4, 0xffffff, v7
+; GFX8-NEXT:    v_and_b32_e32 v4, 0xffffff, v8
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
 ; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v5, v6
-; GFX8-NEXT:    v_subrev_u32_e32 v4, vcc, 24, v2
+; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v2, v7
 ; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
-; GFX8-NEXT:    v_subrev_u32_e32 v4, vcc, 24, v2
+; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 0xffffffe8, v2
 ; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 23, v2
@@ -2583,21 +2583,21 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
 ; GFX9-NEXT:    v_mul_lo_u32 v6, v6, 24
 ; GFX9-NEXT:    v_sub_u32_e32 v4, v4, v7
 ; GFX9-NEXT:    v_sub_u32_e32 v5, v5, v6
-; GFX9-NEXT:    v_subrev_u32_e32 v6, 24, v4
+; GFX9-NEXT:    v_add_u32_e32 v6, 0xffffffe8, v4
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
-; GFX9-NEXT:    v_subrev_u32_e32 v6, 24, v4
+; GFX9-NEXT:    v_add_u32_e32 v6, 0xffffffe8, v4
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
 ; GFX9-NEXT:    v_sub_u32_e32 v6, 23, v4
 ; GFX9-NEXT:    v_and_b32_e32 v6, 0xffffff, v6
-; GFX9-NEXT:    v_subrev_u32_e32 v7, 24, v5
+; GFX9-NEXT:    v_add_u32_e32 v7, 0xffffffe8, v5
 ; GFX9-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
 ; GFX9-NEXT:    v_lshrrev_b32_e32 v2, v6, v2
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v5
 ; GFX9-NEXT:    v_lshl_or_b32 v0, v0, v4, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v5, v7, vcc
-; GFX9-NEXT:    v_subrev_u32_e32 v4, 24, v2
+; GFX9-NEXT:    v_add_u32_e32 v4, 0xffffffe8, v2
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX9-NEXT:    v_sub_u32_e32 v4, 23, v2
@@ -2627,15 +2627,15 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
 ; GFX10-NEXT:    v_mul_lo_u32 v6, v6, 24
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v4, v4, v7
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v5, v5, v6
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v6, 24, v4
+; GFX10-NEXT:    v_add_nc_u32_e32 v6, 0xffffffe8, v4
 ; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v4
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v7, 24, v5
+; GFX10-NEXT:    v_add_nc_u32_e32 v7, 0xffffffe8, v5
 ; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc_lo
 ; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v5
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v6, 24, v4
+; GFX10-NEXT:    v_add_nc_u32_e32 v6, 0xffffffe8, v4
 ; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc_lo
 ; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v4
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v7, 24, v5
+; GFX10-NEXT:    v_add_nc_u32_e32 v7, 0xffffffe8, v5
 ; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc_lo
 ; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v5
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v6, 23, v4
@@ -2679,34 +2679,32 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_mul_lo_u32 v6, v6, 24
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v5, v5, v6
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v6, 24, v4
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v7, 24, v5
+; GFX11-NEXT:    v_add_nc_u32_e32 v6, 0xffffffe8, v4
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc_lo
 ; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v5
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v6, 24, v4
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_add_nc_u32_e32 v6, 0xffffffe8, v4
+; GFX11-NEXT:    v_add_nc_u32_e32 v7, 0xffffffe8, v5
 ; GFX11-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc_lo
 ; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v4
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v7, 24, v5
-; GFX11-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_dual_cndmask_b32 v4, v4, v6 :: v_dual_add_nc_u32 v7, 0xffffffe8, v5
 ; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v5
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc_lo
-; GFX11-NEXT:    v_sub_nc_u32_e32 v7, 23, v5
-; GFX11-NEXT:    v_and_b32_e32 v5, 0xffffff, v5
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_and_b32_e32 v7, 0xffffff, v7
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v6, 23, v4
-; GFX11-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
-; GFX11-NEXT:    v_lshrrev_b32_e32 v3, v7, v3
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_dual_cndmask_b32 v5, v5, v7 :: v_dual_and_b32 v4, 0xffffff, v4
 ; GFX11-NEXT:    v_and_b32_e32 v6, 0xffffff, v6
-; GFX11-NEXT:    v_lshl_or_b32 v1, v1, v5, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    v_sub_nc_u32_e32 v7, 23, v5
+; GFX11-NEXT:    v_and_b32_e32 v5, 0xffffff, v5
 ; GFX11-NEXT:    v_lshrrev_b32_e32 v2, v6, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_and_b32_e32 v7, 0xffffff, v7
 ; GFX11-NEXT:    v_lshl_or_b32 v0, v0, v4, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_lshrrev_b32_e32 v3, v7, v3
+; GFX11-NEXT:    v_lshl_or_b32 v1, v1, v5, v3
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %result = call <2 x i24> @llvm.fshl.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt)
   ret <2 x i24> %result
@@ -6061,11 +6059,11 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    v_and_b32_e32 v15, 0x7f, v8
 ; GFX6-NEXT:    v_sub_i32_e32 v9, vcc, 64, v15
-; GFX6-NEXT:    v_subrev_i32_e32 v16, vcc, 64, v15
+; GFX6-NEXT:    v_add_i32_e32 v17, vcc, 0xffffffc0, v15
 ; GFX6-NEXT:    v_lshr_b64 v[9:10], v[0:1], v9
 ; GFX6-NEXT:    v_lshl_b64 v[11:12], v[2:3], v15
 ; GFX6-NEXT:    v_lshl_b64 v[13:14], v[0:1], v15
-; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], v16
+; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], v17
 ; GFX6-NEXT:    v_or_b32_e32 v9, v9, v11
 ; GFX6-NEXT:    v_or_b32_e32 v10, v10, v12
 ; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v15
@@ -6082,8 +6080,9 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
 ; GFX6-NEXT:    v_lshr_b64 v[2:3], v[6:7], 1
 ; GFX6-NEXT:    v_and_b32_e32 v14, 0x7f, v4
+; GFX6-NEXT:    v_not_b32_e32 v16, 63
 ; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, 64, v14
-; GFX6-NEXT:    v_subrev_i32_e32 v15, vcc, 64, v14
+; GFX6-NEXT:    v_add_i32_e32 v15, vcc, v14, v16
 ; GFX6-NEXT:    v_lshr_b64 v[4:5], v[0:1], v14
 ; GFX6-NEXT:    v_lshl_b64 v[6:7], v[2:3], v6
 ; GFX6-NEXT:    v_lshr_b64 v[8:9], v[2:3], v14
@@ -6109,11 +6108,11 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_and_b32_e32 v15, 0x7f, v8
 ; GFX8-NEXT:    v_sub_u32_e32 v9, vcc, 64, v15
-; GFX8-NEXT:    v_subrev_u32_e32 v16, vcc, 64, v15
+; GFX8-NEXT:    v_add_u32_e32 v17, vcc, 0xffffffc0, v15
 ; GFX8-NEXT:    v_lshrrev_b64 v[9:10], v9, v[0:1]
 ; GFX8-NEXT:    v_lshlrev_b64 v[11:12], v15, v[2:3]
 ; GFX8-NEXT:    v_lshlrev_b64 v[13:14], v15, v[0:1]
-; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v16, v[0:1]
+; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v17, v[0:1]
 ; GFX8-NEXT:    v_or_b32_e32 v9, v9, v11
 ; GFX8-NEXT:    v_or_b32_e32 v10, v10, v12
 ; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v15
@@ -6130,8 +6129,9 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
 ; GFX8-NEXT:    v_lshrrev_b64 v[2:3], 1, v[6:7]
 ; GFX8-NEXT:    v_and_b32_e32 v14, 0x7f, v4
+; GFX8-NEXT:    v_not_b32_e32 v16, 63
 ; GFX8-NEXT:    v_sub_u32_e32 v6, vcc, 64, v14
-; GFX8-NEXT:    v_subrev_u32_e32 v15, vcc, 64, v14
+; GFX8-NEXT:    v_add_u32_e32 v15, vcc, v14, v16
 ; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v14, v[0:1]
 ; GFX8-NEXT:    v_lshlrev_b64 v[6:7], v6, v[2:3]
 ; GFX8-NEXT:    v_lshrrev_b64 v[8:9], v14, v[2:3]
@@ -6157,7 +6157,7 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_and_b32_e32 v15, 0x7f, v8
 ; GFX9-NEXT:    v_sub_u32_e32 v9, 64, v15
-; GFX9-NEXT:    v_subrev_u32_e32 v16, 64, v15
+; GFX9-NEXT:    v_add_u32_e32 v16, 0xffffffc0, v15
 ; GFX9-NEXT:    v_lshrrev_b64 v[9:10], v9, v[0:1]
 ; GFX9-NEXT:    v_lshlrev_b64 v[11:12], v15, v[2:3]
 ; GFX9-NEXT:    v_lshlrev_b64 v[13:14], v15, v[0:1]
@@ -6178,7 +6178,7 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX9-NEXT:    v_and_b32_e32 v14, 0x7f, v4
 ; GFX9-NEXT:    v_lshl_or_b32 v1, v6, 31, v1
 ; GFX9-NEXT:    v_sub_u32_e32 v6, 64, v14
-; GFX9-NEXT:    v_subrev_u32_e32 v15, 64, v14
+; GFX9-NEXT:    v_add_u32_e32 v15, 0xffffffc0, v14
 ; GFX9-NEXT:    v_lshrrev_b64 v[4:5], v14, v[0:1]
 ; GFX9-NEXT:    v_lshlrev_b64 v[6:7], v6, v[2:3]
 ; GFX9-NEXT:    v_lshrrev_b64 v[8:9], v14, v[2:3]
@@ -6210,7 +6210,7 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX10-NEXT:    v_and_b32_e32 v19, 0x7f, v10
 ; GFX10-NEXT:    v_lshlrev_b64 v[8:9], v18, v[2:3]
 ; GFX10-NEXT:    v_lshl_or_b32 v5, v6, 31, v5
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v20, 64, v18
+; GFX10-NEXT:    v_add_nc_u32_e32 v20, 0xffffffc0, v18
 ; GFX10-NEXT:    v_lshrrev_b64 v[10:11], v11, v[0:1]
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v16, 64, v19
 ; GFX10-NEXT:    v_lshlrev_b64 v[6:7], v18, v[0:1]
@@ -6218,7 +6218,7 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v20, v[0:1]
 ; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v18
 ; GFX10-NEXT:    v_or_b32_e32 v10, v10, v8
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v8, 64, v19
+; GFX10-NEXT:    v_add_nc_u32_e32 v8, 0xffffffc0, v19
 ; GFX10-NEXT:    v_lshlrev_b64 v[16:17], v16, v[12:13]
 ; GFX10-NEXT:    v_or_b32_e32 v11, v11, v9
 ; GFX10-NEXT:    v_cmp_gt_u32_e64 s4, 64, v19
@@ -6258,34 +6258,34 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX11-NEXT:    v_and_b32_e32 v19, 0x7f, v10
 ; GFX11-NEXT:    v_lshlrev_b64 v[8:9], v18, v[2:3]
 ; GFX11-NEXT:    v_lshl_or_b32 v5, v6, 31, v5
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v20, 64, v18
+; GFX11-NEXT:    v_lshlrev_b64 v[6:7], v18, v[0:1]
 ; GFX11-NEXT:    v_lshrrev_b64 v[10:11], v11, v[0:1]
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v16, 64, v19
-; GFX11-NEXT:    v_lshlrev_b64 v[6:7], v18, v[0:1]
-; GFX11-NEXT:    v_lshrrev_b64 v[14:15], v19, v[4:5]
-; GFX11-NEXT:    v_lshlrev_b64 v[0:1], v20, v[0:1]
 ; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v18
+; GFX11-NEXT:    v_add_nc_u32_e32 v20, 0xffffffc0, v18
+; GFX11-NEXT:    v_lshrrev_b64 v[14:15], v19, v[4:5]
+; GFX11-NEXT:    v_cmp_gt_u32_e64 s0, 64, v19
 ; GFX11-NEXT:    v_or_b32_e32 v10, v10, v8
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v8, 64, v19
+; GFX11-NEXT:    v_cndmask_b32_e32 v7, 0, v7, vcc_lo
+; GFX11-NEXT:    v_add_nc_u32_e32 v8, 0xffffffc0, v19
 ; GFX11-NEXT:    v_lshlrev_b64 v[16:17], v16, v[12:13]
+; GFX11-NEXT:    v_lshlrev_b64 v[0:1], v20, v[0:1]
 ; GFX11-NEXT:    v_or_b32_e32 v11, v11, v9
-; GFX11-NEXT:    v_cmp_gt_u32_e64 s0, 64, v19
-; GFX11-NEXT:    v_cndmask_b32_e32 v10, v0, v10, vcc_lo
-; GFX11-NEXT:    v_lshrrev_b64 v[8:9], v8, v[12:13]
 ; GFX11-NEXT:    v_cmp_eq_u32_e64 s1, 0, v19
-; GFX11-NEXT:    v_cndmask_b32_e32 v11, v1, v11, vcc_lo
+; GFX11-NEXT:    v_lshrrev_b64 v[8:9], v8, v[12:13]
+; GFX11-NEXT:    v_cndmask_b32_e32 v6, 0, v6, vcc_lo
 ; GFX11-NEXT:    v_or_b32_e32 v14, v14, v16
 ; GFX11-NEXT:    v_or_b32_e32 v15, v15, v17
+; GFX11-NEXT:    v_dual_cndmask_b32 v10, v0, v10 :: v_dual_cndmask_b32 v11, v1, v11
 ; GFX11-NEXT:    v_lshrrev_b64 v[0:1], v19, v[12:13]
-; GFX11-NEXT:    v_cmp_eq_u32_e64 s2, 0, v18
-; GFX11-NEXT:    v_dual_cndmask_b32 v6, 0, v6 :: v_dual_cndmask_b32 v7, 0, v7
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_3)
 ; GFX11-NEXT:    v_cndmask_b32_e64 v8, v8, v14, s0
+; GFX11-NEXT:    v_cmp_eq_u32_e64 s2, 0, v18
 ; GFX11-NEXT:    v_cndmask_b32_e64 v9, v9, v15, s0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_cndmask_b32_e64 v4, v8, v4, s1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
 ; GFX11-NEXT:    v_cndmask_b32_e64 v2, v10, v2, s2
 ; GFX11-NEXT:    v_cndmask_b32_e64 v3, v11, v3, s2
-; GFX11-NEXT:    v_cndmask_b32_e64 v4, v8, v4, s1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
 ; GFX11-NEXT:    v_cndmask_b32_e64 v5, v9, v5, s1
 ; GFX11-NEXT:    v_cndmask_b32_e64 v8, 0, v0, s0
 ; GFX11-NEXT:    v_cndmask_b32_e64 v9, 0, v1, s0
@@ -6307,15 +6307,15 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 64, v7
 ; GFX6-NEXT:    v_lshr_b64 v[1:2], s[0:1], v1
 ; GFX6-NEXT:    v_lshl_b64 v[3:4], s[2:3], v7
-; GFX6-NEXT:    v_subrev_i32_e32 v8, vcc, 64, v7
+; GFX6-NEXT:    v_add_i32_e32 v9, vcc, 0xffffffc0, v7
 ; GFX6-NEXT:    v_lshl_b64 v[5:6], s[0:1], v7
 ; GFX6-NEXT:    v_or_b32_e32 v3, v1, v3
 ; GFX6-NEXT:    v_or_b32_e32 v4, v2, v4
-; GFX6-NEXT:    v_lshl_b64 v[1:2], s[0:1], v8
+; GFX6-NEXT:    v_lshl_b64 v[1:2], s[0:1], v9
 ; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
 ; GFX6-NEXT:    v_not_b32_e32 v0, v0
 ; GFX6-NEXT:    s_mov_b32 s8, 0
-; GFX6-NEXT:    v_cndmask_b32_e32 v8, 0, v5, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v9, 0, v5, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v6, 0, v6, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
@@ -6324,33 +6324,34 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
 ; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
 ; GFX6-NEXT:    s_lshr_b64 s[0:1], s[4:5], 1
 ; GFX6-NEXT:    s_lshl_b32 s9, s6, 31
-; GFX6-NEXT:    v_and_b32_e32 v10, 0x7f, v0
+; GFX6-NEXT:    v_and_b32_e32 v11, 0x7f, v0
 ; GFX6-NEXT:    v_cndmask_b32_e32 v7, v1, v3, vcc
-; GFX6-NEXT:    v_cndmask_b32_e32 v9, v2, v4, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v10, v2, v4, vcc
 ; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[8:9]
 ; GFX6-NEXT:    s_lshr_b64 s[2:3], s[6:7], 1
-; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 64, v10
-; GFX6-NEXT:    v_lshr_b64 v[0:1], s[0:1], v10
+; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 64, v11
+; GFX6-NEXT:    v_not_b32_e32 v8, 63
+; GFX6-NEXT:    v_lshr_b64 v[0:1], s[0:1], v11
 ; GFX6-NEXT:    v_lshl_b64 v[2:3], s[2:3], v2
-; GFX6-NEXT:    v_subrev_i32_e32 v11, vcc, 64, v10
+; GFX6-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
 ; GFX6-NEXT:    v_or_b32_e32 v2, v0, v2
 ; GFX6-NEXT:    v_or_b32_e32 v3, v1, v3
-; GFX6-NEXT:    v_lshr_b64 v[0:1], s[2:3], v11
-; GFX6-NEXT:    v_lshr_b64 v[4:5], s[2:3], v10
-; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v10
+; GFX6-NEXT:    v_lshr_b64 v[0:1], s[2:3], v8
+; GFX6-NEXT:    v_lshr_b64 v[4:5], s[2:3], v11
+; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v11
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX6-NEXT:    v_mov_b32_e32 v3, s1
-; GFX6-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v10
+; GFX6-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v11
 ; GFX6-NEXT:    v_cndmask_b32_e64 v0, v0, v2, s[0:1]
 ; GFX6-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s[0:1]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, 0, v4, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
-; GFX6-NEXT:    v_or_b32_e32 v0, v8, v0
+; GFX6-NEXT:    v_or_b32_e32 v0, v9, v0
 ; GFX6-NEXT:    v_or_b32_e32 v1, v6, v1
 ; GFX6-NEXT:    v_or_b32_e32 v2, v7, v2
-; GFX6-NEXT:    v_or_b32_e32 v3, v9, v3
+; GFX6-NEXT:    v_or_b32_e32 v3, v10, v3
 ; GFX6-NEXT:    ; return to shader part epilog
 ;
 ; GFX8-LABEL: v_fshl_i128_ssv:
@@ -6359,15 +6360,15 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
 ; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, 64, v7
 ; GFX8-NEXT:    v_lshrrev_b64 v[1:2], v1, s[0:1]
 ; GFX8-NEXT:    v_lshlrev_b64 v[3:4], v7, s[2:3]
-; GFX8-NEXT:    v_subrev_u32_e32 v8, vcc, 64, v7
+; GFX8-NEXT:    v_add_u32_e32 v9, vcc, 0xffffffc0, v7
 ; GFX8-NEXT:    v_lshlrev_b64 v[5:6], v7, s[0:1]
 ; GFX8-NEXT:    v_or_b32_e32 v3, v1, v3
 ; GFX8-NEXT:    v_or_b32_e32 v4, v2, v4
-; GFX8-NEXT:    v_lshlrev_b64 v[1:2], v8, s[0:1]
+; GFX8-NEXT:    v_lshlrev_b64 v[1:2], v9, s[0:1]
 ; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
 ; GFX8-NEXT:    v_not_b32_e32 v0, v0
 ; GFX8-NEXT:    s_mov_b32 s8, 0
-; GFX8-NEXT:    v_cndmask_b32_e32 v8, 0, v5, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v9, 0, v5, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v6, 0, v6, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
@@ -6376,33 +6377,34 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
 ; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
 ; GFX8-NEXT:    s_lshr_b64 s[0:1], s[4:5], 1
 ; GFX8-NEXT:    s_lshl_b32 s9, s6, 31
-; GFX8-NEXT:    v_and_b32_e32 v10, 0x7f, v0
+; GFX8-NEXT:    v_and_b32_e32 v11, 0x7f, v0
 ; GFX8-NEXT:    v_cndmask_b32_e32 v7, v1, v3, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v9, v2, v4, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v10, v2, v4, vcc
 ; GFX8-NEXT:    s_or_b64 s[0:1], s[0:1], s[8:9]
 ; GFX8-NEXT:    s_lshr_b64 s[2:3], s[6:7], 1
-; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 64, v10
-; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v10, s[0:1]
+; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 64, v11
+; GFX8-NEXT:    v_not_b32_e32 v8, 63
+; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v11, s[0:1]
 ; GFX8-NEXT:    v_lshlrev_b64 v[2:3], v2, s[2:3]
-; GFX8-NEXT:    v_subrev_u32_e32 v11, vcc, 64, v10
+; GFX8-NEXT:    v_add_u32_e32 v8, vcc, v11, v8
 ; GFX8-NEXT:    v_or_b32_e32 v2, v0, v2
 ; GFX8-NEXT:    v_or_b32_e32 v3, v1, v3
-; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v11, s[2:3]
-; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v10, s[2:3]
-; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v10
+; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v8, s[2:3]
+; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v11, s[2:3]
+; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v11
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX8-NEXT:    v_mov_b32_e32 v3, s1
-; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v10
+; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v11
 ; GFX8-NEXT:    v_cndmask_b32_e64 v0, v0, v2, s[0:1]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s[0:1]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, 0, v4, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
-; GFX8-NEXT:    v_or_b32_e32 v0, v8, v0
+; GFX8-NEXT:    v_or_b32_e32 v0, v9, v0
 ; GFX8-NEXT:    v_or_b32_e32 v1, v6, v1
 ; GFX8-NEXT:    v_or_b32_e32 v2, v7, v2
-; GFX8-NEXT:    v_or_b32_e32 v3, v9, v3
+; GFX8-NEXT:    v_or_b32_e32 v3, v10, v3
 ; GFX8-NEXT:    ; return to shader part epilog
 ;
 ; GFX9-LABEL: v_fshl_i128_ssv:
@@ -6411,7 +6413,7 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
 ; GFX9-NEXT:    v_sub_u32_e32 v1, 64, v7
 ; GFX9-NEXT:    v_lshrrev_b64 v[1:2], v1, s[0:1]
 ; GFX9-NEXT:    v_lshlrev_b64 v[3:4], v7, s[2:3]
-; GFX9-NEXT:    v_subrev_u32_e32 v8, 64, v7
+; GFX9-NEXT:    v_add_u32_e32 v8, 0xffffffc0, v7
 ; GFX9-NEXT:    v_lshlrev_b64 v[5:6], v7, s[0:1]
 ; GFX9-NEXT:    v_or_b32_e32 v3, v1, v3
 ; GFX9-NEXT:    v_or_b32_e32 v4, v2, v4
@@ -6436,7 +6438,7 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
 ; GFX9-NEXT:    v_cndmask_b32_e32 v7, v1, v3, vcc
 ; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v10, s[0:1]
 ; GFX9-NEXT:    v_lshlrev_b64 v[2:3], v2, s[2:3]
-; GFX9-NEXT:    v_subrev_u32_e32 v11, 64, v10
+; GFX9-NEXT:    v_add_u32_e32 v11, 0xffffffc0, v10
 ; GFX9-NEXT:    v_or_b32_e32 v2, v0, v2
 ; GFX9-NEXT:    v_or_b32_e32 v3, v1, v3
 ; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v11, s[2:3]
@@ -6471,12 +6473,12 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
 ; GFX10-NEXT:    s_lshr_b64 s[6:7], s[6:7], 1
 ; GFX10-NEXT:    v_lshrrev_b64 v[2:3], v3, s[0:1]
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v8, 64, v13
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v10, 64, v12
+; GFX10-NEXT:    v_add_nc_u32_e32 v10, 0xffffffc0, v12
 ; GFX10-NEXT:    v_lshrrev_b64 v[6:7], v13, s[8:9]
 ; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v12
 ; GFX10-NEXT:    v_lshlrev_b64 v[4:5], v12, s[0:1]
 ; GFX10-NEXT:    v_or_b32_e32 v2, v2, v0
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v0, 64, v13
+; GFX10-NEXT:    v_add_nc_u32_e32 v0, 0xffffffc0, v13
 ; GFX10-NEXT:    v_lshlrev_b64 v[8:9], v8, s[6:7]
 ; GFX10-NEXT:    v_lshlrev_b64 v[10:11], v10, s[0:1]
 ; GFX10-NEXT:    v_or_b32_e32 v3, v3, v1
@@ -6522,7 +6524,7 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v3, 64, v12
 ; GFX11-NEXT:    v_lshlrev_b64 v[0:1], v12, s[2:3]
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v8, 64, v13
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v10, 64, v12
+; GFX11-NEXT:    v_add_nc_u32_e32 v10, 0xffffffc0, v12
 ; GFX11-NEXT:    v_lshrrev_b64 v[6:7], v13, s[8:9]
 ; GFX11-NEXT:    v_lshrrev_b64 v[2:3], v3, s[0:1]
 ; GFX11-NEXT:    v_cmp_eq_u32_e64 s4, 0, v12
@@ -6531,7 +6533,7 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
 ; GFX11-NEXT:    v_cmp_gt_u32_e64 s0, 64, v13
 ; GFX11-NEXT:    v_cmp_eq_u32_e64 s1, 0, v13
 ; GFX11-NEXT:    v_or_b32_e32 v2, v2, v0
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v0, 64, v13
+; GFX11-NEXT:    v_add_nc_u32_e32 v0, 0xffffffc0, v13
 ; GFX11-NEXT:    v_or_b32_e32 v3, v3, v1
 ; GFX11-NEXT:    v_or_b32_e32 v6, v6, v8
 ; GFX11-NEXT:    v_or_b32_e32 v7, v7, v9
@@ -7677,12 +7679,13 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    v_and_b32_e32 v19, 0x7f, v16
+; GFX6-NEXT:    v_not_b32_e32 v25, 63
 ; GFX6-NEXT:    v_sub_i32_e32 v17, vcc, 64, v19
-; GFX6-NEXT:    v_subrev_i32_e32 v25, vcc, 64, v19
+; GFX6-NEXT:    v_add_i32_e32 v26, vcc, v19, v25
 ; GFX6-NEXT:    v_lshr_b64 v[17:18], v[0:1], v17
 ; GFX6-NEXT:    v_lshl_b64 v[21:22], v[2:3], v19
 ; GFX6-NEXT:    v_lshl_b64 v[23:24], v[0:1], v19
-; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], v25
+; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], v26
 ; GFX6-NEXT:    v_or_b32_e32 v17, v17, v21
 ; GFX6-NEXT:    v_or_b32_e32 v18, v18, v22
 ; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v19
@@ -7700,7 +7703,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX6-NEXT:    v_lshr_b64 v[2:3], v[10:11], 1
 ; GFX6-NEXT:    v_and_b32_e32 v23, 0x7f, v8
 ; GFX6-NEXT:    v_sub_i32_e32 v10, vcc, 64, v23
-; GFX6-NEXT:    v_subrev_i32_e32 v24, vcc, 64, v23
+; GFX6-NEXT:    v_add_i32_e32 v24, vcc, v23, v25
 ; GFX6-NEXT:    v_lshr_b64 v[8:9], v[0:1], v23
 ; GFX6-NEXT:    v_lshl_b64 v[10:11], v[2:3], v10
 ; GFX6-NEXT:    v_lshr_b64 v[16:17], v[2:3], v23
@@ -7719,7 +7722,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX6-NEXT:    v_cndmask_b32_e32 v3, 0, v17, vcc
 ; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, 64, v18
 ; GFX6-NEXT:    v_or_b32_e32 v3, v19, v3
-; GFX6-NEXT:    v_subrev_i32_e32 v19, vcc, 64, v18
+; GFX6-NEXT:    v_add_i32_e32 v19, vcc, v18, v25
 ; GFX6-NEXT:    v_lshr_b64 v[8:9], v[4:5], v8
 ; GFX6-NEXT:    v_lshl_b64 v[10:11], v[6:7], v18
 ; GFX6-NEXT:    v_lshl_b64 v[16:17], v[4:5], v18
@@ -7741,7 +7744,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX6-NEXT:    v_lshr_b64 v[6:7], v[14:15], 1
 ; GFX6-NEXT:    v_and_b32_e32 v14, 0x7f, v8
 ; GFX6-NEXT:    v_sub_i32_e32 v10, vcc, 64, v14
-; GFX6-NEXT:    v_subrev_i32_e32 v15, vcc, 64, v14
+; GFX6-NEXT:    v_add_i32_e32 v15, vcc, v14, v25
 ; GFX6-NEXT:    v_lshr_b64 v[8:9], v[4:5], v14
 ; GFX6-NEXT:    v_lshl_b64 v[10:11], v[6:7], v10
 ; GFX6-NEXT:    v_lshr_b64 v[12:13], v[6:7], v14
@@ -7768,12 +7771,13 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_and_b32_e32 v19, 0x7f, v16
+; GFX8-NEXT:    v_not_b32_e32 v25, 63
 ; GFX8-NEXT:    v_sub_u32_e32 v17, vcc, 64, v19
-; GFX8-NEXT:    v_subrev_u32_e32 v25, vcc, 64, v19
+; GFX8-NEXT:    v_add_u32_e32 v26, vcc, v19, v25
 ; GFX8-NEXT:    v_lshrrev_b64 v[17:18], v17, v[0:1]
 ; GFX8-NEXT:    v_lshlrev_b64 v[21:22], v19, v[2:3]
 ; GFX8-NEXT:    v_lshlrev_b64 v[23:24], v19, v[0:1]
-; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v25, v[0:1]
+; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v26, v[0:1]
 ; GFX8-NEXT:    v_or_b32_e32 v17, v17, v21
 ; GFX8-NEXT:    v_or_b32_e32 v18, v18, v22
 ; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v19
@@ -7791,7 +7795,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX8-NEXT:    v_lshrrev_b64 v[2:3], 1, v[10:11]
 ; GFX8-NEXT:    v_and_b32_e32 v23, 0x7f, v8
 ; GFX8-NEXT:    v_sub_u32_e32 v10, vcc, 64, v23
-; GFX8-NEXT:    v_subrev_u32_e32 v24, vcc, 64, v23
+; GFX8-NEXT:    v_add_u32_e32 v24, vcc, v23, v25
 ; GFX8-NEXT:    v_lshrrev_b64 v[8:9], v23, v[0:1]
 ; GFX8-NEXT:    v_lshlrev_b64 v[10:11], v10, v[2:3]
 ; GFX8-NEXT:    v_lshrrev_b64 v[16:17], v23, v[2:3]
@@ -7810,7 +7814,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, 0, v17, vcc
 ; GFX8-NEXT:    v_sub_u32_e32 v8, vcc, 64, v18
 ; GFX8-NEXT:    v_or_b32_e32 v3, v19, v3
-; GFX8-NEXT:    v_subrev_u32_e32 v19, vcc, 64, v18
+; GFX8-NEXT:    v_add_u32_e32 v19, vcc, v18, v25
 ; GFX8-NEXT:    v_lshrrev_b64 v[8:9], v8, v[4:5]
 ; GFX8-NEXT:    v_lshlrev_b64 v[10:11], v18, v[6:7]
 ; GFX8-NEXT:    v_lshlrev_b64 v[16:17], v18, v[4:5]
@@ -7832,7 +7836,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX8-NEXT:    v_lshrrev_b64 v[6:7], 1, v[14:15]
 ; GFX8-NEXT:    v_and_b32_e32 v14, 0x7f, v8
 ; GFX8-NEXT:    v_sub_u32_e32 v10, vcc, 64, v14
-; GFX8-NEXT:    v_subrev_u32_e32 v15, vcc, 64, v14
+; GFX8-NEXT:    v_add_u32_e32 v15, vcc, v14, v25
 ; GFX8-NEXT:    v_lshrrev_b64 v[8:9], v14, v[4:5]
 ; GFX8-NEXT:    v_lshlrev_b64 v[10:11], v10, v[6:7]
 ; GFX8-NEXT:    v_lshrrev_b64 v[12:13], v14, v[6:7]
@@ -7860,7 +7864,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_and_b32_e32 v19, 0x7f, v16
 ; GFX9-NEXT:    v_sub_u32_e32 v17, 64, v19
-; GFX9-NEXT:    v_subrev_u32_e32 v25, 64, v19
+; GFX9-NEXT:    v_add_u32_e32 v25, 0xffffffc0, v19
 ; GFX9-NEXT:    v_lshrrev_b64 v[17:18], v17, v[0:1]
 ; GFX9-NEXT:    v_lshlrev_b64 v[21:22], v19, v[2:3]
 ; GFX9-NEXT:    v_lshlrev_b64 v[23:24], v19, v[0:1]
@@ -7881,7 +7885,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX9-NEXT:    v_and_b32_e32 v23, 0x7f, v8
 ; GFX9-NEXT:    v_lshl_or_b32 v1, v10, 31, v1
 ; GFX9-NEXT:    v_sub_u32_e32 v10, 64, v23
-; GFX9-NEXT:    v_subrev_u32_e32 v24, 64, v23
+; GFX9-NEXT:    v_add_u32_e32 v24, 0xffffffc0, v23
 ; GFX9-NEXT:    v_lshrrev_b64 v[8:9], v23, v[0:1]
 ; GFX9-NEXT:    v_lshlrev_b64 v[10:11], v10, v[2:3]
 ; GFX9-NEXT:    v_lshrrev_b64 v[16:17], v23, v[2:3]
@@ -7900,7 +7904,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, 0, v17, vcc
 ; GFX9-NEXT:    v_sub_u32_e32 v8, 64, v18
 ; GFX9-NEXT:    v_or_b32_e32 v3, v19, v3
-; GFX9-NEXT:    v_subrev_u32_e32 v19, 64, v18
+; GFX9-NEXT:    v_add_u32_e32 v19, 0xffffffc0, v18
 ; GFX9-NEXT:    v_lshrrev_b64 v[8:9], v8, v[4:5]
 ; GFX9-NEXT:    v_lshlrev_b64 v[10:11], v18, v[6:7]
 ; GFX9-NEXT:    v_lshlrev_b64 v[16:17], v18, v[4:5]
@@ -7921,7 +7925,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX9-NEXT:    v_lshrrev_b64 v[6:7], 1, v[14:15]
 ; GFX9-NEXT:    v_and_b32_e32 v14, 0x7f, v8
 ; GFX9-NEXT:    v_sub_u32_e32 v10, 64, v14
-; GFX9-NEXT:    v_subrev_u32_e32 v15, 64, v14
+; GFX9-NEXT:    v_add_u32_e32 v15, 0xffffffc0, v14
 ; GFX9-NEXT:    v_lshrrev_b64 v[8:9], v14, v[4:5]
 ; GFX9-NEXT:    v_lshlrev_b64 v[10:11], v10, v[6:7]
 ; GFX9-NEXT:    v_lshrrev_b64 v[12:13], v14, v[6:7]
@@ -7956,13 +7960,13 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX10-NEXT:    v_lshl_or_b32 v9, v10, 31, v9
 ; GFX10-NEXT:    v_lshrrev_b64 v[10:11], 1, v[10:11]
 ; GFX10-NEXT:    v_lshrrev_b64 v[16:17], v17, v[0:1]
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v29, 64, v27
+; GFX10-NEXT:    v_add_nc_u32_e32 v29, 0xffffffc0, v27
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v25, 64, v28
 ; GFX10-NEXT:    v_lshlrev_b64 v[21:22], v27, v[0:1]
 ; GFX10-NEXT:    v_lshrrev_b64 v[23:24], v28, v[8:9]
 ; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v27
 ; GFX10-NEXT:    v_or_b32_e32 v18, v16, v18
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v16, 64, v28
+; GFX10-NEXT:    v_add_nc_u32_e32 v16, 0xffffffc0, v28
 ; GFX10-NEXT:    v_lshlrev_b64 v[25:26], v25, v[10:11]
 ; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v29, v[0:1]
 ; GFX10-NEXT:    v_or_b32_e32 v19, v17, v19
@@ -7999,10 +8003,10 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX10-NEXT:    v_lshl_or_b32 v9, v14, 31, v9
 ; GFX10-NEXT:    v_lshrrev_b64 v[14:15], 1, v[14:15]
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v20, 64, v22
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, 64, v24
+; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0xffffffc0, v24
 ; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v24
 ; GFX10-NEXT:    v_or_b32_e32 v12, v10, v12
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v10, 64, v22
+; GFX10-NEXT:    v_add_nc_u32_e32 v10, 0xffffffc0, v22
 ; GFX10-NEXT:    v_lshrrev_b64 v[18:19], v22, v[8:9]
 ; GFX10-NEXT:    v_lshlrev_b64 v[20:21], v20, v[14:15]
 ; GFX10-NEXT:    v_lshlrev_b64 v[3:4], v3, v[4:5]
@@ -8049,19 +8053,19 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX11-NEXT:    v_cndmask_b32_e32 v22, 0, v22, vcc_lo
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v17, 64, v27
 ; GFX11-NEXT:    v_lshlrev_b64 v[18:19], v27, v[2:3]
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v29, 64, v27
 ; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 0, v27
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    v_cndmask_b32_e32 v21, 0, v21, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_lshrrev_b64 v[16:17], v17, v[0:1]
-; GFX11-NEXT:    v_lshlrev_b64 v[0:1], v29, v[0:1]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_or_b32_e32 v19, v17, v19
 ; GFX11-NEXT:    v_or_b32_e32 v18, v16, v18
-; GFX11-NEXT:    v_cndmask_b32_e32 v21, 0, v21, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_dual_cndmask_b32 v19, v1, v19 :: v_dual_cndmask_b32 v18, v0, v18
+; GFX11-NEXT:    v_add_nc_u32_e32 v29, 0xffffffc0, v27
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_or_b32_e32 v19, v17, v19
+; GFX11-NEXT:    v_lshlrev_b64 v[0:1], v29, v[0:1]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_cndmask_b32 v18, v0, v18 :: v_dual_cndmask_b32 v19, v1, v19
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v25, 64, v28
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v16, 64, v28
+; GFX11-NEXT:    v_add_nc_u32_e32 v16, 0xffffffc0, v28
 ; GFX11-NEXT:    v_lshrrev_b64 v[23:24], v28, v[8:9]
 ; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v28
 ; GFX11-NEXT:    v_lshrrev_b64 v[0:1], v28, v[10:11]
@@ -8095,26 +8099,26 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX11-NEXT:    v_lshl_or_b32 v9, v14, 31, v9
 ; GFX11-NEXT:    v_lshrrev_b64 v[14:15], 1, v[14:15]
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v20, 64, v22
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v3, 64, v24
+; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xffffffc0, v24
 ; GFX11-NEXT:    v_cmp_gt_u32_e64 s0, 64, v22
 ; GFX11-NEXT:    v_or_b32_e32 v12, v10, v12
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v10, 64, v22
+; GFX11-NEXT:    v_add_nc_u32_e32 v10, 0xffffffc0, v22
 ; GFX11-NEXT:    v_lshrrev_b64 v[18:19], v22, v[8:9]
 ; GFX11-NEXT:    v_lshlrev_b64 v[20:21], v20, v[14:15]
 ; GFX11-NEXT:    v_lshlrev_b64 v[3:4], v3, v[4:5]
 ; GFX11-NEXT:    v_or_b32_e32 v5, v11, v13
-; GFX11-NEXT:    v_lshrrev_b64 v[10:11], v10, v[14:15]
 ; GFX11-NEXT:    v_cndmask_b32_e32 v13, 0, v16, vcc_lo
+; GFX11-NEXT:    v_lshrrev_b64 v[10:11], v10, v[14:15]
 ; GFX11-NEXT:    v_cmp_eq_u32_e64 s1, 0, v22
 ; GFX11-NEXT:    v_or_b32_e32 v16, v18, v20
 ; GFX11-NEXT:    v_or_b32_e32 v18, v19, v21
 ; GFX11-NEXT:    v_dual_cndmask_b32 v12, v3, v12 :: v_dual_cndmask_b32 v5, v4, v5
 ; GFX11-NEXT:    v_lshrrev_b64 v[3:4], v22, v[14:15]
-; GFX11-NEXT:    v_cndmask_b32_e32 v14, 0, v17, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_3)
 ; GFX11-NEXT:    v_cndmask_b32_e64 v10, v10, v16, s0
 ; GFX11-NEXT:    v_cmp_eq_u32_e64 s2, 0, v24
 ; GFX11-NEXT:    v_cndmask_b32_e64 v11, v11, v18, s0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_cndmask_b32_e32 v14, 0, v17, vcc_lo
 ; GFX11-NEXT:    v_cndmask_b32_e64 v6, v12, v6, s2
 ; GFX11-NEXT:    v_cndmask_b32_e64 v7, v5, v7, s2
 ; GFX11-NEXT:    v_cndmask_b32_e64 v5, v10, v8, s1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll
index 58304d2072d7f6..dbc8f12c2c25c4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll
@@ -21,10 +21,10 @@ define amdgpu_ps i7 @s_fshr_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
 ; GFX6-NEXT:    v_mul_hi_u32 v0, s2, v0
 ; GFX6-NEXT:    v_mul_lo_u32 v0, v0, 7
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s2, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v1, vcc, 7, v0
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, -7, v0
 ; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX6-NEXT:    v_subrev_i32_e32 v1, vcc, 7, v0
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, -7, v0
 ; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 6, v0
@@ -51,10 +51,10 @@ define amdgpu_ps i7 @s_fshr_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
 ; GFX8-NEXT:    v_mul_hi_u32 v0, s2, v0
 ; GFX8-NEXT:    v_mul_lo_u32 v0, v0, 7
 ; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, s2, v0
-; GFX8-NEXT:    v_subrev_u32_e32 v1, vcc, 7, v0
+; GFX8-NEXT:    v_add_u32_e32 v1, vcc, -7, v0
 ; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX8-NEXT:    v_subrev_u32_e32 v1, vcc, 7, v0
+; GFX8-NEXT:    v_add_u32_e32 v1, vcc, -7, v0
 ; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX8-NEXT:    v_sub_u16_e32 v1, 6, v0
@@ -81,10 +81,10 @@ define amdgpu_ps i7 @s_fshr_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
 ; GFX9-NEXT:    v_mul_hi_u32 v0, s2, v0
 ; GFX9-NEXT:    v_mul_lo_u32 v0, v0, 7
 ; GFX9-NEXT:    v_sub_u32_e32 v0, s2, v0
-; GFX9-NEXT:    v_subrev_u32_e32 v1, 7, v0
+; GFX9-NEXT:    v_add_u32_e32 v1, -7, v0
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX9-NEXT:    v_subrev_u32_e32 v1, 7, v0
+; GFX9-NEXT:    v_add_u32_e32 v1, -7, v0
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX9-NEXT:    v_sub_u16_e32 v1, 6, v0
@@ -111,10 +111,10 @@ define amdgpu_ps i7 @s_fshr_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
 ; GFX10-NEXT:    v_mul_hi_u32 v0, s2, v0
 ; GFX10-NEXT:    v_mul_lo_u32 v0, v0, 7
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v0, s2, v0
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v1, 7, v0
+; GFX10-NEXT:    v_add_nc_u32_e32 v1, -7, v0
 ; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v1, 7, v0
+; GFX10-NEXT:    v_add_nc_u32_e32 v1, -7, v0
 ; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX10-NEXT:    v_sub_nc_u16 v1, 6, v0
@@ -147,11 +147,11 @@ define amdgpu_ps i7 @s_fshr_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
 ; GFX11-NEXT:    v_mul_lo_u32 v0, v0, 7
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v0, s2, v0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v1, 7, v0
+; GFX11-NEXT:    v_add_nc_u32_e32 v1, -7, v0
 ; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v0
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v1, 7, v0
+; GFX11-NEXT:    v_add_nc_u32_e32 v1, -7, v0
 ; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v0
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
@@ -186,10 +186,10 @@ define i7 @v_fshr_i7(i7 %lhs, i7 %rhs, i7 %amt) {
 ; GFX6-NEXT:    v_mul_hi_u32 v3, v2, v3
 ; GFX6-NEXT:    v_mul_lo_u32 v3, v3, 7
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v3
-; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 7, v2
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, -7, v2
 ; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 7, v2
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, -7, v2
 ; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 6, v2
@@ -216,10 +216,10 @@ define i7 @v_fshr_i7(i7 %lhs, i7 %rhs, i7 %amt) {
 ; GFX8-NEXT:    v_mul_hi_u32 v3, v2, v3
 ; GFX8-NEXT:    v_mul_lo_u32 v3, v3, 7
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v2, v3
-; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, 7, v2
+; GFX8-NEXT:    v_add_u32_e32 v3, vcc, -7, v2
 ; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, 7, v2
+; GFX8-NEXT:    v_add_u32_e32 v3, vcc, -7, v2
 ; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX8-NEXT:    v_sub_u16_e32 v3, 6, v2
@@ -246,10 +246,10 @@ define i7 @v_fshr_i7(i7 %lhs, i7 %rhs, i7 %amt) {
 ; GFX9-NEXT:    v_mul_hi_u32 v3, v2, v3
 ; GFX9-NEXT:    v_mul_lo_u32 v3, v3, 7
 ; GFX9-NEXT:    v_sub_u32_e32 v2, v2, v3
-; GFX9-NEXT:    v_subrev_u32_e32 v3, 7, v2
+; GFX9-NEXT:    v_add_u32_e32 v3, -7, v2
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX9-NEXT:    v_subrev_u32_e32 v3, 7, v2
+; GFX9-NEXT:    v_add_u32_e32 v3, -7, v2
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX9-NEXT:    v_sub_u16_e32 v3, 6, v2
@@ -276,10 +276,10 @@ define i7 @v_fshr_i7(i7 %lhs, i7 %rhs, i7 %amt) {
 ; GFX10-NEXT:    v_mul_hi_u32 v3, v2, v3
 ; GFX10-NEXT:    v_mul_lo_u32 v3, v3, 7
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v2, v2, v3
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, 7, v2
+; GFX10-NEXT:    v_add_nc_u32_e32 v3, -7, v2
 ; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v2
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, 7, v2
+; GFX10-NEXT:    v_add_nc_u32_e32 v3, -7, v2
 ; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v2
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
 ; GFX10-NEXT:    v_sub_nc_u16 v3, 6, v2
@@ -312,11 +312,11 @@ define i7 @v_fshr_i7(i7 %lhs, i7 %rhs, i7 %amt) {
 ; GFX11-NEXT:    v_mul_lo_u32 v3, v3, 7
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v2, v2, v3
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v3, 7, v2
+; GFX11-NEXT:    v_add_nc_u32_e32 v3, -7, v2
 ; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v2
 ; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v3, 7, v2
+; GFX11-NEXT:    v_add_nc_u32_e32 v3, -7, v2
 ; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v2
 ; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
@@ -1552,16 +1552,16 @@ define amdgpu_ps i24 @s_fshr_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
 ; GFX6-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX6-NEXT:    s_and_b32 s1, s1, 0xffffff
-; GFX6-NEXT:    v_mul_lo_u32 v1, v0, v1
-; GFX6-NEXT:    v_mul_hi_u32 v1, v0, v1
-; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
+; GFX6-NEXT:    v_mul_lo_u32 v2, v0, v1
+; GFX6-NEXT:    v_mul_hi_u32 v2, v0, v2
+; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
 ; GFX6-NEXT:    v_mul_hi_u32 v0, s2, v0
 ; GFX6-NEXT:    v_mul_lo_u32 v0, v0, 24
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s2, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v1, vcc, 24, v0
+; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v0, v1
 ; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
-; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX6-NEXT:    v_subrev_i32_e32 v1, vcc, 24, v0
+; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v0, v1
 ; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 23, v0
@@ -1583,16 +1583,16 @@ define amdgpu_ps i24 @s_fshr_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
 ; GFX8-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX8-NEXT:    s_and_b32 s1, s1, 0xffffff
-; GFX8-NEXT:    v_mul_lo_u32 v1, v0, v1
-; GFX8-NEXT:    v_mul_hi_u32 v1, v0, v1
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
+; GFX8-NEXT:    v_mul_lo_u32 v2, v0, v1
+; GFX8-NEXT:    v_mul_hi_u32 v2, v0, v2
+; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
 ; GFX8-NEXT:    v_mul_hi_u32 v0, s2, v0
 ; GFX8-NEXT:    v_mul_lo_u32 v0, v0, 24
 ; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, s2, v0
-; GFX8-NEXT:    v_subrev_u32_e32 v1, vcc, 24, v0
+; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v0, v1
 ; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX8-NEXT:    v_subrev_u32_e32 v1, vcc, 24, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v0, v1
 ; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, 23, v0
@@ -1620,10 +1620,10 @@ define amdgpu_ps i24 @s_fshr_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
 ; GFX9-NEXT:    v_mul_hi_u32 v0, s2, v0
 ; GFX9-NEXT:    v_mul_lo_u32 v0, v0, 24
 ; GFX9-NEXT:    v_sub_u32_e32 v0, s2, v0
-; GFX9-NEXT:    v_subrev_u32_e32 v1, 24, v0
+; GFX9-NEXT:    v_add_u32_e32 v1, 0xffffffe8, v0
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX9-NEXT:    v_subrev_u32_e32 v1, 24, v0
+; GFX9-NEXT:    v_add_u32_e32 v1, 0xffffffe8, v0
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX9-NEXT:    v_sub_u32_e32 v1, 23, v0
@@ -1649,10 +1649,10 @@ define amdgpu_ps i24 @s_fshr_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
 ; GFX10-NEXT:    v_mul_hi_u32 v0, s2, v0
 ; GFX10-NEXT:    v_mul_lo_u32 v0, v0, 24
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v0, s2, v0
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v1, 24, v0
+; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0xffffffe8, v0
 ; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v1, 24, v0
+; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0xffffffe8, v0
 ; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v1, 23, v0
@@ -1684,11 +1684,11 @@ define amdgpu_ps i24 @s_fshr_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
 ; GFX11-NEXT:    v_mul_lo_u32 v0, v0, 24
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v0, s2, v0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v1, 24, v0
+; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0xffffffe8, v0
 ; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v1, 24, v0
+; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0xffffffe8, v0
 ; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
@@ -1717,16 +1717,16 @@ define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) {
 ; GFX6-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v3, v3
 ; GFX6-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
-; GFX6-NEXT:    v_mul_lo_u32 v4, v3, v4
-; GFX6-NEXT:    v_mul_hi_u32 v4, v3, v4
-; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
+; GFX6-NEXT:    v_mul_lo_u32 v5, v3, v4
+; GFX6-NEXT:    v_mul_hi_u32 v5, v3, v5
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
 ; GFX6-NEXT:    v_mul_hi_u32 v3, v2, v3
 ; GFX6-NEXT:    v_mul_lo_u32 v3, v3, 24
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v3
-; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 24, v2
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v2, v4
 ; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 24, v2
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 0xffffffe8, v2
 ; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 23, v2
@@ -1748,16 +1748,16 @@ define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) {
 ; GFX8-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v3, v3
 ; GFX8-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
-; GFX8-NEXT:    v_mul_lo_u32 v4, v3, v4
-; GFX8-NEXT:    v_mul_hi_u32 v4, v3, v4
-; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v4
+; GFX8-NEXT:    v_mul_lo_u32 v5, v3, v4
+; GFX8-NEXT:    v_mul_hi_u32 v5, v3, v5
+; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v5
 ; GFX8-NEXT:    v_mul_hi_u32 v3, v2, v3
 ; GFX8-NEXT:    v_mul_lo_u32 v3, v3, 24
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v2, v3
-; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, 24, v2
+; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v2, v4
 ; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, 24, v2
+; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0xffffffe8, v2
 ; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, 23, v2
@@ -1785,10 +1785,10 @@ define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) {
 ; GFX9-NEXT:    v_mul_hi_u32 v3, v2, v3
 ; GFX9-NEXT:    v_mul_lo_u32 v3, v3, 24
 ; GFX9-NEXT:    v_sub_u32_e32 v2, v2, v3
-; GFX9-NEXT:    v_subrev_u32_e32 v3, 24, v2
+; GFX9-NEXT:    v_add_u32_e32 v3, 0xffffffe8, v2
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX9-NEXT:    v_subrev_u32_e32 v3, 24, v2
+; GFX9-NEXT:    v_add_u32_e32 v3, 0xffffffe8, v2
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX9-NEXT:    v_sub_u32_e32 v3, 23, v2
@@ -1814,10 +1814,10 @@ define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) {
 ; GFX10-NEXT:    v_mul_hi_u32 v3, v2, v3
 ; GFX10-NEXT:    v_mul_lo_u32 v3, v3, 24
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v2, v2, v3
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, 24, v2
+; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v2
 ; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, 24, v2
+; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v2
 ; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v3, 23, v2
@@ -1849,11 +1849,11 @@ define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) {
 ; GFX11-NEXT:    v_mul_lo_u32 v3, v3, 24
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v2, v2, v3
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v3, 24, v2
+; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v2
 ; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
 ; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v3, 24, v2
+; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v2
 ; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
 ; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
@@ -1888,7 +1888,7 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX6-NEXT:    s_lshr_b32 s1, s2, 16
 ; GFX6-NEXT:    s_lshr_b32 s7, s3, 8
 ; GFX6-NEXT:    s_bfe_u32 s10, s2, 0x80008
-; GFX6-NEXT:    v_mul_lo_u32 v3, v2, v3
+; GFX6-NEXT:    v_mul_lo_u32 v4, v2, v3
 ; GFX6-NEXT:    s_or_b32 s8, s8, s9
 ; GFX6-NEXT:    s_and_b32 s9, s2, 0xff
 ; GFX6-NEXT:    s_lshl_b32 s10, s10, 8
@@ -1908,7 +1908,7 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX6-NEXT:    v_or_b32_e32 v1, s2, v1
 ; GFX6-NEXT:    s_lshr_b32 s2, s4, 16
 ; GFX6-NEXT:    s_bfe_u32 s9, s4, 0x80008
-; GFX6-NEXT:    v_mul_hi_u32 v3, v2, v3
+; GFX6-NEXT:    v_mul_hi_u32 v4, v2, v4
 ; GFX6-NEXT:    s_and_b32 s7, s4, 0xff
 ; GFX6-NEXT:    s_lshl_b32 s9, s9, 8
 ; GFX6-NEXT:    s_and_b32 s2, s2, 0xff
@@ -1917,62 +1917,62 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX6-NEXT:    s_and_b32 s7, 0xffff, s7
 ; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
 ; GFX6-NEXT:    s_or_b32 s2, s7, s2
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; GFX6-NEXT:    v_mul_hi_u32 v3, s2, v2
+; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
+; GFX6-NEXT:    v_mul_hi_u32 v4, s2, v2
 ; GFX6-NEXT:    s_lshr_b32 s3, s5, 8
 ; GFX6-NEXT:    s_and_b32 s5, s5, 0xff
-; GFX6-NEXT:    v_mov_b32_e32 v4, s4
+; GFX6-NEXT:    v_mov_b32_e32 v5, s4
 ; GFX6-NEXT:    s_and_b32 s3, s3, 0xff
-; GFX6-NEXT:    v_alignbit_b32 v4, s5, v4, 24
+; GFX6-NEXT:    v_alignbit_b32 v5, s5, v5, 24
 ; GFX6-NEXT:    s_and_b32 s3, 0xffff, s3
-; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v4
-; GFX6-NEXT:    v_mul_lo_u32 v3, v3, 24
+; GFX6-NEXT:    v_and_b32_e32 v5, 0xffff, v5
+; GFX6-NEXT:    v_mul_lo_u32 v4, v4, 24
 ; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
-; GFX6-NEXT:    v_or_b32_e32 v4, s3, v4
-; GFX6-NEXT:    v_mul_hi_u32 v2, v4, v2
-; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, s2, v3
-; GFX6-NEXT:    v_subrev_i32_e32 v5, vcc, 24, v3
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v3
+; GFX6-NEXT:    v_or_b32_e32 v5, s3, v5
+; GFX6-NEXT:    v_mul_hi_u32 v2, v5, v2
+; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, s2, v4
+; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v4, v3
+; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
 ; GFX6-NEXT:    v_mul_lo_u32 v2, v2, 24
-; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
-; GFX6-NEXT:    v_subrev_i32_e32 v5, vcc, 24, v3
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v3
-; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
-; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v4, v2
-; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, 23, v3
-; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, 24, v2
+; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
+; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v4, v3
+; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
+; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
+; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v5, v2
+; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, 23, v4
+; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v2, v3
 ; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
 ; GFX6-NEXT:    s_and_b32 s6, s6, 0xff
-; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
 ; GFX6-NEXT:    s_and_b32 s8, 0xffff, s8
 ; GFX6-NEXT:    s_and_b32 s6, 0xffff, s6
-; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, 24, v2
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v2, v3
 ; GFX6-NEXT:    s_lshl_b32 s2, s6, 17
 ; GFX6-NEXT:    s_lshl_b32 s3, s8, 1
 ; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
 ; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX6-NEXT:    s_and_b32 s0, 0xffff, s0
 ; GFX6-NEXT:    s_or_b32 s2, s2, s3
-; GFX6-NEXT:    v_and_b32_e32 v5, 0xffffff, v5
-; GFX6-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
-; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
-; GFX6-NEXT:    v_lshl_b32_e32 v5, s2, v5
-; GFX6-NEXT:    v_lshr_b32_e32 v3, s1, v3
-; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 23, v2
+; GFX6-NEXT:    v_and_b32_e32 v6, 0xffffff, v6
+; GFX6-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
+; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX6-NEXT:    v_lshl_b32_e32 v6, s2, v6
+; GFX6-NEXT:    v_lshr_b32_e32 v4, s1, v4
+; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 23, v2
 ; GFX6-NEXT:    s_lshl_b32 s0, s0, 17
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
-; GFX6-NEXT:    v_or_b32_e32 v3, v5, v3
+; GFX6-NEXT:    v_or_b32_e32 v4, v6, v4
 ; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
-; GFX6-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
+; GFX6-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
 ; GFX6-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
-; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v4, v0
+; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v3, v0
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
-; GFX6-NEXT:    v_bfe_u32 v2, v3, 8, 8
+; GFX6-NEXT:    v_bfe_u32 v2, v4, 8, 8
 ; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
-; GFX6-NEXT:    v_and_b32_e32 v1, 0xff, v3
+; GFX6-NEXT:    v_and_b32_e32 v1, 0xff, v4
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
 ; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
-; GFX6-NEXT:    v_bfe_u32 v2, v3, 16, 8
+; GFX6-NEXT:    v_bfe_u32 v2, v4, 16, 8
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
 ; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
 ; GFX6-NEXT:    v_and_b32_e32 v2, 0xff, v0
@@ -2024,7 +2024,7 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX8-NEXT:    v_not_b32_e32 v1, 23
 ; GFX8-NEXT:    s_or_b32 s3, s10, s3
 ; GFX8-NEXT:    s_and_b32 s8, 0xffff, s8
-; GFX8-NEXT:    v_mul_lo_u32 v1, v0, v1
+; GFX8-NEXT:    v_mul_lo_u32 v2, v0, v1
 ; GFX8-NEXT:    s_and_b32 s3, 0xffff, s3
 ; GFX8-NEXT:    s_lshl_b32 s8, s8, 16
 ; GFX8-NEXT:    s_or_b32 s3, s3, s8
@@ -2034,75 +2034,75 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX8-NEXT:    s_lshr_b32 s10, s4, 24
 ; GFX8-NEXT:    s_and_b32 s4, s4, 0xff
 ; GFX8-NEXT:    s_lshl_b32 s8, s8, 8
-; GFX8-NEXT:    v_mul_hi_u32 v1, v0, v1
+; GFX8-NEXT:    v_mul_hi_u32 v2, v0, v2
 ; GFX8-NEXT:    s_or_b32 s4, s4, s8
 ; GFX8-NEXT:    s_and_b32 s8, s9, 0xff
 ; GFX8-NEXT:    s_and_b32 s8, 0xffff, s8
 ; GFX8-NEXT:    s_and_b32 s4, 0xffff, s4
 ; GFX8-NEXT:    s_lshl_b32 s8, s8, 16
 ; GFX8-NEXT:    s_or_b32 s4, s4, s8
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
-; GFX8-NEXT:    v_mul_hi_u32 v1, s4, v0
+; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
+; GFX8-NEXT:    v_mul_hi_u32 v2, s4, v0
 ; GFX8-NEXT:    s_lshr_b32 s11, s5, 8
 ; GFX8-NEXT:    s_and_b32 s5, s5, 0xff
 ; GFX8-NEXT:    s_lshl_b32 s5, s5, 8
-; GFX8-NEXT:    v_mul_lo_u32 v1, v1, 24
+; GFX8-NEXT:    v_mul_lo_u32 v2, v2, 24
 ; GFX8-NEXT:    s_and_b32 s8, s11, 0xff
 ; GFX8-NEXT:    s_or_b32 s5, s10, s5
 ; GFX8-NEXT:    s_and_b32 s8, 0xffff, s8
 ; GFX8-NEXT:    s_and_b32 s5, 0xffff, s5
 ; GFX8-NEXT:    s_lshl_b32 s8, s8, 16
 ; GFX8-NEXT:    s_or_b32 s5, s5, s8
-; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, s4, v1
-; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, 24, v1
+; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, s4, v2
+; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v2, v1
 ; GFX8-NEXT:    v_mul_hi_u32 v0, s5, v0
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
-; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, 24, v1
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v1
+; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v2, v1
+; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
 ; GFX8-NEXT:    v_mul_lo_u32 v0, v0, 24
 ; GFX8-NEXT:    s_and_b32 s0, 0xffff, s0
 ; GFX8-NEXT:    s_and_b32 s6, 0xffff, s6
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
-; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 23, v1
+; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, 23, v2
 ; GFX8-NEXT:    s_lshl_b32 s4, s6, 17
 ; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
 ; GFX8-NEXT:    s_or_b32 s0, s4, s0
+; GFX8-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
 ; GFX8-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
-; GFX8-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
-; GFX8-NEXT:    v_lshlrev_b32_e64 v2, v2, s0
-; GFX8-NEXT:    v_lshrrev_b32_e64 v1, v1, s2
+; GFX8-NEXT:    v_lshlrev_b32_e64 v3, v3, s0
+; GFX8-NEXT:    v_lshrrev_b32_e64 v2, v2, s2
 ; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, s5, v0
-; GFX8-NEXT:    v_or_b32_e32 v1, v2, v1
-; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, 24, v0
+; GFX8-NEXT:    v_or_b32_e32 v2, v3, v2
+; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v0, v1
 ; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, 24, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v0, v1
 ; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
 ; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
 ; GFX8-NEXT:    s_and_b32 s7, 0xffff, s7
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 23, v0
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, 23, v0
 ; GFX8-NEXT:    s_lshl_b32 s0, s7, 17
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 1
 ; GFX8-NEXT:    s_or_b32 s0, s0, s1
-; GFX8-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
+; GFX8-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
 ; GFX8-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
-; GFX8-NEXT:    v_lshlrev_b32_e64 v2, v2, s0
+; GFX8-NEXT:    v_lshlrev_b32_e64 v1, v1, s0
 ; GFX8-NEXT:    v_lshrrev_b32_e64 v0, v0, s3
-; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
-; GFX8-NEXT:    v_mov_b32_e32 v2, 8
-; GFX8-NEXT:    v_lshlrev_b32_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
+; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
+; GFX8-NEXT:    v_mov_b32_e32 v1, 8
+; GFX8-NEXT:    v_lshlrev_b32_sdwa v3, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
 ; GFX8-NEXT:    v_mov_b32_e32 v4, 16
-; GFX8-NEXT:    v_or_b32_sdwa v3, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
-; GFX8-NEXT:    v_or_b32_e32 v1, v3, v1
+; GFX8-NEXT:    v_or_b32_sdwa v3, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
+; GFX8-NEXT:    v_or_b32_e32 v2, v3, v2
 ; GFX8-NEXT:    v_and_b32_e32 v3, 0xff, v0
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
-; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
-; GFX8-NEXT:    v_or_b32_e32 v1, v1, v3
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
-; GFX8-NEXT:    v_readfirstlane_b32 s0, v1
+; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
+; GFX8-NEXT:    v_or_b32_e32 v2, v2, v3
+; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
+; GFX8-NEXT:    v_readfirstlane_b32 s0, v2
 ; GFX8-NEXT:    v_readfirstlane_b32 s1, v0
 ; GFX8-NEXT:    ; return to shader part epilog
 ;
@@ -2175,11 +2175,11 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX9-NEXT:    s_or_b32 s5, s5, s8
 ; GFX9-NEXT:    v_mul_hi_u32 v0, s5, v0
 ; GFX9-NEXT:    v_sub_u32_e32 v1, s4, v1
-; GFX9-NEXT:    v_subrev_u32_e32 v2, 24, v1
+; GFX9-NEXT:    v_add_u32_e32 v2, 0xffffffe8, v1
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 ; GFX9-NEXT:    v_mul_lo_u32 v0, v0, 24
-; GFX9-NEXT:    v_subrev_u32_e32 v2, 24, v1
+; GFX9-NEXT:    v_add_u32_e32 v2, 0xffffffe8, v1
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v1
 ; GFX9-NEXT:    s_and_b32 s0, 0xffff, s0
 ; GFX9-NEXT:    s_and_b32 s6, 0xffff, s6
@@ -2193,10 +2193,10 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX9-NEXT:    v_lshrrev_b32_e64 v1, v1, s2
 ; GFX9-NEXT:    v_sub_u32_e32 v0, s5, v0
 ; GFX9-NEXT:    v_lshl_or_b32 v1, s0, v2, v1
-; GFX9-NEXT:    v_subrev_u32_e32 v2, 24, v0
+; GFX9-NEXT:    v_add_u32_e32 v2, 0xffffffe8, v0
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX9-NEXT:    v_subrev_u32_e32 v2, 24, v0
+; GFX9-NEXT:    v_add_u32_e32 v2, 0xffffffe8, v0
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
 ; GFX9-NEXT:    s_and_b32 s1, 0xffff, s1
 ; GFX9-NEXT:    s_and_b32 s7, 0xffff, s7
@@ -2294,23 +2294,23 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX10-NEXT:    s_and_b32 s4, s11, 0xff
 ; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
 ; GFX10-NEXT:    s_and_b32 s4, 0xffff, s4
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v2, 24, v1
+; GFX10-NEXT:    v_add_nc_u32_e32 v2, 0xffffffe8, v1
 ; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v1
 ; GFX10-NEXT:    s_lshl_b32 s4, s4, 16
 ; GFX10-NEXT:    s_lshl_b32 s1, s1, 1
 ; GFX10-NEXT:    s_or_b32 s2, s2, s4
 ; GFX10-NEXT:    s_and_b32 s4, s13, 0xff
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc_lo
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v2, 24, v0
+; GFX10-NEXT:    v_add_nc_u32_e32 v2, 0xffffffe8, v0
 ; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
 ; GFX10-NEXT:    s_and_b32 s4, 0xffff, s4
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, 24, v1
+; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v1
 ; GFX10-NEXT:    s_lshl_b32 s4, s4, 16
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
 ; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v1
 ; GFX10-NEXT:    s_or_b32 s3, s3, s4
 ; GFX10-NEXT:    s_lshl_b32 s4, s7, 17
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v2, 24, v0
+; GFX10-NEXT:    v_add_nc_u32_e32 v2, 0xffffffe8, v0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
 ; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
 ; GFX10-NEXT:    s_or_b32 s0, s4, s0
@@ -2393,69 +2393,67 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
 ; GFX11-NEXT:    s_and_b32 s0, s0, 0xff
 ; GFX11-NEXT:    v_mul_hi_u32 v1, s4, v0
 ; GFX11-NEXT:    v_mul_hi_u32 v0, s5, v0
+; GFX11-NEXT:    s_lshr_b32 s13, s3, 8
+; GFX11-NEXT:    s_and_b32 s3, s3, 0xff
 ; GFX11-NEXT:    s_lshl_b32 s6, s6, 8
 ; GFX11-NEXT:    s_and_b32 s7, s7, 0xff
+; GFX11-NEXT:    s_lshl_b32 s3, s3, 8
+; GFX11-NEXT:    s_and_b32 s13, s13, 0xff
+; GFX11-NEXT:    v_mul_lo_u32 v1, v1, 24
+; GFX11-NEXT:    v_mul_lo_u32 v0, v0, 24
 ; GFX11-NEXT:    s_or_b32 s0, s0, s6
 ; GFX11-NEXT:    s_and_b32 s7, 0xffff, s7
+; GFX11-NEXT:    s_or_b32 s3, s12, s3
 ; GFX11-NEXT:    s_and_b32 s0, 0xffff, s0
-; GFX11-NEXT:    s_lshr_b32 s13, s3, 8
-; GFX11-NEXT:    v_mul_lo_u32 v1, v1, 24
-; GFX11-NEXT:    v_mul_lo_u32 v0, v0, 24
+; GFX11-NEXT:    s_and_b32 s3, 0xffff, s3
 ; GFX11-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX11-NEXT:    s_and_b32 s3, s3, 0xff
-; GFX11-NEXT:    s_and_b32 s13, s13, 0xff
-; GFX11-NEXT:    s_lshl_b32 s3, s3, 8
-; GFX11-NEXT:    s_and_b32 s1, 0xffff, s1
-; GFX11-NEXT:    s_or_b32 s3, s12, s3
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v1, s4, v1
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v0, s5, v0
 ; GFX11-NEXT:    s_lshl_b32 s4, s10, 8
 ; GFX11-NEXT:    s_and_b32 s10, 0xffff, s13
 ; GFX11-NEXT:    s_or_b32 s2, s2, s4
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v2, 24, v1
+; GFX11-NEXT:    v_add_nc_u32_e32 v2, 0xffffffe8, v1
 ; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v1
 ; GFX11-NEXT:    s_lshl_b32 s4, s9, 16
 ; GFX11-NEXT:    s_and_b32 s2, 0xffff, s2
-; GFX11-NEXT:    s_and_b32 s3, 0xffff, s3
+; GFX11-NEXT:    s_lshl_b32 s5, s10, 16
 ; GFX11-NEXT:    s_or_b32 s2, s2, s4
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc_lo
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v2, 24, v0
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v2 :: v_dual_add_nc_u32 v2, 0xffffffe8, v0
 ; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
 ; GFX11-NEXT:    s_lshl_b32 s4, s7, 17
-; GFX11-NEXT:    s_lshl_b32 s5, s10, 16
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v3, 24, v1
+; GFX11-NEXT:    s_and_b32 s1, 0xffff, s1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v1
 ; GFX11-NEXT:    s_or_b32 s0, s4, s0
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
 ; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v1
 ; GFX11-NEXT:    s_lshl_b32 s1, s1, 1
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_sub_nc_u32_e32 v3, 23, v1
-; GFX11-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v2, 24, v0
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v3 :: v_dual_add_nc_u32 v2, 0xffffffe8, v0
 ; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
-; GFX11-NEXT:    v_lshrrev_b32_e64 v1, v1, s2
-; GFX11-NEXT:    s_or_b32 s2, s3, s5
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    v_sub_nc_u32_e32 v3, 23, v1
+; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v2 :: v_dual_and_b32 v1, 0xffffff, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_and_b32_e32 v2, 0xffffff, v3
+; GFX11-NEXT:    v_lshrrev_b32_e64 v1, v1, s2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v3, 23, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
+; GFX11-NEXT:    s_or_b32 s2, s3, s5
 ; GFX11-NEXT:    v_lshl_or_b32 v1, s0, v2, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    v_and_b32_e32 v2, 0xffffff, v3
+; GFX11-NEXT:    v_lshrrev_b32_e64 v0, v0, s2
 ; GFX11-NEXT:    s_lshl_b32 s0, s8, 17
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    s_or_b32 s0, s0, s1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_and_b32_e32 v2, 0xffffff, v3
 ; GFX11-NEXT:    v_bfe_u32 v3, v1, 16, 8
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; GFX11-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
-; GFX11-NEXT:    v_lshrrev_b32_e64 v0, v0, s2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_lshl_or_b32 v0, s0, v2, v0
 ; GFX11-NEXT:    v_bfe_u32 v2, v1, 8, 8
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
 ; GFX11-NEXT:    v_and_b32_e32 v4, 0xff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v4, 24, v4
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
@@ -2491,32 +2489,32 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
 ; GFX6-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
-; GFX6-NEXT:    v_mul_lo_u32 v7, v6, v7
+; GFX6-NEXT:    v_mul_lo_u32 v8, v6, v7
 ; GFX6-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
-; GFX6-NEXT:    v_mul_hi_u32 v7, v6, v7
-; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
-; GFX6-NEXT:    v_mul_hi_u32 v7, v4, v6
+; GFX6-NEXT:    v_mul_hi_u32 v8, v6, v8
+; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; GFX6-NEXT:    v_mul_hi_u32 v8, v4, v6
 ; GFX6-NEXT:    v_mul_hi_u32 v6, v5, v6
-; GFX6-NEXT:    v_mul_lo_u32 v7, v7, 24
+; GFX6-NEXT:    v_mul_lo_u32 v8, v8, 24
 ; GFX6-NEXT:    v_mul_lo_u32 v6, v6, 24
-; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, v4, v7
-; GFX6-NEXT:    v_subrev_i32_e32 v7, vcc, 24, v4
+; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, v4, v8
+; GFX6-NEXT:    v_add_i32_e32 v8, vcc, v4, v7
 ; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
-; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
-; GFX6-NEXT:    v_subrev_i32_e32 v7, vcc, 24, v4
+; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
+; GFX6-NEXT:    v_add_i32_e32 v8, vcc, v4, v7
 ; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
-; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
-; GFX6-NEXT:    v_sub_i32_e32 v7, vcc, 23, v4
-; GFX6-NEXT:    v_and_b32_e32 v7, 0xffffff, v7
+; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
+; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, 23, v4
+; GFX6-NEXT:    v_and_b32_e32 v8, 0xffffff, v8
 ; GFX6-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
-; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v7, v0
+; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v8, v0
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
 ; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v5, v6
-; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, 24, v2
+; GFX6-NEXT:    v_add_i32_e32 v4, vcc, v2, v7
 ; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
-; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, 24, v2
+; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 0xffffffe8, v2
 ; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 23, v2
@@ -2540,32 +2538,32 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
 ; GFX8-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
-; GFX8-NEXT:    v_mul_lo_u32 v7, v6, v7
+; GFX8-NEXT:    v_mul_lo_u32 v8, v6, v7
 ; GFX8-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
-; GFX8-NEXT:    v_mul_hi_u32 v7, v6, v7
-; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v7
-; GFX8-NEXT:    v_mul_hi_u32 v7, v4, v6
+; GFX8-NEXT:    v_mul_hi_u32 v8, v6, v8
+; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v8
+; GFX8-NEXT:    v_mul_hi_u32 v8, v4, v6
 ; GFX8-NEXT:    v_mul_hi_u32 v6, v5, v6
-; GFX8-NEXT:    v_mul_lo_u32 v7, v7, 24
+; GFX8-NEXT:    v_mul_lo_u32 v8, v8, 24
 ; GFX8-NEXT:    v_mul_lo_u32 v6, v6, 24
-; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, v4, v7
-; GFX8-NEXT:    v_subrev_u32_e32 v7, vcc, 24, v4
+; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, v4, v8
+; GFX8-NEXT:    v_add_u32_e32 v8, vcc, v4, v7
 ; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
-; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
-; GFX8-NEXT:    v_subrev_u32_e32 v7, vcc, 24, v4
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
+; GFX8-NEXT:    v_add_u32_e32 v8, vcc, v4, v7
 ; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
-; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
-; GFX8-NEXT:    v_sub_u32_e32 v7, vcc, 23, v4
-; GFX8-NEXT:    v_and_b32_e32 v7, 0xffffff, v7
+; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
+; GFX8-NEXT:    v_sub_u32_e32 v8, vcc, 23, v4
+; GFX8-NEXT:    v_and_b32_e32 v8, 0xffffff, v8
 ; GFX8-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
-; GFX8-NEXT:    v_lshlrev_b32_e32 v0, v7, v0
+; GFX8-NEXT:    v_lshlrev_b32_e32 v0, v8, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
 ; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v5, v6
-; GFX8-NEXT:    v_subrev_u32_e32 v4, vcc, 24, v2
+; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v2, v7
 ; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
-; GFX8-NEXT:    v_subrev_u32_e32 v4, vcc, 24, v2
+; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 0xffffffe8, v2
 ; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 23, v2
@@ -2599,10 +2597,10 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
 ; GFX9-NEXT:    v_mul_lo_u32 v6, v6, 24
 ; GFX9-NEXT:    v_sub_u32_e32 v4, v4, v7
 ; GFX9-NEXT:    v_sub_u32_e32 v5, v5, v6
-; GFX9-NEXT:    v_subrev_u32_e32 v6, 24, v4
+; GFX9-NEXT:    v_add_u32_e32 v6, 0xffffffe8, v4
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
-; GFX9-NEXT:    v_subrev_u32_e32 v6, 24, v4
+; GFX9-NEXT:    v_add_u32_e32 v6, 0xffffffe8, v4
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
 ; GFX9-NEXT:    v_sub_u32_e32 v6, 23, v4
@@ -2610,10 +2608,10 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
 ; GFX9-NEXT:    v_and_b32_e32 v6, 0xffffff, v6
 ; GFX9-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
 ; GFX9-NEXT:    v_lshl_or_b32 v0, v0, v6, v2
-; GFX9-NEXT:    v_subrev_u32_e32 v2, 24, v5
+; GFX9-NEXT:    v_add_u32_e32 v2, 0xffffffe8, v5
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v5
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v5, v2, vcc
-; GFX9-NEXT:    v_subrev_u32_e32 v4, 24, v2
+; GFX9-NEXT:    v_add_u32_e32 v4, 0xffffffe8, v2
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX9-NEXT:    v_sub_u32_e32 v4, 23, v2
@@ -2645,15 +2643,15 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
 ; GFX10-NEXT:    v_mul_lo_u32 v6, v6, 24
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v4, v4, v7
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v5, v5, v6
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v6, 24, v4
+; GFX10-NEXT:    v_add_nc_u32_e32 v6, 0xffffffe8, v4
 ; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v4
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v7, 24, v5
+; GFX10-NEXT:    v_add_nc_u32_e32 v7, 0xffffffe8, v5
 ; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc_lo
 ; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v5
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v6, 24, v4
+; GFX10-NEXT:    v_add_nc_u32_e32 v6, 0xffffffe8, v4
 ; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc_lo
 ; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v4
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v7, 24, v5
+; GFX10-NEXT:    v_add_nc_u32_e32 v7, 0xffffffe8, v5
 ; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc_lo
 ; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v5
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v6, 23, v4
@@ -2675,12 +2673,11 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
 ; GFX11-NEXT:    v_cvt_f32_ubyte0_e32 v6, 24
 ; GFX11-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
 ; GFX11-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
+; GFX11-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
-; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
 ; GFX11-NEXT:    v_rcp_iflag_f32_e32 v6, v6
-; GFX11-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
 ; GFX11-NEXT:    s_waitcnt_depctr 0xfff
-; GFX11-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
+; GFX11-NEXT:    v_dual_mul_f32 v6, 0x4f7ffffe, v6 :: v_dual_lshlrev_b32 v1, 1, v1
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_cvt_u32_f32_e32 v6, v6
 ; GFX11-NEXT:    v_mul_lo_u32 v7, 0xffffffe8, v6
@@ -2697,34 +2694,33 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_mul_hi_u32 v6, v5, v6
 ; GFX11-NEXT:    v_mul_lo_u32 v6, v6, 24
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v5, v5, v6
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v6, 24, v4
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v7, 24, v5
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_add_nc_u32_e32 v6, 0xffffffe8, v4
 ; GFX11-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v5
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v6, 24, v4
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_add_nc_u32_e32 v6, 0xffffffe8, v4
+; GFX11-NEXT:    v_add_nc_u32_e32 v7, 0xffffffe8, v5
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc_lo
 ; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v4
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v7, 24, v5
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc_lo
+; GFX11-NEXT:    v_dual_cndmask_b32 v4, v4, v6 :: v_dual_add_nc_u32 v7, 0xffffffe8, v5
 ; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v5
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v6, 23, v4
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_dual_cndmask_b32 v5, v5, v7 :: v_dual_and_b32 v4, 0xffffff, v4
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_and_b32_e32 v6, 0xffffff, v6
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v7, 23, v5
 ; GFX11-NEXT:    v_and_b32_e32 v5, 0xffffff, v5
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
 ; GFX11-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
 ; GFX11-NEXT:    v_and_b32_e32 v4, 0xffffff, v7
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
 ; GFX11-NEXT:    v_lshrrev_b32_e32 v3, v5, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_lshl_or_b32 v0, v0, v6, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 ; GFX11-NEXT:    v_lshl_or_b32 v1, v1, v4, v3
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %result = call <2 x i24> @llvm.fshr.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt)
@@ -6087,13 +6083,14 @@ define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX6-NEXT:    v_not_b32_e32 v0, v8
 ; GFX6-NEXT:    v_and_b32_e32 v15, 0x7f, v0
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, 64, v15
+; GFX6-NEXT:    v_not_b32_e32 v16, 63
 ; GFX6-NEXT:    v_lshr_b64 v[0:1], v[9:10], v0
 ; GFX6-NEXT:    v_lshl_b64 v[11:12], v[2:3], v15
-; GFX6-NEXT:    v_subrev_i32_e32 v16, vcc, 64, v15
+; GFX6-NEXT:    v_add_i32_e32 v17, vcc, v15, v16
 ; GFX6-NEXT:    v_lshl_b64 v[13:14], v[9:10], v15
 ; GFX6-NEXT:    v_or_b32_e32 v11, v0, v11
 ; GFX6-NEXT:    v_or_b32_e32 v12, v1, v12
-; GFX6-NEXT:    v_lshl_b64 v[0:1], v[9:10], v16
+; GFX6-NEXT:    v_lshl_b64 v[0:1], v[9:10], v17
 ; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v15
 ; GFX6-NEXT:    v_cndmask_b32_e32 v10, 0, v13, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v13, 0, v14, vcc
@@ -6106,7 +6103,7 @@ define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 64, v14
 ; GFX6-NEXT:    v_lshr_b64 v[0:1], v[4:5], v14
 ; GFX6-NEXT:    v_lshl_b64 v[2:3], v[6:7], v2
-; GFX6-NEXT:    v_subrev_i32_e32 v15, vcc, 64, v14
+; GFX6-NEXT:    v_add_i32_e32 v15, vcc, v14, v16
 ; GFX6-NEXT:    v_or_b32_e32 v2, v0, v2
 ; GFX6-NEXT:    v_or_b32_e32 v3, v1, v3
 ; GFX6-NEXT:    v_lshr_b64 v[0:1], v[6:7], v15
@@ -6135,13 +6132,14 @@ define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX8-NEXT:    v_not_b32_e32 v0, v8
 ; GFX8-NEXT:    v_and_b32_e32 v15, 0x7f, v0
 ; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, 64, v15
+; GFX8-NEXT:    v_not_b32_e32 v16, 63
 ; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v0, v[9:10]
 ; GFX8-NEXT:    v_lshlrev_b64 v[11:12], v15, v[2:3]
-; GFX8-NEXT:    v_subrev_u32_e32 v16, vcc, 64, v15
+; GFX8-NEXT:    v_add_u32_e32 v17, vcc, v15, v16
 ; GFX8-NEXT:    v_lshlrev_b64 v[13:14], v15, v[9:10]
 ; GFX8-NEXT:    v_or_b32_e32 v11, v0, v11
 ; GFX8-NEXT:    v_or_b32_e32 v12, v1, v12
-; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v16, v[9:10]
+; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v17, v[9:10]
 ; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v15
 ; GFX8-NEXT:    v_cndmask_b32_e32 v10, 0, v13, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v13, 0, v14, vcc
@@ -6154,7 +6152,7 @@ define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 64, v14
 ; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v14, v[4:5]
 ; GFX8-NEXT:    v_lshlrev_b64 v[2:3], v2, v[6:7]
-; GFX8-NEXT:    v_subrev_u32_e32 v15, vcc, 64, v14
+; GFX8-NEXT:    v_add_u32_e32 v15, vcc, v14, v16
 ; GFX8-NEXT:    v_or_b32_e32 v2, v0, v2
 ; GFX8-NEXT:    v_or_b32_e32 v3, v1, v3
 ; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v15, v[6:7]
@@ -6185,7 +6183,7 @@ define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX9-NEXT:    v_sub_u32_e32 v0, 64, v15
 ; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v0, v[9:10]
 ; GFX9-NEXT:    v_lshlrev_b64 v[11:12], v15, v[2:3]
-; GFX9-NEXT:    v_subrev_u32_e32 v16, 64, v15
+; GFX9-NEXT:    v_add_u32_e32 v16, 0xffffffc0, v15
 ; GFX9-NEXT:    v_lshlrev_b64 v[13:14], v15, v[9:10]
 ; GFX9-NEXT:    v_or_b32_e32 v11, v0, v11
 ; GFX9-NEXT:    v_or_b32_e32 v12, v1, v12
@@ -6202,7 +6200,7 @@ define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX9-NEXT:    v_cndmask_b32_e32 v12, v1, v3, vcc
 ; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v14, v[4:5]
 ; GFX9-NEXT:    v_lshlrev_b64 v[2:3], v2, v[6:7]
-; GFX9-NEXT:    v_subrev_u32_e32 v15, 64, v14
+; GFX9-NEXT:    v_add_u32_e32 v15, 0xffffffc0, v14
 ; GFX9-NEXT:    v_or_b32_e32 v2, v0, v2
 ; GFX9-NEXT:    v_or_b32_e32 v3, v1, v3
 ; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v15, v[6:7]
@@ -6232,9 +6230,9 @@ define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX10-NEXT:    v_and_b32_e32 v18, 0x7f, v9
 ; GFX10-NEXT:    v_or_b32_e32 v2, v2, v10
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v16, 64, v19
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v21, 64, v19
+; GFX10-NEXT:    v_add_nc_u32_e32 v21, 0xffffffc0, v19
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v10, 64, v18
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v20, 64, v18
+; GFX10-NEXT:    v_add_nc_u32_e32 v20, 0xffffffc0, v18
 ; GFX10-NEXT:    v_lshlrev_b64 v[8:9], v18, v[2:3]
 ; GFX10-NEXT:    v_lshrrev_b64 v[12:13], v19, v[4:5]
 ; GFX10-NEXT:    v_lshlrev_b64 v[16:17], v16, v[6:7]
@@ -6273,47 +6271,48 @@ define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_not_b32_e32 v9, v8
+; GFX11-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
 ; GFX11-NEXT:    v_lshrrev_b32_e32 v10, 31, v1
 ; GFX11-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
-; GFX11-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
 ; GFX11-NEXT:    v_and_b32_e32 v18, 0x7f, v9
 ; GFX11-NEXT:    v_or_b32_e32 v2, v2, v10
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_sub_nc_u32_e32 v10, 64, v18
 ; GFX11-NEXT:    v_lshlrev_b64 v[14:15], v18, v[0:1]
 ; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v18
 ; GFX11-NEXT:    v_and_b32_e32 v19, 0x7f, v8
-; GFX11-NEXT:    v_sub_nc_u32_e32 v10, 64, v18
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v20, 64, v18
+; GFX11-NEXT:    v_add_nc_u32_e32 v20, 0xffffffc0, v18
 ; GFX11-NEXT:    v_lshlrev_b64 v[8:9], v18, v[2:3]
+; GFX11-NEXT:    v_lshrrev_b64 v[10:11], v10, v[0:1]
 ; GFX11-NEXT:    v_cndmask_b32_e32 v14, 0, v14, vcc_lo
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v16, 64, v19
-; GFX11-NEXT:    v_lshrrev_b64 v[10:11], v10, v[0:1]
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v21, 64, v19
-; GFX11-NEXT:    v_lshrrev_b64 v[12:13], v19, v[4:5]
 ; GFX11-NEXT:    v_lshlrev_b64 v[0:1], v20, v[0:1]
-; GFX11-NEXT:    v_lshlrev_b64 v[16:17], v16, v[6:7]
+; GFX11-NEXT:    v_lshrrev_b64 v[12:13], v19, v[4:5]
 ; GFX11-NEXT:    v_cmp_gt_u32_e64 s0, 64, v19
 ; GFX11-NEXT:    v_or_b32_e32 v10, v10, v8
+; GFX11-NEXT:    v_add_nc_u32_e32 v21, 0xffffffc0, v19
+; GFX11-NEXT:    v_lshlrev_b64 v[16:17], v16, v[6:7]
 ; GFX11-NEXT:    v_or_b32_e32 v11, v11, v9
-; GFX11-NEXT:    v_lshrrev_b64 v[8:9], v21, v[6:7]
 ; GFX11-NEXT:    v_cmp_eq_u32_e64 s1, 0, v19
+; GFX11-NEXT:    v_cndmask_b32_e32 v10, v0, v10, vcc_lo
+; GFX11-NEXT:    v_lshrrev_b64 v[8:9], v21, v[6:7]
 ; GFX11-NEXT:    v_or_b32_e32 v12, v12, v16
 ; GFX11-NEXT:    v_or_b32_e32 v13, v13, v17
-; GFX11-NEXT:    v_dual_cndmask_b32 v10, v0, v10 :: v_dual_cndmask_b32 v11, v1, v11
+; GFX11-NEXT:    v_cndmask_b32_e32 v11, v1, v11, vcc_lo
 ; GFX11-NEXT:    v_lshrrev_b64 v[0:1], v19, v[6:7]
-; GFX11-NEXT:    v_cndmask_b32_e32 v7, 0, v15, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v8, v8, v12, s0
 ; GFX11-NEXT:    v_cmp_eq_u32_e64 s2, 0, v18
+; GFX11-NEXT:    v_cndmask_b32_e64 v8, v8, v12, s0
 ; GFX11-NEXT:    v_cndmask_b32_e64 v6, v9, v13, s0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e64 v4, v8, v4, s1
+; GFX11-NEXT:    v_cndmask_b32_e32 v7, 0, v15, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
 ; GFX11-NEXT:    v_cndmask_b32_e64 v2, v10, v2, s2
 ; GFX11-NEXT:    v_cndmask_b32_e64 v3, v11, v3, s2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_cndmask_b32_e64 v4, v8, v4, s1
 ; GFX11-NEXT:    v_cndmask_b32_e64 v5, v6, v5, s1
 ; GFX11-NEXT:    v_cndmask_b32_e64 v6, 0, v0, s0
 ; GFX11-NEXT:    v_cndmask_b32_e64 v8, 0, v1, s0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
 ; GFX11-NEXT:    v_or_b32_e32 v0, v14, v4
 ; GFX11-NEXT:    v_or_b32_e32 v1, v7, v5
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
@@ -6335,46 +6334,47 @@ define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
 ; GFX6-NEXT:    v_and_b32_e32 v7, 0x7f, v1
 ; GFX6-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 64, v7
+; GFX6-NEXT:    v_not_b32_e32 v8, 63
 ; GFX6-NEXT:    v_lshr_b64 v[1:2], s[8:9], v1
 ; GFX6-NEXT:    v_lshl_b64 v[3:4], s[0:1], v7
-; GFX6-NEXT:    v_subrev_i32_e32 v8, vcc, 64, v7
+; GFX6-NEXT:    v_add_i32_e32 v9, vcc, v7, v8
 ; GFX6-NEXT:    v_lshl_b64 v[5:6], s[8:9], v7
 ; GFX6-NEXT:    v_or_b32_e32 v3, v1, v3
 ; GFX6-NEXT:    v_or_b32_e32 v4, v2, v4
-; GFX6-NEXT:    v_lshl_b64 v[1:2], s[8:9], v8
+; GFX6-NEXT:    v_lshl_b64 v[1:2], s[8:9], v9
 ; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
-; GFX6-NEXT:    v_cndmask_b32_e32 v8, 0, v5, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v9, 0, v5, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v6, 0, v6, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX6-NEXT:    v_mov_b32_e32 v3, s0
 ; GFX6-NEXT:    v_mov_b32_e32 v4, s1
 ; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
-; GFX6-NEXT:    v_and_b32_e32 v10, 0x7f, v0
+; GFX6-NEXT:    v_and_b32_e32 v11, 0x7f, v0
 ; GFX6-NEXT:    v_cndmask_b32_e32 v7, v1, v3, vcc
-; GFX6-NEXT:    v_cndmask_b32_e32 v9, v2, v4, vcc
-; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 64, v10
-; GFX6-NEXT:    v_lshr_b64 v[0:1], s[4:5], v10
+; GFX6-NEXT:    v_cndmask_b32_e32 v10, v2, v4, vcc
+; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 64, v11
+; GFX6-NEXT:    v_lshr_b64 v[0:1], s[4:5], v11
 ; GFX6-NEXT:    v_lshl_b64 v[2:3], s[6:7], v2
-; GFX6-NEXT:    v_subrev_i32_e32 v11, vcc, 64, v10
+; GFX6-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
 ; GFX6-NEXT:    v_or_b32_e32 v2, v0, v2
 ; GFX6-NEXT:    v_or_b32_e32 v3, v1, v3
-; GFX6-NEXT:    v_lshr_b64 v[0:1], s[6:7], v11
-; GFX6-NEXT:    v_lshr_b64 v[4:5], s[6:7], v10
-; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v10
+; GFX6-NEXT:    v_lshr_b64 v[0:1], s[6:7], v8
+; GFX6-NEXT:    v_lshr_b64 v[4:5], s[6:7], v11
+; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v11
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX6-NEXT:    v_mov_b32_e32 v3, s5
-; GFX6-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v10
+; GFX6-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v11
 ; GFX6-NEXT:    v_cndmask_b32_e64 v0, v0, v2, s[0:1]
 ; GFX6-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s[0:1]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, 0, v4, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
-; GFX6-NEXT:    v_or_b32_e32 v0, v8, v0
+; GFX6-NEXT:    v_or_b32_e32 v0, v9, v0
 ; GFX6-NEXT:    v_or_b32_e32 v1, v6, v1
 ; GFX6-NEXT:    v_or_b32_e32 v2, v7, v2
-; GFX6-NEXT:    v_or_b32_e32 v3, v9, v3
+; GFX6-NEXT:    v_or_b32_e32 v3, v10, v3
 ; GFX6-NEXT:    ; return to shader part epilog
 ;
 ; GFX8-LABEL: v_fshr_i128_ssv:
@@ -6387,46 +6387,47 @@ define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
 ; GFX8-NEXT:    v_and_b32_e32 v7, 0x7f, v1
 ; GFX8-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
 ; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, 64, v7
+; GFX8-NEXT:    v_not_b32_e32 v8, 63
 ; GFX8-NEXT:    v_lshrrev_b64 v[1:2], v1, s[8:9]
 ; GFX8-NEXT:    v_lshlrev_b64 v[3:4], v7, s[0:1]
-; GFX8-NEXT:    v_subrev_u32_e32 v8, vcc, 64, v7
+; GFX8-NEXT:    v_add_u32_e32 v9, vcc, v7, v8
 ; GFX8-NEXT:    v_lshlrev_b64 v[5:6], v7, s[8:9]
 ; GFX8-NEXT:    v_or_b32_e32 v3, v1, v3
 ; GFX8-NEXT:    v_or_b32_e32 v4, v2, v4
-; GFX8-NEXT:    v_lshlrev_b64 v[1:2], v8, s[8:9]
+; GFX8-NEXT:    v_lshlrev_b64 v[1:2], v9, s[8:9]
 ; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
-; GFX8-NEXT:    v_cndmask_b32_e32 v8, 0, v5, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v9, 0, v5, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v6, 0, v6, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; GFX8-NEXT:    v_mov_b32_e32 v3, s0
 ; GFX8-NEXT:    v_mov_b32_e32 v4, s1
 ; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
-; GFX8-NEXT:    v_and_b32_e32 v10, 0x7f, v0
+; GFX8-NEXT:    v_and_b32_e32 v11, 0x7f, v0
 ; GFX8-NEXT:    v_cndmask_b32_e32 v7, v1, v3, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v9, v2, v4, vcc
-; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 64, v10
-; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v10, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e32 v10, v2, v4, vcc
+; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 64, v11
+; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v11, s[4:5]
 ; GFX8-NEXT:    v_lshlrev_b64 v[2:3], v2, s[6:7]
-; GFX8-NEXT:    v_subrev_u32_e32 v11, vcc, 64, v10
+; GFX8-NEXT:    v_add_u32_e32 v8, vcc, v11, v8
 ; GFX8-NEXT:    v_or_b32_e32 v2, v0, v2
 ; GFX8-NEXT:    v_or_b32_e32 v3, v1, v3
-; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v11, s[6:7]
-; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v10, s[6:7]
-; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v10
+; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v8, s[6:7]
+; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v11, s[6:7]
+; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v11
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX8-NEXT:    v_mov_b32_e32 v3, s5
-; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v10
+; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v11
 ; GFX8-NEXT:    v_cndmask_b32_e64 v0, v0, v2, s[0:1]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s[0:1]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, 0, v4, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
-; GFX8-NEXT:    v_or_b32_e32 v0, v8, v0
+; GFX8-NEXT:    v_or_b32_e32 v0, v9, v0
 ; GFX8-NEXT:    v_or_b32_e32 v1, v6, v1
 ; GFX8-NEXT:    v_or_b32_e32 v2, v7, v2
-; GFX8-NEXT:    v_or_b32_e32 v3, v9, v3
+; GFX8-NEXT:    v_or_b32_e32 v3, v10, v3
 ; GFX8-NEXT:    ; return to shader part epilog
 ;
 ; GFX9-LABEL: v_fshr_i128_ssv:
@@ -6441,7 +6442,7 @@ define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
 ; GFX9-NEXT:    v_sub_u32_e32 v1, 64, v7
 ; GFX9-NEXT:    v_lshrrev_b64 v[1:2], v1, s[8:9]
 ; GFX9-NEXT:    v_lshlrev_b64 v[3:4], v7, s[0:1]
-; GFX9-NEXT:    v_subrev_u32_e32 v8, 64, v7
+; GFX9-NEXT:    v_add_u32_e32 v8, 0xffffffc0, v7
 ; GFX9-NEXT:    v_lshlrev_b64 v[5:6], v7, s[8:9]
 ; GFX9-NEXT:    v_or_b32_e32 v3, v1, v3
 ; GFX9-NEXT:    v_or_b32_e32 v4, v2, v4
@@ -6460,7 +6461,7 @@ define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
 ; GFX9-NEXT:    v_cndmask_b32_e32 v7, v1, v3, vcc
 ; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v10, s[4:5]
 ; GFX9-NEXT:    v_lshlrev_b64 v[2:3], v2, s[6:7]
-; GFX9-NEXT:    v_subrev_u32_e32 v11, 64, v10
+; GFX9-NEXT:    v_add_u32_e32 v11, 0xffffffc0, v10
 ; GFX9-NEXT:    v_or_b32_e32 v2, v0, v2
 ; GFX9-NEXT:    v_or_b32_e32 v3, v1, v3
 ; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v11, s[6:7]
@@ -6492,10 +6493,10 @@ define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v8, 64, v13
 ; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
 ; GFX10-NEXT:    s_or_b64 s[8:9], s[2:3], s[8:9]
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v14, 64, v13
+; GFX10-NEXT:    v_add_nc_u32_e32 v14, 0xffffffc0, v13
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v2, 64, v12
 ; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v12, s[8:9]
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v10, 64, v12
+; GFX10-NEXT:    v_add_nc_u32_e32 v10, 0xffffffc0, v12
 ; GFX10-NEXT:    v_lshrrev_b64 v[4:5], v13, s[4:5]
 ; GFX10-NEXT:    v_lshlrev_b64 v[8:9], v8, s[6:7]
 ; GFX10-NEXT:    v_lshrrev_b64 v[2:3], v2, s[0:1]
@@ -6544,11 +6545,11 @@ define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
 ; GFX11-NEXT:    v_and_b32_e32 v13, 0x7f, v0
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v2, 64, v12
 ; GFX11-NEXT:    v_lshlrev_b64 v[0:1], v12, s[8:9]
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v10, 64, v12
+; GFX11-NEXT:    v_add_nc_u32_e32 v10, 0xffffffc0, v12
 ; GFX11-NEXT:    v_cndmask_b32_e32 v6, 0, v6, vcc_lo
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v8, 64, v13
 ; GFX11-NEXT:    v_lshrrev_b64 v[2:3], v2, s[0:1]
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v14, 64, v13
+; GFX11-NEXT:    v_add_nc_u32_e32 v14, 0xffffffc0, v13
 ; GFX11-NEXT:    v_lshrrev_b64 v[4:5], v13, s[4:5]
 ; GFX11-NEXT:    v_lshlrev_b64 v[10:11], v10, s[0:1]
 ; GFX11-NEXT:    v_lshlrev_b64 v[8:9], v8, s[6:7]
@@ -7718,13 +7719,14 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX6-NEXT:    v_not_b32_e32 v0, v16
 ; GFX6-NEXT:    v_and_b32_e32 v19, 0x7f, v0
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, 64, v19
+; GFX6-NEXT:    v_not_b32_e32 v25, 63
 ; GFX6-NEXT:    v_lshr_b64 v[0:1], v[17:18], v0
 ; GFX6-NEXT:    v_lshl_b64 v[21:22], v[2:3], v19
-; GFX6-NEXT:    v_subrev_i32_e32 v25, vcc, 64, v19
+; GFX6-NEXT:    v_add_i32_e32 v26, vcc, v19, v25
 ; GFX6-NEXT:    v_lshl_b64 v[23:24], v[17:18], v19
 ; GFX6-NEXT:    v_or_b32_e32 v21, v0, v21
 ; GFX6-NEXT:    v_or_b32_e32 v22, v1, v22
-; GFX6-NEXT:    v_lshl_b64 v[0:1], v[17:18], v25
+; GFX6-NEXT:    v_lshl_b64 v[0:1], v[17:18], v26
 ; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v19
 ; GFX6-NEXT:    v_cndmask_b32_e32 v18, 0, v23, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v23, 0, v24, vcc
@@ -7737,7 +7739,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 64, v22
 ; GFX6-NEXT:    v_lshr_b64 v[0:1], v[8:9], v22
 ; GFX6-NEXT:    v_lshl_b64 v[2:3], v[10:11], v2
-; GFX6-NEXT:    v_subrev_i32_e32 v24, vcc, 64, v22
+; GFX6-NEXT:    v_add_i32_e32 v24, vcc, v22, v25
 ; GFX6-NEXT:    v_or_b32_e32 v2, v0, v2
 ; GFX6-NEXT:    v_or_b32_e32 v3, v1, v3
 ; GFX6-NEXT:    v_lshr_b64 v[0:1], v[10:11], v24
@@ -7761,7 +7763,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX6-NEXT:    v_lshr_b64 v[4:5], v[8:9], v4
 ; GFX6-NEXT:    v_lshl_b64 v[10:11], v[6:7], v18
 ; GFX6-NEXT:    v_or_b32_e32 v2, v19, v2
-; GFX6-NEXT:    v_subrev_i32_e32 v19, vcc, 64, v18
+; GFX6-NEXT:    v_add_i32_e32 v19, vcc, v18, v25
 ; GFX6-NEXT:    v_lshl_b64 v[16:17], v[8:9], v18
 ; GFX6-NEXT:    v_or_b32_e32 v10, v4, v10
 ; GFX6-NEXT:    v_or_b32_e32 v11, v5, v11
@@ -7778,7 +7780,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, 64, v18
 ; GFX6-NEXT:    v_lshr_b64 v[4:5], v[12:13], v18
 ; GFX6-NEXT:    v_lshl_b64 v[6:7], v[14:15], v6
-; GFX6-NEXT:    v_subrev_i32_e32 v19, vcc, 64, v18
+; GFX6-NEXT:    v_add_i32_e32 v19, vcc, v18, v25
 ; GFX6-NEXT:    v_or_b32_e32 v6, v4, v6
 ; GFX6-NEXT:    v_or_b32_e32 v7, v5, v7
 ; GFX6-NEXT:    v_lshr_b64 v[4:5], v[14:15], v19
@@ -7809,13 +7811,14 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX8-NEXT:    v_not_b32_e32 v0, v16
 ; GFX8-NEXT:    v_and_b32_e32 v19, 0x7f, v0
 ; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, 64, v19
+; GFX8-NEXT:    v_not_b32_e32 v25, 63
 ; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v0, v[17:18]
 ; GFX8-NEXT:    v_lshlrev_b64 v[21:22], v19, v[2:3]
-; GFX8-NEXT:    v_subrev_u32_e32 v25, vcc, 64, v19
+; GFX8-NEXT:    v_add_u32_e32 v26, vcc, v19, v25
 ; GFX8-NEXT:    v_lshlrev_b64 v[23:24], v19, v[17:18]
 ; GFX8-NEXT:    v_or_b32_e32 v21, v0, v21
 ; GFX8-NEXT:    v_or_b32_e32 v22, v1, v22
-; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v25, v[17:18]
+; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v26, v[17:18]
 ; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v19
 ; GFX8-NEXT:    v_cndmask_b32_e32 v18, 0, v23, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v23, 0, v24, vcc
@@ -7828,7 +7831,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 64, v22
 ; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v22, v[8:9]
 ; GFX8-NEXT:    v_lshlrev_b64 v[2:3], v2, v[10:11]
-; GFX8-NEXT:    v_subrev_u32_e32 v24, vcc, 64, v22
+; GFX8-NEXT:    v_add_u32_e32 v24, vcc, v22, v25
 ; GFX8-NEXT:    v_or_b32_e32 v2, v0, v2
 ; GFX8-NEXT:    v_or_b32_e32 v3, v1, v3
 ; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v24, v[10:11]
@@ -7852,7 +7855,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v4, v[8:9]
 ; GFX8-NEXT:    v_lshlrev_b64 v[10:11], v18, v[6:7]
 ; GFX8-NEXT:    v_or_b32_e32 v2, v19, v2
-; GFX8-NEXT:    v_subrev_u32_e32 v19, vcc, 64, v18
+; GFX8-NEXT:    v_add_u32_e32 v19, vcc, v18, v25
 ; GFX8-NEXT:    v_lshlrev_b64 v[16:17], v18, v[8:9]
 ; GFX8-NEXT:    v_or_b32_e32 v10, v4, v10
 ; GFX8-NEXT:    v_or_b32_e32 v11, v5, v11
@@ -7869,7 +7872,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX8-NEXT:    v_sub_u32_e32 v6, vcc, 64, v18
 ; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v18, v[12:13]
 ; GFX8-NEXT:    v_lshlrev_b64 v[6:7], v6, v[14:15]
-; GFX8-NEXT:    v_subrev_u32_e32 v19, vcc, 64, v18
+; GFX8-NEXT:    v_add_u32_e32 v19, vcc, v18, v25
 ; GFX8-NEXT:    v_or_b32_e32 v6, v4, v6
 ; GFX8-NEXT:    v_or_b32_e32 v7, v5, v7
 ; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v19, v[14:15]
@@ -7902,7 +7905,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX9-NEXT:    v_sub_u32_e32 v0, 64, v19
 ; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v0, v[17:18]
 ; GFX9-NEXT:    v_lshlrev_b64 v[21:22], v19, v[2:3]
-; GFX9-NEXT:    v_subrev_u32_e32 v25, 64, v19
+; GFX9-NEXT:    v_add_u32_e32 v25, 0xffffffc0, v19
 ; GFX9-NEXT:    v_lshlrev_b64 v[23:24], v19, v[17:18]
 ; GFX9-NEXT:    v_or_b32_e32 v21, v0, v21
 ; GFX9-NEXT:    v_or_b32_e32 v22, v1, v22
@@ -7919,7 +7922,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX9-NEXT:    v_cndmask_b32_e32 v21, v1, v3, vcc
 ; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v22, v[8:9]
 ; GFX9-NEXT:    v_lshlrev_b64 v[2:3], v2, v[10:11]
-; GFX9-NEXT:    v_subrev_u32_e32 v24, 64, v22
+; GFX9-NEXT:    v_add_u32_e32 v24, 0xffffffc0, v22
 ; GFX9-NEXT:    v_or_b32_e32 v2, v0, v2
 ; GFX9-NEXT:    v_or_b32_e32 v3, v1, v3
 ; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v24, v[10:11]
@@ -7942,7 +7945,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX9-NEXT:    v_lshrrev_b64 v[4:5], v4, v[8:9]
 ; GFX9-NEXT:    v_lshlrev_b64 v[10:11], v18, v[6:7]
 ; GFX9-NEXT:    v_or_b32_e32 v2, v19, v2
-; GFX9-NEXT:    v_subrev_u32_e32 v19, 64, v18
+; GFX9-NEXT:    v_add_u32_e32 v19, 0xffffffc0, v18
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, 0, v17, vcc
 ; GFX9-NEXT:    v_lshlrev_b64 v[16:17], v18, v[8:9]
 ; GFX9-NEXT:    v_or_b32_e32 v10, v4, v10
@@ -7960,7 +7963,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX9-NEXT:    v_cndmask_b32_e32 v11, v5, v7, vcc
 ; GFX9-NEXT:    v_lshrrev_b64 v[4:5], v18, v[12:13]
 ; GFX9-NEXT:    v_lshlrev_b64 v[6:7], v6, v[14:15]
-; GFX9-NEXT:    v_subrev_u32_e32 v19, 64, v18
+; GFX9-NEXT:    v_add_u32_e32 v19, 0xffffffc0, v18
 ; GFX9-NEXT:    v_or_b32_e32 v6, v4, v6
 ; GFX9-NEXT:    v_or_b32_e32 v7, v5, v7
 ; GFX9-NEXT:    v_lshrrev_b64 v[4:5], v19, v[14:15]
@@ -7991,11 +7994,11 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX10-NEXT:    v_and_b32_e32 v25, 0x7f, v17
 ; GFX10-NEXT:    v_lshrrev_b32_e32 v17, 31, v1
 ; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v27, 64, v26
+; GFX10-NEXT:    v_add_nc_u32_e32 v27, 0xffffffc0, v26
 ; GFX10-NEXT:    v_cmp_gt_u32_e64 s4, 64, v26
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v18, 64, v25
 ; GFX10-NEXT:    v_or_b32_e32 v2, v2, v17
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v19, 64, v25
+; GFX10-NEXT:    v_add_nc_u32_e32 v19, 0xffffffc0, v25
 ; GFX10-NEXT:    v_lshlrev_b64 v[23:24], v25, v[0:1]
 ; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v25
 ; GFX10-NEXT:    v_lshrrev_b64 v[17:18], v18, v[0:1]
@@ -8035,12 +8038,12 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX10-NEXT:    v_lshrrev_b64 v[8:9], v9, v[4:5]
 ; GFX10-NEXT:    v_lshlrev_b64 v[10:11], v25, v[6:7]
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v20, 64, v23
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, 64, v25
+; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0xffffffc0, v25
 ; GFX10-NEXT:    v_or_b32_e32 v2, v18, v2
 ; GFX10-NEXT:    v_lshlrev_b64 v[16:17], v25, v[4:5]
 ; GFX10-NEXT:    v_lshrrev_b64 v[18:19], v23, v[12:13]
 ; GFX10-NEXT:    v_or_b32_e32 v10, v8, v10
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v8, 64, v23
+; GFX10-NEXT:    v_add_nc_u32_e32 v8, 0xffffffc0, v23
 ; GFX10-NEXT:    v_lshlrev_b64 v[20:21], v20, v[14:15]
 ; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v25
 ; GFX10-NEXT:    v_lshlrev_b64 v[3:4], v3, v[4:5]
@@ -8091,41 +8094,41 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX11-NEXT:    v_cndmask_b32_e32 v24, 0, v24, vcc_lo
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v18, 64, v25
 ; GFX11-NEXT:    v_lshlrev_b64 v[21:22], v25, v[2:3]
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v19, 64, v25
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v27, 64, v26
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
 ; GFX11-NEXT:    v_cmp_gt_u32_e64 s0, 64, v26
 ; GFX11-NEXT:    v_lshrrev_b64 v[17:18], v18, v[0:1]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_lshlrev_b64 v[0:1], v19, v[0:1]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
 ; GFX11-NEXT:    v_or_b32_e32 v22, v18, v22
-; GFX11-NEXT:    v_sub_nc_u32_e32 v18, 64, v26
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_add_nc_u32_e32 v19, 0xffffffc0, v25
 ; GFX11-NEXT:    v_or_b32_e32 v21, v17, v21
+; GFX11-NEXT:    v_sub_nc_u32_e32 v18, 64, v26
 ; GFX11-NEXT:    v_lshrrev_b64 v[16:17], v26, v[8:9]
-; GFX11-NEXT:    v_cndmask_b32_e32 v22, v1, v22, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    v_lshlrev_b64 v[0:1], v19, v[0:1]
 ; GFX11-NEXT:    v_lshlrev_b64 v[18:19], v18, v[10:11]
-; GFX11-NEXT:    v_cndmask_b32_e32 v21, v0, v21, vcc_lo
-; GFX11-NEXT:    v_lshrrev_b64 v[0:1], v27, v[10:11]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_dual_cndmask_b32 v22, v1, v22 :: v_dual_cndmask_b32 v21, v0, v21
 ; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v25
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_add_nc_u32_e32 v27, 0xffffffc0, v26
 ; GFX11-NEXT:    v_or_b32_e32 v16, v16, v18
 ; GFX11-NEXT:    v_or_b32_e32 v17, v17, v19
+; GFX11-NEXT:    v_cndmask_b32_e32 v22, v22, v3, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_lshrrev_b64 v[0:1], v27, v[10:11]
 ; GFX11-NEXT:    v_cndmask_b32_e64 v0, v0, v16, s0
 ; GFX11-NEXT:    v_not_b32_e32 v16, v20
 ; GFX11-NEXT:    v_cndmask_b32_e32 v18, v21, v2, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s0
-; GFX11-NEXT:    v_cndmask_b32_e32 v22, v22, v3, vcc_lo
 ; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v26
-; GFX11-NEXT:    v_and_b32_e32 v25, 0x7f, v16
+; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s0
 ; GFX11-NEXT:    v_lshrrev_b64 v[2:3], v26, v[10:11]
-; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v9 :: v_dual_cndmask_b32 v0, v0, v8
+; GFX11-NEXT:    v_and_b32_e32 v25, 0x7f, v16
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v8 :: v_dual_cndmask_b32 v1, v1, v9
 ; GFX11-NEXT:    v_lshrrev_b32_e32 v8, 31, v5
 ; GFX11-NEXT:    v_lshlrev_b64 v[4:5], 1, v[4:5]
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v9, 64, v25
 ; GFX11-NEXT:    v_cndmask_b32_e64 v26, 0, v3, s0
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v3, 64, v25
+; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xffffffc0, v25
 ; GFX11-NEXT:    v_or_b32_e32 v6, v6, v8
 ; GFX11-NEXT:    v_or_b32_e32 v0, v23, v0
 ; GFX11-NEXT:    v_lshrrev_b64 v[8:9], v9, v[4:5]
@@ -8143,7 +8146,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
 ; GFX11-NEXT:    v_dual_cndmask_b32 v11, 0, v16 :: v_dual_cndmask_b32 v10, v3, v10
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v20, 64, v23
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v8, 64, v23
+; GFX11-NEXT:    v_add_nc_u32_e32 v8, 0xffffffc0, v23
 ; GFX11-NEXT:    v_lshrrev_b64 v[18:19], v23, v[12:13]
 ; GFX11-NEXT:    v_cmp_gt_u32_e64 s0, 64, v23
 ; GFX11-NEXT:    v_cndmask_b32_e32 v5, v4, v5, vcc_lo
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
index 5dd4fa0809131f..cc185aff9eff22 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
@@ -1577,7 +1577,7 @@ define i65 @v_lshr_i65(i65 %value, i65 %amount) {
 ; GFX6-NEXT:    v_and_b32_e32 v4, 1, v2
 ; GFX6-NEXT:    v_mov_b32_e32 v5, 0
 ; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, 64, v3
-; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, 64, v3
+; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 0xffffffc0, v3
 ; GFX6-NEXT:    v_lshr_b64 v[6:7], v[0:1], v3
 ; GFX6-NEXT:    v_lshl_b64 v[8:9], v[4:5], v8
 ; GFX6-NEXT:    v_lshr_b64 v[10:11], v[4:5], v3
@@ -1599,7 +1599,7 @@ define i65 @v_lshr_i65(i65 %value, i65 %amount) {
 ; GFX8-NEXT:    v_and_b32_e32 v4, 1, v2
 ; GFX8-NEXT:    v_mov_b32_e32 v5, 0
 ; GFX8-NEXT:    v_sub_u32_e32 v8, vcc, 64, v3
-; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, 64, v3
+; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0xffffffc0, v3
 ; GFX8-NEXT:    v_lshrrev_b64 v[6:7], v3, v[0:1]
 ; GFX8-NEXT:    v_lshlrev_b64 v[8:9], v8, v[4:5]
 ; GFX8-NEXT:    v_lshrrev_b64 v[10:11], v3, v[4:5]
@@ -1621,7 +1621,7 @@ define i65 @v_lshr_i65(i65 %value, i65 %amount) {
 ; GFX9-NEXT:    v_and_b32_e32 v4, 1, v2
 ; GFX9-NEXT:    v_mov_b32_e32 v5, 0
 ; GFX9-NEXT:    v_sub_u32_e32 v8, 64, v3
-; GFX9-NEXT:    v_subrev_u32_e32 v2, 64, v3
+; GFX9-NEXT:    v_add_u32_e32 v2, 0xffffffc0, v3
 ; GFX9-NEXT:    v_lshrrev_b64 v[6:7], v3, v[0:1]
 ; GFX9-NEXT:    v_lshlrev_b64 v[8:9], v8, v[4:5]
 ; GFX9-NEXT:    v_lshrrev_b64 v[10:11], v3, v[4:5]
@@ -1643,7 +1643,7 @@ define i65 @v_lshr_i65(i65 %value, i65 %amount) {
 ; GFX10-NEXT:    v_mov_b32_e32 v5, 0
 ; GFX10-NEXT:    v_and_b32_e32 v4, 1, v2
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v2, 64, v3
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v10, 64, v3
+; GFX10-NEXT:    v_add_nc_u32_e32 v10, 0xffffffc0, v3
 ; GFX10-NEXT:    v_lshrrev_b64 v[6:7], v3, v[0:1]
 ; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v3
 ; GFX10-NEXT:    v_cmp_eq_u32_e64 s4, 0, v3
@@ -1664,20 +1664,20 @@ define i65 @v_lshr_i65(i65 %value, i65 %amount) {
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_dual_mov_b32 v5, 0 :: v_dual_and_b32 v4, 1, v2
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v2, 64, v3
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v10, 64, v3
 ; GFX11-NEXT:    v_lshrrev_b64 v[6:7], v3, v[0:1]
 ; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v3
 ; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 0, v3
 ; GFX11-NEXT:    v_lshlrev_b64 v[8:9], v2, v[4:5]
-; GFX11-NEXT:    v_lshrrev_b64 v[10:11], v10, v[4:5]
-; GFX11-NEXT:    v_lshrrev_b64 v[4:5], v3, v[4:5]
 ; GFX11-NEXT:    v_or_b32_e32 v2, v6, v8
 ; GFX11-NEXT:    v_or_b32_e32 v6, v7, v9
-; GFX11-NEXT:    v_cndmask_b32_e32 v2, v10, v2, vcc_lo
+; GFX11-NEXT:    v_add_nc_u32_e32 v10, 0xffffffc0, v3
+; GFX11-NEXT:    v_lshrrev_b64 v[10:11], v10, v[4:5]
+; GFX11-NEXT:    v_lshrrev_b64 v[4:5], v3, v[4:5]
 ; GFX11-NEXT:    v_cndmask_b32_e32 v5, v11, v6, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v2, v10, v2, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e64 v1, v5, v1, s0
 ; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, v0, s0
 ; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0, v4, vcc_lo
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v5, v1, s0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %result = lshr i65 %value, %amount
   ret i65 %result
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
index 2c2f8e914447d1..88eb0e4b848c95 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
@@ -325,7 +325,7 @@ define i32 @v_sdiv_i32_oddk_denom(i32 %num) {
 ; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
 ; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v4
 ; CHECK-NEXT:    v_cndmask_b32_e64 v2, v2, v5, s[4:5]
-; CHECK-NEXT:    v_subrev_i32_e32 v3, vcc, 0x12d8fb, v0
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 0xffed2705, v0
 ; CHECK-NEXT:    v_cndmask_b32_e64 v0, v0, v3, s[4:5]
 ; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v2
 ; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
@@ -353,29 +353,29 @@ define <2 x i32> @v_sdiv_v2i32_oddk_denom(<2 x i32> %num) {
 ; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
 ; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT:    v_mul_lo_u32 v5, v4, v5
-; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v4
+; GISEL-NEXT:    v_mul_lo_u32 v7, v4, v5
+; GISEL-NEXT:    v_mul_hi_u32 v7, v4, v7
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
+; GISEL-NEXT:    v_mul_hi_u32 v7, v0, v4
 ; GISEL-NEXT:    v_mul_hi_u32 v4, v1, v4
-; GISEL-NEXT:    v_mul_lo_u32 v7, v5, v3
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, 1, v5
-; GISEL-NEXT:    v_mul_lo_u32 v9, v4, v3
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, 1, v4
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v9
+; GISEL-NEXT:    v_mul_lo_u32 v8, v7, v3
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v7
+; GISEL-NEXT:    v_mul_lo_u32 v10, v4, v3
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, 1, v4
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
+; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v10
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v8, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v0, v3
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, v7, v9, s[4:5]
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v0, v5
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v1, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[6:7]
-; GISEL-NEXT:    v_subrev_i32_e32 v8, vcc, 0x12d8fb, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, v7, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, v4, v11, s[6:7]
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, 0xffed2705, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, v5, s[4:5]
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v7
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v8, s[6:7]
 ; GISEL-NEXT:    v_add_i32_e32 v8, vcc, 1, v4
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v5, v7, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v7, v5, vcc
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
 ; GISEL-NEXT:    v_cndmask_b32_e32 v1, v4, v8, vcc
 ; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
@@ -398,29 +398,29 @@ define <2 x i32> @v_sdiv_v2i32_oddk_denom(<2 x i32> %num) {
 ; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
 ; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
-; CGP-NEXT:    v_mul_lo_u32 v4, v3, v4
-; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
-; CGP-NEXT:    v_mul_hi_u32 v4, v0, v3
+; CGP-NEXT:    v_mul_lo_u32 v7, v3, v4
+; CGP-NEXT:    v_mul_hi_u32 v7, v3, v7
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
+; CGP-NEXT:    v_mul_hi_u32 v7, v0, v3
 ; CGP-NEXT:    v_mul_hi_u32 v3, v1, v3
-; CGP-NEXT:    v_mul_lo_u32 v7, v4, v5
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v4
-; CGP-NEXT:    v_mul_lo_u32 v9, v3, v5
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, 1, v3
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v9
+; CGP-NEXT:    v_mul_lo_u32 v8, v7, v5
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, 1, v7
+; CGP-NEXT:    v_mul_lo_u32 v10, v3, v5
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, 1, v3
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v10
 ; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v4, v4, v8, s[4:5]
-; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v0, v5
+; CGP-NEXT:    v_cndmask_b32_e64 v7, v7, v9, s[4:5]
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v0, v4
 ; CGP-NEXT:    v_cmp_ge_u32_e64 s[6:7], v1, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v3, v3, v10, s[6:7]
-; CGP-NEXT:    v_subrev_i32_e32 v8, vcc, 0x12d8fb, v1
-; CGP-NEXT:    v_cndmask_b32_e64 v0, v0, v7, s[4:5]
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s[6:7]
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, 0xffed2705, v1
+; CGP-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s[4:5]
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v7
 ; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v8, s[6:7]
 ; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v3
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v5
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v4, v7, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v7, v4, vcc
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
 ; CGP-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
 ; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
index 4cf1c92539c36f..b12e915c7d21b1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
@@ -1583,7 +1583,7 @@ define i65 @v_shl_i65(i65 %value, i65 %amount) {
 ; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 64, v3
 ; GFX6-NEXT:    v_lshr_b64 v[4:5], v[0:1], v4
 ; GFX6-NEXT:    v_lshl_b64 v[5:6], v[2:3], v3
-; GFX6-NEXT:    v_subrev_i32_e32 v8, vcc, 64, v3
+; GFX6-NEXT:    v_add_i32_e32 v8, vcc, 0xffffffc0, v3
 ; GFX6-NEXT:    v_lshl_b64 v[6:7], v[0:1], v3
 ; GFX6-NEXT:    v_or_b32_e32 v9, v4, v5
 ; GFX6-NEXT:    v_lshl_b64 v[4:5], v[0:1], v8
@@ -1601,7 +1601,7 @@ define i65 @v_shl_i65(i65 %value, i65 %amount) {
 ; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 64, v3
 ; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v4, v[0:1]
 ; GFX8-NEXT:    v_lshlrev_b64 v[5:6], v3, v[2:3]
-; GFX8-NEXT:    v_subrev_u32_e32 v8, vcc, 64, v3
+; GFX8-NEXT:    v_add_u32_e32 v8, vcc, 0xffffffc0, v3
 ; GFX8-NEXT:    v_lshlrev_b64 v[6:7], v3, v[0:1]
 ; GFX8-NEXT:    v_or_b32_e32 v9, v4, v5
 ; GFX8-NEXT:    v_lshlrev_b64 v[4:5], v8, v[0:1]
@@ -1619,7 +1619,7 @@ define i65 @v_shl_i65(i65 %value, i65 %amount) {
 ; GFX9-NEXT:    v_sub_u32_e32 v4, 64, v3
 ; GFX9-NEXT:    v_lshrrev_b64 v[4:5], v4, v[0:1]
 ; GFX9-NEXT:    v_lshlrev_b64 v[5:6], v3, v[2:3]
-; GFX9-NEXT:    v_subrev_u32_e32 v8, 64, v3
+; GFX9-NEXT:    v_add_u32_e32 v8, 0xffffffc0, v3
 ; GFX9-NEXT:    v_lshlrev_b64 v[6:7], v3, v[0:1]
 ; GFX9-NEXT:    v_or_b32_e32 v9, v4, v5
 ; GFX9-NEXT:    v_lshlrev_b64 v[4:5], v8, v[0:1]
@@ -1636,7 +1636,7 @@ define i65 @v_shl_i65(i65 %value, i65 %amount) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v6, 64, v3
 ; GFX10-NEXT:    v_lshlrev_b64 v[4:5], v3, v[2:3]
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v8, 64, v3
+; GFX10-NEXT:    v_add_nc_u32_e32 v8, 0xffffffc0, v3
 ; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v3
 ; GFX10-NEXT:    v_lshrrev_b64 v[5:6], v6, v[0:1]
 ; GFX10-NEXT:    v_lshlrev_b64 v[6:7], v3, v[0:1]
@@ -1654,7 +1654,7 @@ define i65 @v_shl_i65(i65 %value, i65 %amount) {
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v6, 64, v3
 ; GFX11-NEXT:    v_lshlrev_b64 v[4:5], v3, v[2:3]
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v8, 64, v3
+; GFX11-NEXT:    v_add_nc_u32_e32 v8, 0xffffffc0, v3
 ; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v3
 ; GFX11-NEXT:    v_lshrrev_b64 v[5:6], v6, v[0:1]
 ; GFX11-NEXT:    v_lshlrev_b64 v[6:7], v3, v[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
index 1bb606f36e48d2..2b12e4b973acb2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
@@ -268,10 +268,10 @@ define i32 @v_srem_i32_pow2k_denom(i32 %num) {
 ; CHECK-NEXT:    v_mul_hi_u32 v2, v0, v2
 ; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 12, v2
 ; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
-; CHECK-NEXT:    v_subrev_i32_e32 v2, vcc, 0x1000, v0
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 0xfffff000, v0
 ; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; CHECK-NEXT:    v_subrev_i32_e32 v2, vcc, 0x1000, v0
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 0xfffff000, v0
 ; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
@@ -297,23 +297,23 @@ define <2 x i32> @v_srem_v2i32_pow2k_denom(<2 x i32> %num) {
 ; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
 ; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT:    v_mul_lo_u32 v5, v4, v5
-; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v4
+; GISEL-NEXT:    v_mul_lo_u32 v7, v4, v5
+; GISEL-NEXT:    v_mul_hi_u32 v7, v4, v7
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
+; GISEL-NEXT:    v_mul_hi_u32 v7, v0, v4
 ; GISEL-NEXT:    v_mul_hi_u32 v4, v1, v4
-; GISEL-NEXT:    v_lshlrev_b32_e32 v5, 12, v5
+; GISEL-NEXT:    v_lshlrev_b32_e32 v7, 12, v7
 ; GISEL-NEXT:    v_lshlrev_b32_e32 v4, 12, v4
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
 ; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
-; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v0, v3
-; GISEL-NEXT:    v_subrev_i32_e32 v5, vcc, 0x1000, v1
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v0, v5
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 0xfffff000, v1
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v0, v3
-; GISEL-NEXT:    v_subrev_i32_e32 v5, vcc, 0x1000, v1
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v0, v5
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, 0xfffff000, v1
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
@@ -338,23 +338,23 @@ define <2 x i32> @v_srem_v2i32_pow2k_denom(<2 x i32> %num) {
 ; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
 ; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
-; CGP-NEXT:    v_mul_lo_u32 v4, v3, v4
-; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
-; CGP-NEXT:    v_mul_hi_u32 v4, v0, v3
+; CGP-NEXT:    v_mul_lo_u32 v7, v3, v4
+; CGP-NEXT:    v_mul_hi_u32 v7, v3, v7
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
+; CGP-NEXT:    v_mul_hi_u32 v7, v0, v3
 ; CGP-NEXT:    v_mul_hi_u32 v3, v1, v3
-; CGP-NEXT:    v_lshlrev_b32_e32 v4, 12, v4
+; CGP-NEXT:    v_lshlrev_b32_e32 v7, 12, v7
 ; CGP-NEXT:    v_lshlrev_b32_e32 v3, 12, v3
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
 ; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
-; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v0, v5
-; CGP-NEXT:    v_subrev_i32_e32 v4, vcc, 0x1000, v1
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v0, v4
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, 0xfffff000, v1
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v5
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
-; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v0, v5
-; CGP-NEXT:    v_subrev_i32_e32 v4, vcc, 0x1000, v1
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v0, v4
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, 0xfffff000, v1
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v5
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
@@ -386,10 +386,10 @@ define i32 @v_srem_i32_oddk_denom(i32 %num) {
 ; CHECK-NEXT:    v_mul_hi_u32 v2, v0, v2
 ; CHECK-NEXT:    v_mul_lo_u32 v2, v2, v4
 ; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
-; CHECK-NEXT:    v_subrev_i32_e32 v2, vcc, 0x12d8fb, v0
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 0xffed2705, v0
 ; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; CHECK-NEXT:    v_subrev_i32_e32 v2, vcc, 0x12d8fb, v0
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 0xffed2705, v0
 ; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
@@ -415,23 +415,23 @@ define <2 x i32> @v_srem_v2i32_oddk_denom(<2 x i32> %num) {
 ; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
 ; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT:    v_mul_lo_u32 v5, v4, v5
-; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v4
+; GISEL-NEXT:    v_mul_lo_u32 v7, v4, v5
+; GISEL-NEXT:    v_mul_hi_u32 v7, v4, v7
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
+; GISEL-NEXT:    v_mul_hi_u32 v7, v0, v4
 ; GISEL-NEXT:    v_mul_hi_u32 v4, v1, v4
-; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v3
+; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v3
 ; GISEL-NEXT:    v_mul_lo_u32 v4, v4, v3
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
 ; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
-; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v0, v3
-; GISEL-NEXT:    v_subrev_i32_e32 v5, vcc, 0x12d8fb, v1
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v0, v5
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 0xffed2705, v1
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v0, v3
-; GISEL-NEXT:    v_subrev_i32_e32 v5, vcc, 0x12d8fb, v1
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v0, v5
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, 0xffed2705, v1
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
@@ -456,23 +456,23 @@ define <2 x i32> @v_srem_v2i32_oddk_denom(<2 x i32> %num) {
 ; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
 ; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
-; CGP-NEXT:    v_mul_lo_u32 v4, v3, v4
-; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
-; CGP-NEXT:    v_mul_hi_u32 v4, v0, v3
+; CGP-NEXT:    v_mul_lo_u32 v7, v3, v4
+; CGP-NEXT:    v_mul_hi_u32 v7, v3, v7
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
+; CGP-NEXT:    v_mul_hi_u32 v7, v0, v3
 ; CGP-NEXT:    v_mul_hi_u32 v3, v1, v3
-; CGP-NEXT:    v_mul_lo_u32 v4, v4, v5
+; CGP-NEXT:    v_mul_lo_u32 v7, v7, v5
 ; CGP-NEXT:    v_mul_lo_u32 v3, v3, v5
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
 ; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
-; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v0, v5
-; CGP-NEXT:    v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v0, v4
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, 0xffed2705, v1
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v5
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
-; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v0, v5
-; CGP-NEXT:    v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v0, v4
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, 0xffed2705, v1
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v5
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
index 2572f8581f0edf..7214f4ab581d5d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
@@ -12,9 +12,9 @@ define i7 @v_ssubsat_i7(i7 %lhs, i7 %rhs) {
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 25, v0
 ; GFX6-NEXT:    v_max_i32_e32 v2, -1, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 25, v1
-; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2
+; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 0x80000001, v2
 ; GFX6-NEXT:    v_min_i32_e32 v3, -1, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 0x80000000, v3
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 0x80000000, v3
 ; GFX6-NEXT:    v_max_i32_e32 v1, v2, v1
 ; GFX6-NEXT:    v_min_i32_e32 v1, v1, v3
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
@@ -27,9 +27,9 @@ define i7 @v_ssubsat_i7(i7 %lhs, i7 %rhs) {
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 9, v0
 ; GFX8-NEXT:    v_max_i16_e32 v2, -1, v0
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 9, v1
-; GFX8-NEXT:    v_subrev_u16_e32 v2, 0x7fff, v2
+; GFX8-NEXT:    v_add_u16_e32 v2, 0x8001, v2
 ; GFX8-NEXT:    v_min_i16_e32 v3, -1, v0
-; GFX8-NEXT:    v_subrev_u16_e32 v3, 0x8000, v3
+; GFX8-NEXT:    v_add_u16_e32 v3, 0x8000, v3
 ; GFX8-NEXT:    v_max_i16_e32 v1, v2, v1
 ; GFX8-NEXT:    v_min_i16_e32 v1, v1, v3
 ; GFX8-NEXT:    v_sub_u16_e32 v0, v0, v1
@@ -63,9 +63,9 @@ define amdgpu_ps i7 @s_ssubsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
 ; GFX6-NEXT:    s_lshl_b32 s0, s0, 25
 ; GFX6-NEXT:    s_max_i32 s2, s0, -1
 ; GFX6-NEXT:    s_lshl_b32 s1, s1, 25
-; GFX6-NEXT:    s_sub_i32 s2, s2, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s2, s2, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s3, s0, -1
-; GFX6-NEXT:    s_sub_i32 s3, s3, 0x80000000
+; GFX6-NEXT:    s_add_i32 s3, s3, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s1, s2, s1
 ; GFX6-NEXT:    s_min_i32 s1, s1, s3
 ; GFX6-NEXT:    s_sub_i32 s0, s0, s1
@@ -79,11 +79,11 @@ define amdgpu_ps i7 @s_ssubsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
 ; GFX8-NEXT:    s_sext_i32_i16 s3, -1
 ; GFX8-NEXT:    s_max_i32 s4, s2, s3
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 9
-; GFX8-NEXT:    s_sub_i32 s4, s4, 0x7fff
+; GFX8-NEXT:    s_addk_i32 s4, 0x8001
 ; GFX8-NEXT:    s_min_i32 s2, s2, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_sub_i32 s2, s2, 0xffff8000
+; GFX8-NEXT:    s_addk_i32 s2, 0x8000
 ; GFX8-NEXT:    s_max_i32 s1, s3, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
@@ -122,9 +122,9 @@ define i8 @v_ssubsat_i8(i8 %lhs, i8 %rhs) {
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
 ; GFX6-NEXT:    v_max_i32_e32 v2, -1, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
-; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2
+; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 0x80000001, v2
 ; GFX6-NEXT:    v_min_i32_e32 v3, -1, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 0x80000000, v3
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 0x80000000, v3
 ; GFX6-NEXT:    v_max_i32_e32 v1, v2, v1
 ; GFX6-NEXT:    v_min_i32_e32 v1, v1, v3
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
@@ -137,9 +137,9 @@ define i8 @v_ssubsat_i8(i8 %lhs, i8 %rhs) {
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
 ; GFX8-NEXT:    v_max_i16_e32 v2, -1, v0
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
-; GFX8-NEXT:    v_subrev_u16_e32 v2, 0x7fff, v2
+; GFX8-NEXT:    v_add_u16_e32 v2, 0x8001, v2
 ; GFX8-NEXT:    v_min_i16_e32 v3, -1, v0
-; GFX8-NEXT:    v_subrev_u16_e32 v3, 0x8000, v3
+; GFX8-NEXT:    v_add_u16_e32 v3, 0x8000, v3
 ; GFX8-NEXT:    v_max_i16_e32 v1, v2, v1
 ; GFX8-NEXT:    v_min_i16_e32 v1, v1, v3
 ; GFX8-NEXT:    v_sub_u16_e32 v0, v0, v1
@@ -173,9 +173,9 @@ define amdgpu_ps i8 @s_ssubsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
 ; GFX6-NEXT:    s_lshl_b32 s0, s0, 24
 ; GFX6-NEXT:    s_max_i32 s2, s0, -1
 ; GFX6-NEXT:    s_lshl_b32 s1, s1, 24
-; GFX6-NEXT:    s_sub_i32 s2, s2, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s2, s2, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s3, s0, -1
-; GFX6-NEXT:    s_sub_i32 s3, s3, 0x80000000
+; GFX6-NEXT:    s_add_i32 s3, s3, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s1, s2, s1
 ; GFX6-NEXT:    s_min_i32 s1, s1, s3
 ; GFX6-NEXT:    s_sub_i32 s0, s0, s1
@@ -189,11 +189,11 @@ define amdgpu_ps i8 @s_ssubsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
 ; GFX8-NEXT:    s_sext_i32_i16 s3, -1
 ; GFX8-NEXT:    s_max_i32 s4, s2, s3
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
-; GFX8-NEXT:    s_sub_i32 s4, s4, 0x7fff
+; GFX8-NEXT:    s_addk_i32 s4, 0x8001
 ; GFX8-NEXT:    s_min_i32 s2, s2, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_sub_i32 s2, s2, 0xffff8000
+; GFX8-NEXT:    s_addk_i32 s2, 0x8000
 ; GFX8-NEXT:    s_max_i32 s1, s3, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
@@ -234,18 +234,19 @@ define i16 @v_ssubsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
 ; GFX6-NEXT:    v_max_i32_e32 v4, -1, v0
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 8, v1
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
-; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, 0x7fffffff, v4
+; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 0x80000001, v4
 ; GFX6-NEXT:    v_min_i32_e32 v5, -1, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v5, vcc, 0x80000000, v5
+; GFX6-NEXT:    v_bfrev_b32_e32 v6, 1
+; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
 ; GFX6-NEXT:    v_max_i32_e32 v1, v4, v1
 ; GFX6-NEXT:    v_min_i32_e32 v1, v1, v5
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
 ; GFX6-NEXT:    v_max_i32_e32 v3, -1, v1
-; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 0x7fffffff, v3
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 0x80000001, v3
 ; GFX6-NEXT:    v_min_i32_e32 v4, -1, v1
-; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, 0x80000000, v4
+; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 0x80000000, v4
 ; GFX6-NEXT:    v_max_i32_e32 v2, v3, v2
 ; GFX6-NEXT:    v_min_i32_e32 v2, v2, v4
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v2
@@ -266,16 +267,16 @@ define i16 @v_ssubsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
 ; GFX8-NEXT:    v_max_i16_e32 v4, -1, v0
 ; GFX8-NEXT:    v_lshrrev_b32_sdwa v2, v2, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
-; GFX8-NEXT:    v_subrev_u16_e32 v4, 0x7fff, v4
+; GFX8-NEXT:    v_add_u16_e32 v4, 0x8001, v4
 ; GFX8-NEXT:    v_min_i16_e32 v5, -1, v0
-; GFX8-NEXT:    v_subrev_u16_e32 v5, 0x8000, v5
+; GFX8-NEXT:    v_add_u16_e32 v5, 0x8000, v5
 ; GFX8-NEXT:    v_max_i16_e32 v1, v4, v1
 ; GFX8-NEXT:    v_min_i16_e32 v1, v1, v5
 ; GFX8-NEXT:    v_sub_u16_e32 v0, v0, v1
 ; GFX8-NEXT:    v_max_i16_e32 v1, -1, v3
-; GFX8-NEXT:    v_subrev_u16_e32 v1, 0x7fff, v1
+; GFX8-NEXT:    v_add_u16_e32 v1, 0x8001, v1
 ; GFX8-NEXT:    v_min_i16_e32 v4, -1, v3
-; GFX8-NEXT:    v_subrev_u16_e32 v4, 0x8000, v4
+; GFX8-NEXT:    v_add_u16_e32 v4, 0x8000, v4
 ; GFX8-NEXT:    v_max_i16_e32 v1, v1, v2
 ; GFX8-NEXT:    v_min_i16_e32 v1, v1, v4
 ; GFX8-NEXT:    v_sub_u16_e32 v1, v3, v1
@@ -355,18 +356,18 @@ define amdgpu_ps i16 @s_ssubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
 ; GFX6-NEXT:    s_max_i32 s4, s0, -1
 ; GFX6-NEXT:    s_lshr_b32 s3, s1, 8
 ; GFX6-NEXT:    s_lshl_b32 s1, s1, 24
-; GFX6-NEXT:    s_sub_i32 s4, s4, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s4, s4, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s5, s0, -1
-; GFX6-NEXT:    s_sub_i32 s5, s5, 0x80000000
+; GFX6-NEXT:    s_add_i32 s5, s5, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s1, s4, s1
 ; GFX6-NEXT:    s_min_i32 s1, s1, s5
 ; GFX6-NEXT:    s_sub_i32 s0, s0, s1
 ; GFX6-NEXT:    s_lshl_b32 s1, s2, 24
 ; GFX6-NEXT:    s_lshl_b32 s2, s3, 24
 ; GFX6-NEXT:    s_max_i32 s3, s1, -1
-; GFX6-NEXT:    s_sub_i32 s3, s3, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s3, s3, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s4, s1, -1
-; GFX6-NEXT:    s_sub_i32 s4, s4, 0x80000000
+; GFX6-NEXT:    s_add_i32 s4, s4, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s2, s3, s2
 ; GFX6-NEXT:    s_min_i32 s2, s2, s4
 ; GFX6-NEXT:    s_sub_i32 s1, s1, s2
@@ -387,11 +388,11 @@ define amdgpu_ps i16 @s_ssubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_max_i32 s6, s4, s5
 ; GFX8-NEXT:    s_lshr_b32 s3, s1, 8
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
-; GFX8-NEXT:    s_sub_i32 s6, s6, 0x7fff
+; GFX8-NEXT:    s_addk_i32 s6, 0x8001
 ; GFX8-NEXT:    s_min_i32 s4, s4, s5
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_sub_i32 s4, s4, 0xffff8000
+; GFX8-NEXT:    s_addk_i32 s4, 0x8000
 ; GFX8-NEXT:    s_max_i32 s1, s6, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
@@ -401,11 +402,11 @@ define amdgpu_ps i16 @s_ssubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_lshl_b32 s2, s3, 8
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s1
 ; GFX8-NEXT:    s_max_i32 s4, s3, s5
-; GFX8-NEXT:    s_sub_i32 s4, s4, 0x7fff
+; GFX8-NEXT:    s_addk_i32 s4, 0x8001
 ; GFX8-NEXT:    s_min_i32 s3, s3, s5
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
-; GFX8-NEXT:    s_sub_i32 s3, s3, 0xffff8000
+; GFX8-NEXT:    s_addk_i32 s3, 0x8000
 ; GFX8-NEXT:    s_max_i32 s2, s4, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
@@ -509,29 +510,29 @@ define i32 @v_ssubsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
-; GFX6-NEXT:    v_subrev_i32_e32 v8, vcc, 0x7fffffff, v8
+; GFX6-NEXT:    v_add_i32_e32 v8, vcc, 0x80000001, v8
 ; GFX6-NEXT:    v_min_i32_e32 v10, -1, v0
 ; GFX6-NEXT:    v_bfrev_b32_e32 v11, 1
-; GFX6-NEXT:    v_sub_i32_e32 v10, vcc, v10, v11
+; GFX6-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
 ; GFX6-NEXT:    v_max_i32_e32 v1, v8, v1
 ; GFX6-NEXT:    v_min_i32_e32 v1, v1, v10
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 24, v2
-; GFX6-NEXT:    v_bfrev_b32_e32 v9, -2
+; GFX6-NEXT:    v_mov_b32_e32 v9, 0x80000001
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 24, v5
 ; GFX6-NEXT:    v_max_i32_e32 v5, -1, v1
-; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, v5, v9
+; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
 ; GFX6-NEXT:    v_min_i32_e32 v8, -1, v1
-; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, v8, v11
+; GFX6-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
 ; GFX6-NEXT:    v_max_i32_e32 v2, v5, v2
 ; GFX6-NEXT:    v_min_i32_e32 v2, v2, v8
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v2
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
 ; GFX6-NEXT:    v_max_i32_e32 v5, -1, v2
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 24, v6
-; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, v5, v9
+; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
 ; GFX6-NEXT:    v_min_i32_e32 v6, -1, v2
-; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, v6, v11
+; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v6, v11
 ; GFX6-NEXT:    v_max_i32_e32 v3, v5, v3
 ; GFX6-NEXT:    v_min_i32_e32 v3, v3, v6
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v3
@@ -539,10 +540,10 @@ define i32 @v_ssubsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
 ; GFX6-NEXT:    v_max_i32_e32 v5, -1, v3
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 24, v1
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 24, v7
-; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, v5, v9
+; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
 ; GFX6-NEXT:    v_min_i32_e32 v6, -1, v3
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 24, v0
-; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, v6, v11
+; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v6, v11
 ; GFX6-NEXT:    v_max_i32_e32 v4, v5, v4
 ; GFX6-NEXT:    v_and_b32_e32 v1, 0xff, v1
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v2, 24, v2
@@ -573,34 +574,34 @@ define i32 @v_ssubsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
-; GFX8-NEXT:    v_subrev_u16_e32 v8, 0x7fff, v8
+; GFX8-NEXT:    v_add_u16_e32 v8, 0x8001, v8
 ; GFX8-NEXT:    v_min_i16_e32 v9, -1, v0
-; GFX8-NEXT:    v_subrev_u16_e32 v9, 0x8000, v9
+; GFX8-NEXT:    v_add_u16_e32 v9, 0x8000, v9
 ; GFX8-NEXT:    v_max_i16_e32 v1, v8, v1
 ; GFX8-NEXT:    v_min_i16_e32 v1, v1, v9
 ; GFX8-NEXT:    v_sub_u16_e32 v0, v0, v1
 ; GFX8-NEXT:    v_max_i16_e32 v1, -1, v3
-; GFX8-NEXT:    v_subrev_u16_e32 v1, 0x7fff, v1
+; GFX8-NEXT:    v_add_u16_e32 v1, 0x8001, v1
 ; GFX8-NEXT:    v_min_i16_e32 v8, -1, v3
-; GFX8-NEXT:    v_subrev_u16_e32 v8, 0x8000, v8
+; GFX8-NEXT:    v_add_u16_e32 v8, 0x8000, v8
 ; GFX8-NEXT:    v_max_i16_e32 v1, v1, v2
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v2, 8, v4
 ; GFX8-NEXT:    v_min_i16_e32 v1, v1, v8
 ; GFX8-NEXT:    v_max_i16_e32 v4, -1, v2
 ; GFX8-NEXT:    v_sub_u16_e32 v1, v3, v1
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v3, 8, v6
-; GFX8-NEXT:    v_subrev_u16_e32 v4, 0x7fff, v4
+; GFX8-NEXT:    v_add_u16_e32 v4, 0x8001, v4
 ; GFX8-NEXT:    v_min_i16_e32 v6, -1, v2
-; GFX8-NEXT:    v_subrev_u16_e32 v6, 0x8000, v6
+; GFX8-NEXT:    v_add_u16_e32 v6, 0x8000, v6
 ; GFX8-NEXT:    v_max_i16_e32 v3, v4, v3
 ; GFX8-NEXT:    v_min_i16_e32 v3, v3, v6
 ; GFX8-NEXT:    v_sub_u16_e32 v2, v2, v3
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v3, 8, v5
 ; GFX8-NEXT:    v_max_i16_e32 v5, -1, v3
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v4, 8, v7
-; GFX8-NEXT:    v_subrev_u16_e32 v5, 0x7fff, v5
+; GFX8-NEXT:    v_add_u16_e32 v5, 0x8001, v5
 ; GFX8-NEXT:    v_min_i16_e32 v6, -1, v3
-; GFX8-NEXT:    v_subrev_u16_e32 v6, 0x8000, v6
+; GFX8-NEXT:    v_add_u16_e32 v6, 0x8000, v6
 ; GFX8-NEXT:    v_max_i16_e32 v4, v5, v4
 ; GFX8-NEXT:    v_min_i16_e32 v4, v4, v6
 ; GFX8-NEXT:    v_sub_u16_e32 v3, v3, v4
@@ -727,27 +728,27 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX6-NEXT:    s_lshr_b32 s6, s1, 16
 ; GFX6-NEXT:    s_lshr_b32 s7, s1, 24
 ; GFX6-NEXT:    s_lshl_b32 s1, s1, 24
-; GFX6-NEXT:    s_sub_i32 s8, s8, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s8, s8, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s9, s0, -1
-; GFX6-NEXT:    s_sub_i32 s9, s9, 0x80000000
+; GFX6-NEXT:    s_add_i32 s9, s9, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s1, s8, s1
 ; GFX6-NEXT:    s_min_i32 s1, s1, s9
 ; GFX6-NEXT:    s_sub_i32 s0, s0, s1
 ; GFX6-NEXT:    s_lshl_b32 s1, s2, 24
 ; GFX6-NEXT:    s_lshl_b32 s2, s5, 24
 ; GFX6-NEXT:    s_max_i32 s5, s1, -1
-; GFX6-NEXT:    s_sub_i32 s5, s5, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s5, s5, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s8, s1, -1
-; GFX6-NEXT:    s_sub_i32 s8, s8, 0x80000000
+; GFX6-NEXT:    s_add_i32 s8, s8, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s2, s5, s2
 ; GFX6-NEXT:    s_min_i32 s2, s2, s8
 ; GFX6-NEXT:    s_sub_i32 s1, s1, s2
 ; GFX6-NEXT:    s_lshl_b32 s2, s3, 24
 ; GFX6-NEXT:    s_max_i32 s5, s2, -1
 ; GFX6-NEXT:    s_lshl_b32 s3, s6, 24
-; GFX6-NEXT:    s_sub_i32 s5, s5, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s5, s5, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s6, s2, -1
-; GFX6-NEXT:    s_sub_i32 s6, s6, 0x80000000
+; GFX6-NEXT:    s_add_i32 s6, s6, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s3, s5, s3
 ; GFX6-NEXT:    s_min_i32 s3, s3, s6
 ; GFX6-NEXT:    s_sub_i32 s2, s2, s3
@@ -755,10 +756,10 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX6-NEXT:    s_max_i32 s5, s3, -1
 ; GFX6-NEXT:    s_ashr_i32 s1, s1, 24
 ; GFX6-NEXT:    s_lshl_b32 s4, s7, 24
-; GFX6-NEXT:    s_sub_i32 s5, s5, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s5, s5, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s6, s3, -1
 ; GFX6-NEXT:    s_ashr_i32 s0, s0, 24
-; GFX6-NEXT:    s_sub_i32 s6, s6, 0x80000000
+; GFX6-NEXT:    s_add_i32 s6, s6, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s4, s5, s4
 ; GFX6-NEXT:    s_and_b32 s1, s1, 0xff
 ; GFX6-NEXT:    s_ashr_i32 s2, s2, 24
@@ -789,11 +790,11 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_lshr_b32 s6, s1, 16
 ; GFX8-NEXT:    s_lshr_b32 s7, s1, 24
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
-; GFX8-NEXT:    s_sub_i32 s10, s10, 0x7fff
+; GFX8-NEXT:    s_addk_i32 s10, 0x8001
 ; GFX8-NEXT:    s_min_i32 s8, s8, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s10, s10
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_sub_i32 s8, s8, 0xffff8000
+; GFX8-NEXT:    s_addk_i32 s8, 0x8000
 ; GFX8-NEXT:    s_max_i32 s1, s10, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s8
@@ -803,11 +804,11 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_lshl_b32 s2, s5, 8
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s1
 ; GFX8-NEXT:    s_max_i32 s8, s5, s9
-; GFX8-NEXT:    s_sub_i32 s8, s8, 0x7fff
+; GFX8-NEXT:    s_addk_i32 s8, 0x8001
 ; GFX8-NEXT:    s_min_i32 s5, s5, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s8
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
-; GFX8-NEXT:    s_sub_i32 s5, s5, 0xffff8000
+; GFX8-NEXT:    s_addk_i32 s5, 0x8000
 ; GFX8-NEXT:    s_max_i32 s2, s8, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s5
@@ -817,11 +818,11 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s2
 ; GFX8-NEXT:    s_lshl_b32 s3, s6, 8
 ; GFX8-NEXT:    s_max_i32 s6, s5, s9
-; GFX8-NEXT:    s_sub_i32 s6, s6, 0x7fff
+; GFX8-NEXT:    s_addk_i32 s6, 0x8001
 ; GFX8-NEXT:    s_min_i32 s5, s5, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
-; GFX8-NEXT:    s_sub_i32 s5, s5, 0xffff8000
+; GFX8-NEXT:    s_addk_i32 s5, 0x8000
 ; GFX8-NEXT:    s_max_i32 s3, s6, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s5
@@ -831,12 +832,12 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s3
 ; GFX8-NEXT:    s_max_i32 s6, s5, s9
 ; GFX8-NEXT:    s_lshl_b32 s4, s7, 8
-; GFX8-NEXT:    s_sub_i32 s6, s6, 0x7fff
+; GFX8-NEXT:    s_addk_i32 s6, 0x8001
 ; GFX8-NEXT:    s_min_i32 s5, s5, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_sub_i32 s5, s5, 0xffff8000
+; GFX8-NEXT:    s_addk_i32 s5, 0x8000
 ; GFX8-NEXT:    s_max_i32 s4, s6, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s0, s0
 ; GFX8-NEXT:    s_ashr_i32 s1, s1, 8
@@ -1004,9 +1005,9 @@ define i24 @v_ssubsat_i24(i24 %lhs, i24 %rhs) {
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
 ; GFX6-NEXT:    v_max_i32_e32 v2, -1, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
-; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2
+; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 0x80000001, v2
 ; GFX6-NEXT:    v_min_i32_e32 v3, -1, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 0x80000000, v3
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 0x80000000, v3
 ; GFX6-NEXT:    v_max_i32_e32 v1, v2, v1
 ; GFX6-NEXT:    v_min_i32_e32 v1, v1, v3
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
@@ -1055,9 +1056,9 @@ define amdgpu_ps i24 @s_ssubsat_i24(i24 inreg %lhs, i24 inreg %rhs) {
 ; GFX6-NEXT:    s_lshl_b32 s0, s0, 8
 ; GFX6-NEXT:    s_max_i32 s2, s0, -1
 ; GFX6-NEXT:    s_lshl_b32 s1, s1, 8
-; GFX6-NEXT:    s_sub_i32 s2, s2, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s2, s2, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s3, s0, -1
-; GFX6-NEXT:    s_sub_i32 s3, s3, 0x80000000
+; GFX6-NEXT:    s_add_i32 s3, s3, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s1, s2, s1
 ; GFX6-NEXT:    s_min_i32 s1, s1, s3
 ; GFX6-NEXT:    s_sub_i32 s0, s0, s1
@@ -1109,9 +1110,9 @@ define i32 @v_ssubsat_i32(i32 %lhs, i32 %rhs) {
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    v_max_i32_e32 v2, -1, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2
+; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 0x80000001, v2
 ; GFX6-NEXT:    v_min_i32_e32 v3, -1, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 0x80000000, v3
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 0x80000000, v3
 ; GFX6-NEXT:    v_max_i32_e32 v1, v2, v1
 ; GFX6-NEXT:    v_min_i32_e32 v1, v1, v3
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
@@ -1121,9 +1122,9 @@ define i32 @v_ssubsat_i32(i32 %lhs, i32 %rhs) {
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_max_i32_e32 v2, -1, v0
-; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, 0x7fffffff, v2
+; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0x80000001, v2
 ; GFX8-NEXT:    v_min_i32_e32 v3, -1, v0
-; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, 0x80000000, v3
+; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0x80000000, v3
 ; GFX8-NEXT:    v_max_i32_e32 v1, v2, v1
 ; GFX8-NEXT:    v_min_i32_e32 v1, v1, v3
 ; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v0, v1
@@ -1148,9 +1149,9 @@ define amdgpu_ps i32 @s_ssubsat_i32(i32 inreg %lhs, i32 inreg %rhs) {
 ; GFX6-LABEL: s_ssubsat_i32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_max_i32 s2, s0, -1
-; GFX6-NEXT:    s_sub_i32 s2, s2, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s2, s2, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s3, s0, -1
-; GFX6-NEXT:    s_sub_i32 s3, s3, 0x80000000
+; GFX6-NEXT:    s_add_i32 s3, s3, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s1, s2, s1
 ; GFX6-NEXT:    s_min_i32 s1, s1, s3
 ; GFX6-NEXT:    s_sub_i32 s0, s0, s1
@@ -1159,9 +1160,9 @@ define amdgpu_ps i32 @s_ssubsat_i32(i32 inreg %lhs, i32 inreg %rhs) {
 ; GFX8-LABEL: s_ssubsat_i32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_max_i32 s2, s0, -1
-; GFX8-NEXT:    s_sub_i32 s2, s2, 0x7fffffff
+; GFX8-NEXT:    s_add_i32 s2, s2, 0x80000001
 ; GFX8-NEXT:    s_min_i32 s3, s0, -1
-; GFX8-NEXT:    s_sub_i32 s3, s3, 0x80000000
+; GFX8-NEXT:    s_add_i32 s3, s3, 0x80000000
 ; GFX8-NEXT:    s_max_i32 s1, s2, s1
 ; GFX8-NEXT:    s_min_i32 s1, s1, s3
 ; GFX8-NEXT:    s_sub_i32 s0, s0, s1
@@ -1187,9 +1188,9 @@ define amdgpu_ps float @ssubsat_i32_sv(i32 inreg %lhs, i32 %rhs) {
 ; GFX6-LABEL: ssubsat_i32_sv:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_max_i32 s1, s0, -1
-; GFX6-NEXT:    s_sub_i32 s1, s1, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s1, s1, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s2, s0, -1
-; GFX6-NEXT:    s_sub_i32 s2, s2, 0x80000000
+; GFX6-NEXT:    s_add_i32 s2, s2, 0x80000000
 ; GFX6-NEXT:    v_max_i32_e32 v0, s1, v0
 ; GFX6-NEXT:    v_min_i32_e32 v0, s2, v0
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s0, v0
@@ -1198,9 +1199,9 @@ define amdgpu_ps float @ssubsat_i32_sv(i32 inreg %lhs, i32 %rhs) {
 ; GFX8-LABEL: ssubsat_i32_sv:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_max_i32 s1, s0, -1
-; GFX8-NEXT:    s_sub_i32 s1, s1, 0x7fffffff
+; GFX8-NEXT:    s_add_i32 s1, s1, 0x80000001
 ; GFX8-NEXT:    s_min_i32 s2, s0, -1
-; GFX8-NEXT:    s_sub_i32 s2, s2, 0x80000000
+; GFX8-NEXT:    s_add_i32 s2, s2, 0x80000000
 ; GFX8-NEXT:    v_max_i32_e32 v0, s1, v0
 ; GFX8-NEXT:    v_min_i32_e32 v0, s2, v0
 ; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, s0, v0
@@ -1224,9 +1225,9 @@ define amdgpu_ps float @ssubsat_i32_vs(i32 %lhs, i32 inreg %rhs) {
 ; GFX6-LABEL: ssubsat_i32_vs:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    v_max_i32_e32 v1, -1, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v1, vcc, 0x7fffffff, v1
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 0x80000001, v1
 ; GFX6-NEXT:    v_min_i32_e32 v2, -1, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, 0x80000000, v2
+; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 0x80000000, v2
 ; GFX6-NEXT:    v_max_i32_e32 v1, s0, v1
 ; GFX6-NEXT:    v_min_i32_e32 v1, v1, v2
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
@@ -1235,9 +1236,9 @@ define amdgpu_ps float @ssubsat_i32_vs(i32 %lhs, i32 inreg %rhs) {
 ; GFX8-LABEL: ssubsat_i32_vs:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    v_max_i32_e32 v1, -1, v0
-; GFX8-NEXT:    v_subrev_u32_e32 v1, vcc, 0x7fffffff, v1
+; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x80000001, v1
 ; GFX8-NEXT:    v_min_i32_e32 v2, -1, v0
-; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, 0x80000000, v2
+; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0x80000000, v2
 ; GFX8-NEXT:    v_max_i32_e32 v1, s0, v1
 ; GFX8-NEXT:    v_min_i32_e32 v1, v1, v2
 ; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v0, v1
@@ -1262,16 +1263,16 @@ define <2 x i32> @v_ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    v_max_i32_e32 v4, -1, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, 0x7fffffff, v4
+; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 0x80000001, v4
 ; GFX6-NEXT:    v_min_i32_e32 v5, -1, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v5, vcc, 0x80000000, v5
+; GFX6-NEXT:    v_add_i32_e32 v5, vcc, 0x80000000, v5
 ; GFX6-NEXT:    v_max_i32_e32 v2, v4, v2
 ; GFX6-NEXT:    v_min_i32_e32 v2, v2, v5
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
 ; GFX6-NEXT:    v_max_i32_e32 v2, -1, v1
-; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2
+; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 0x80000001, v2
 ; GFX6-NEXT:    v_min_i32_e32 v4, -1, v1
-; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, 0x80000000, v4
+; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 0x80000000, v4
 ; GFX6-NEXT:    v_max_i32_e32 v2, v2, v3
 ; GFX6-NEXT:    v_min_i32_e32 v2, v2, v4
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v2
@@ -1281,16 +1282,16 @@ define <2 x i32> @v_ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_max_i32_e32 v4, -1, v0
-; GFX8-NEXT:    v_subrev_u32_e32 v4, vcc, 0x7fffffff, v4
+; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 0x80000001, v4
 ; GFX8-NEXT:    v_min_i32_e32 v5, -1, v0
-; GFX8-NEXT:    v_subrev_u32_e32 v5, vcc, 0x80000000, v5
+; GFX8-NEXT:    v_add_u32_e32 v5, vcc, 0x80000000, v5
 ; GFX8-NEXT:    v_max_i32_e32 v2, v4, v2
 ; GFX8-NEXT:    v_min_i32_e32 v2, v2, v5
 ; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v0, v2
 ; GFX8-NEXT:    v_max_i32_e32 v2, -1, v1
-; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, 0x7fffffff, v2
+; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0x80000001, v2
 ; GFX8-NEXT:    v_min_i32_e32 v4, -1, v1
-; GFX8-NEXT:    v_subrev_u32_e32 v4, vcc, 0x80000000, v4
+; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 0x80000000, v4
 ; GFX8-NEXT:    v_max_i32_e32 v2, v2, v3
 ; GFX8-NEXT:    v_min_i32_e32 v2, v2, v4
 ; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, v1, v2
@@ -1317,16 +1318,16 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v2i32(<2 x i32> inreg %lhs, <2 x i32> inre
 ; GFX6-LABEL: s_ssubsat_v2i32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_max_i32 s4, s0, -1
-; GFX6-NEXT:    s_sub_i32 s4, s4, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s4, s4, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s5, s0, -1
-; GFX6-NEXT:    s_sub_i32 s5, s5, 0x80000000
+; GFX6-NEXT:    s_add_i32 s5, s5, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s2, s4, s2
 ; GFX6-NEXT:    s_min_i32 s2, s2, s5
 ; GFX6-NEXT:    s_sub_i32 s0, s0, s2
 ; GFX6-NEXT:    s_max_i32 s2, s1, -1
-; GFX6-NEXT:    s_sub_i32 s2, s2, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s2, s2, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s4, s1, -1
-; GFX6-NEXT:    s_sub_i32 s4, s4, 0x80000000
+; GFX6-NEXT:    s_add_i32 s4, s4, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s2, s2, s3
 ; GFX6-NEXT:    s_min_i32 s2, s2, s4
 ; GFX6-NEXT:    s_sub_i32 s1, s1, s2
@@ -1335,16 +1336,16 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v2i32(<2 x i32> inreg %lhs, <2 x i32> inre
 ; GFX8-LABEL: s_ssubsat_v2i32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_max_i32 s4, s0, -1
-; GFX8-NEXT:    s_sub_i32 s4, s4, 0x7fffffff
+; GFX8-NEXT:    s_add_i32 s4, s4, 0x80000001
 ; GFX8-NEXT:    s_min_i32 s5, s0, -1
-; GFX8-NEXT:    s_sub_i32 s5, s5, 0x80000000
+; GFX8-NEXT:    s_add_i32 s5, s5, 0x80000000
 ; GFX8-NEXT:    s_max_i32 s2, s4, s2
 ; GFX8-NEXT:    s_min_i32 s2, s2, s5
 ; GFX8-NEXT:    s_sub_i32 s0, s0, s2
 ; GFX8-NEXT:    s_max_i32 s2, s1, -1
-; GFX8-NEXT:    s_sub_i32 s2, s2, 0x7fffffff
+; GFX8-NEXT:    s_add_i32 s2, s2, 0x80000001
 ; GFX8-NEXT:    s_min_i32 s4, s1, -1
-; GFX8-NEXT:    s_sub_i32 s4, s4, 0x80000000
+; GFX8-NEXT:    s_add_i32 s4, s4, 0x80000000
 ; GFX8-NEXT:    s_max_i32 s2, s2, s3
 ; GFX8-NEXT:    s_min_i32 s2, s2, s4
 ; GFX8-NEXT:    s_sub_i32 s1, s1, s2
@@ -1376,24 +1377,25 @@ define <3 x i32> @v_ssubsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    v_max_i32_e32 v6, -1, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v6, vcc, 0x7fffffff, v6
+; GFX6-NEXT:    v_add_i32_e32 v6, vcc, 0x80000001, v6
 ; GFX6-NEXT:    v_min_i32_e32 v8, -1, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v8, vcc, 0x80000000, v8
+; GFX6-NEXT:    v_bfrev_b32_e32 v9, 1
+; GFX6-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
 ; GFX6-NEXT:    v_max_i32_e32 v3, v6, v3
 ; GFX6-NEXT:    v_min_i32_e32 v3, v3, v8
-; GFX6-NEXT:    v_bfrev_b32_e32 v7, -2
+; GFX6-NEXT:    v_mov_b32_e32 v7, 0x80000001
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
 ; GFX6-NEXT:    v_max_i32_e32 v3, -1, v1
-; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, v3, v7
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
 ; GFX6-NEXT:    v_min_i32_e32 v6, -1, v1
-; GFX6-NEXT:    v_subrev_i32_e32 v6, vcc, 0x80000000, v6
+; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
 ; GFX6-NEXT:    v_max_i32_e32 v3, v3, v4
 ; GFX6-NEXT:    v_min_i32_e32 v3, v3, v6
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
 ; GFX6-NEXT:    v_max_i32_e32 v3, -1, v2
-; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 0x7fffffff, v3
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 0x80000001, v3
 ; GFX6-NEXT:    v_min_i32_e32 v4, -1, v2
-; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, 0x80000000, v4
+; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 0x80000000, v4
 ; GFX6-NEXT:    v_max_i32_e32 v3, v3, v5
 ; GFX6-NEXT:    v_min_i32_e32 v3, v3, v4
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v3
@@ -1403,24 +1405,25 @@ define <3 x i32> @v_ssubsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_max_i32_e32 v6, -1, v0
-; GFX8-NEXT:    v_subrev_u32_e32 v6, vcc, 0x7fffffff, v6
+; GFX8-NEXT:    v_add_u32_e32 v6, vcc, 0x80000001, v6
 ; GFX8-NEXT:    v_min_i32_e32 v8, -1, v0
-; GFX8-NEXT:    v_subrev_u32_e32 v8, vcc, 0x80000000, v8
+; GFX8-NEXT:    v_bfrev_b32_e32 v9, 1
+; GFX8-NEXT:    v_add_u32_e32 v8, vcc, v8, v9
 ; GFX8-NEXT:    v_max_i32_e32 v3, v6, v3
 ; GFX8-NEXT:    v_min_i32_e32 v3, v3, v8
-; GFX8-NEXT:    v_bfrev_b32_e32 v7, -2
+; GFX8-NEXT:    v_mov_b32_e32 v7, 0x80000001
 ; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v0, v3
 ; GFX8-NEXT:    v_max_i32_e32 v3, -1, v1
-; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, v3, v7
+; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v7
 ; GFX8-NEXT:    v_min_i32_e32 v6, -1, v1
-; GFX8-NEXT:    v_subrev_u32_e32 v6, vcc, 0x80000000, v6
+; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v9
 ; GFX8-NEXT:    v_max_i32_e32 v3, v3, v4
 ; GFX8-NEXT:    v_min_i32_e32 v3, v3, v6
 ; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, v1, v3
 ; GFX8-NEXT:    v_max_i32_e32 v3, -1, v2
-; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, 0x7fffffff, v3
+; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0x80000001, v3
 ; GFX8-NEXT:    v_min_i32_e32 v4, -1, v2
-; GFX8-NEXT:    v_subrev_u32_e32 v4, vcc, 0x80000000, v4
+; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 0x80000000, v4
 ; GFX8-NEXT:    v_max_i32_e32 v3, v3, v5
 ; GFX8-NEXT:    v_min_i32_e32 v3, v3, v4
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v2, v3
@@ -1449,23 +1452,23 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v3i32(<3 x i32> inreg %lhs, <3 x i32> inre
 ; GFX6-LABEL: s_ssubsat_v3i32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_max_i32 s6, s0, -1
-; GFX6-NEXT:    s_sub_i32 s6, s6, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s6, s6, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s7, s0, -1
-; GFX6-NEXT:    s_sub_i32 s7, s7, 0x80000000
+; GFX6-NEXT:    s_add_i32 s7, s7, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s3, s6, s3
 ; GFX6-NEXT:    s_min_i32 s3, s3, s7
 ; GFX6-NEXT:    s_sub_i32 s0, s0, s3
 ; GFX6-NEXT:    s_max_i32 s3, s1, -1
-; GFX6-NEXT:    s_sub_i32 s3, s3, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s3, s3, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s6, s1, -1
-; GFX6-NEXT:    s_sub_i32 s6, s6, 0x80000000
+; GFX6-NEXT:    s_add_i32 s6, s6, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s3, s3, s4
 ; GFX6-NEXT:    s_min_i32 s3, s3, s6
 ; GFX6-NEXT:    s_sub_i32 s1, s1, s3
 ; GFX6-NEXT:    s_max_i32 s3, s2, -1
-; GFX6-NEXT:    s_sub_i32 s3, s3, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s3, s3, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s4, s2, -1
-; GFX6-NEXT:    s_sub_i32 s4, s4, 0x80000000
+; GFX6-NEXT:    s_add_i32 s4, s4, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s3, s3, s5
 ; GFX6-NEXT:    s_min_i32 s3, s3, s4
 ; GFX6-NEXT:    s_sub_i32 s2, s2, s3
@@ -1474,23 +1477,23 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v3i32(<3 x i32> inreg %lhs, <3 x i32> inre
 ; GFX8-LABEL: s_ssubsat_v3i32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_max_i32 s6, s0, -1
-; GFX8-NEXT:    s_sub_i32 s6, s6, 0x7fffffff
+; GFX8-NEXT:    s_add_i32 s6, s6, 0x80000001
 ; GFX8-NEXT:    s_min_i32 s7, s0, -1
-; GFX8-NEXT:    s_sub_i32 s7, s7, 0x80000000
+; GFX8-NEXT:    s_add_i32 s7, s7, 0x80000000
 ; GFX8-NEXT:    s_max_i32 s3, s6, s3
 ; GFX8-NEXT:    s_min_i32 s3, s3, s7
 ; GFX8-NEXT:    s_sub_i32 s0, s0, s3
 ; GFX8-NEXT:    s_max_i32 s3, s1, -1
-; GFX8-NEXT:    s_sub_i32 s3, s3, 0x7fffffff
+; GFX8-NEXT:    s_add_i32 s3, s3, 0x80000001
 ; GFX8-NEXT:    s_min_i32 s6, s1, -1
-; GFX8-NEXT:    s_sub_i32 s6, s6, 0x80000000
+; GFX8-NEXT:    s_add_i32 s6, s6, 0x80000000
 ; GFX8-NEXT:    s_max_i32 s3, s3, s4
 ; GFX8-NEXT:    s_min_i32 s3, s3, s6
 ; GFX8-NEXT:    s_sub_i32 s1, s1, s3
 ; GFX8-NEXT:    s_max_i32 s3, s2, -1
-; GFX8-NEXT:    s_sub_i32 s3, s3, 0x7fffffff
+; GFX8-NEXT:    s_add_i32 s3, s3, 0x80000001
 ; GFX8-NEXT:    s_min_i32 s4, s2, -1
-; GFX8-NEXT:    s_sub_i32 s4, s4, 0x80000000
+; GFX8-NEXT:    s_add_i32 s4, s4, 0x80000000
 ; GFX8-NEXT:    s_max_i32 s3, s3, s5
 ; GFX8-NEXT:    s_min_i32 s3, s3, s4
 ; GFX8-NEXT:    s_sub_i32 s2, s2, s3
@@ -1527,32 +1530,32 @@ define <4 x i32> @v_ssubsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    v_max_i32_e32 v8, -1, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v8, vcc, 0x7fffffff, v8
+; GFX6-NEXT:    v_add_i32_e32 v8, vcc, 0x80000001, v8
 ; GFX6-NEXT:    v_min_i32_e32 v10, -1, v0
 ; GFX6-NEXT:    v_bfrev_b32_e32 v11, 1
-; GFX6-NEXT:    v_sub_i32_e32 v10, vcc, v10, v11
+; GFX6-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
 ; GFX6-NEXT:    v_max_i32_e32 v4, v8, v4
 ; GFX6-NEXT:    v_min_i32_e32 v4, v4, v10
-; GFX6-NEXT:    v_bfrev_b32_e32 v9, -2
+; GFX6-NEXT:    v_mov_b32_e32 v9, 0x80000001
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
 ; GFX6-NEXT:    v_max_i32_e32 v4, -1, v1
-; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, v4, v9
+; GFX6-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
 ; GFX6-NEXT:    v_min_i32_e32 v8, -1, v1
-; GFX6-NEXT:    v_subrev_i32_e32 v8, vcc, 0x80000000, v8
+; GFX6-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
 ; GFX6-NEXT:    v_max_i32_e32 v4, v4, v5
 ; GFX6-NEXT:    v_min_i32_e32 v4, v4, v8
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
 ; GFX6-NEXT:    v_max_i32_e32 v4, -1, v2
-; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, v4, v9
+; GFX6-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
 ; GFX6-NEXT:    v_min_i32_e32 v5, -1, v2
-; GFX6-NEXT:    v_subrev_i32_e32 v5, vcc, 0x80000000, v5
+; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v5, v11
 ; GFX6-NEXT:    v_max_i32_e32 v4, v4, v6
 ; GFX6-NEXT:    v_min_i32_e32 v4, v4, v5
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v4
 ; GFX6-NEXT:    v_max_i32_e32 v4, -1, v3
-; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, 0x7fffffff, v4
+; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 0x80000001, v4
 ; GFX6-NEXT:    v_min_i32_e32 v5, -1, v3
-; GFX6-NEXT:    v_subrev_i32_e32 v5, vcc, 0x80000000, v5
+; GFX6-NEXT:    v_add_i32_e32 v5, vcc, 0x80000000, v5
 ; GFX6-NEXT:    v_max_i32_e32 v4, v4, v7
 ; GFX6-NEXT:    v_min_i32_e32 v4, v4, v5
 ; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, v3, v4
@@ -1562,32 +1565,32 @@ define <4 x i32> @v_ssubsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_max_i32_e32 v8, -1, v0
-; GFX8-NEXT:    v_subrev_u32_e32 v8, vcc, 0x7fffffff, v8
+; GFX8-NEXT:    v_add_u32_e32 v8, vcc, 0x80000001, v8
 ; GFX8-NEXT:    v_min_i32_e32 v10, -1, v0
 ; GFX8-NEXT:    v_bfrev_b32_e32 v11, 1
-; GFX8-NEXT:    v_sub_u32_e32 v10, vcc, v10, v11
+; GFX8-NEXT:    v_add_u32_e32 v10, vcc, v10, v11
 ; GFX8-NEXT:    v_max_i32_e32 v4, v8, v4
 ; GFX8-NEXT:    v_min_i32_e32 v4, v4, v10
-; GFX8-NEXT:    v_bfrev_b32_e32 v9, -2
+; GFX8-NEXT:    v_mov_b32_e32 v9, 0x80000001
 ; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v0, v4
 ; GFX8-NEXT:    v_max_i32_e32 v4, -1, v1
-; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, v4, v9
+; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v4, v9
 ; GFX8-NEXT:    v_min_i32_e32 v8, -1, v1
-; GFX8-NEXT:    v_subrev_u32_e32 v8, vcc, 0x80000000, v8
+; GFX8-NEXT:    v_add_u32_e32 v8, vcc, v8, v11
 ; GFX8-NEXT:    v_max_i32_e32 v4, v4, v5
 ; GFX8-NEXT:    v_min_i32_e32 v4, v4, v8
 ; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, v1, v4
 ; GFX8-NEXT:    v_max_i32_e32 v4, -1, v2
-; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, v4, v9
+; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v4, v9
 ; GFX8-NEXT:    v_min_i32_e32 v5, -1, v2
-; GFX8-NEXT:    v_subrev_u32_e32 v5, vcc, 0x80000000, v5
+; GFX8-NEXT:    v_add_u32_e32 v5, vcc, v5, v11
 ; GFX8-NEXT:    v_max_i32_e32 v4, v4, v6
 ; GFX8-NEXT:    v_min_i32_e32 v4, v4, v5
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v2, v4
 ; GFX8-NEXT:    v_max_i32_e32 v4, -1, v3
-; GFX8-NEXT:    v_subrev_u32_e32 v4, vcc, 0x7fffffff, v4
+; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 0x80000001, v4
 ; GFX8-NEXT:    v_min_i32_e32 v5, -1, v3
-; GFX8-NEXT:    v_subrev_u32_e32 v5, vcc, 0x80000000, v5
+; GFX8-NEXT:    v_add_u32_e32 v5, vcc, 0x80000000, v5
 ; GFX8-NEXT:    v_max_i32_e32 v4, v4, v7
 ; GFX8-NEXT:    v_min_i32_e32 v4, v4, v5
 ; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, v3, v4
@@ -1618,30 +1621,30 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v4i32(<4 x i32> inreg %lhs, <4 x i32> inre
 ; GFX6-LABEL: s_ssubsat_v4i32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_max_i32 s8, s0, -1
-; GFX6-NEXT:    s_sub_i32 s8, s8, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s8, s8, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s9, s0, -1
-; GFX6-NEXT:    s_sub_i32 s9, s9, 0x80000000
+; GFX6-NEXT:    s_add_i32 s9, s9, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s4, s8, s4
 ; GFX6-NEXT:    s_min_i32 s4, s4, s9
 ; GFX6-NEXT:    s_sub_i32 s0, s0, s4
 ; GFX6-NEXT:    s_max_i32 s4, s1, -1
-; GFX6-NEXT:    s_sub_i32 s4, s4, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s4, s4, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s8, s1, -1
-; GFX6-NEXT:    s_sub_i32 s8, s8, 0x80000000
+; GFX6-NEXT:    s_add_i32 s8, s8, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s4, s4, s5
 ; GFX6-NEXT:    s_min_i32 s4, s4, s8
 ; GFX6-NEXT:    s_sub_i32 s1, s1, s4
 ; GFX6-NEXT:    s_max_i32 s4, s2, -1
-; GFX6-NEXT:    s_sub_i32 s4, s4, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s4, s4, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s5, s2, -1
-; GFX6-NEXT:    s_sub_i32 s5, s5, 0x80000000
+; GFX6-NEXT:    s_add_i32 s5, s5, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s4, s4, s6
 ; GFX6-NEXT:    s_min_i32 s4, s4, s5
 ; GFX6-NEXT:    s_sub_i32 s2, s2, s4
 ; GFX6-NEXT:    s_max_i32 s4, s3, -1
-; GFX6-NEXT:    s_sub_i32 s4, s4, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s4, s4, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s5, s3, -1
-; GFX6-NEXT:    s_sub_i32 s5, s5, 0x80000000
+; GFX6-NEXT:    s_add_i32 s5, s5, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s4, s4, s7
 ; GFX6-NEXT:    s_min_i32 s4, s4, s5
 ; GFX6-NEXT:    s_sub_i32 s3, s3, s4
@@ -1650,30 +1653,30 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v4i32(<4 x i32> inreg %lhs, <4 x i32> inre
 ; GFX8-LABEL: s_ssubsat_v4i32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_max_i32 s8, s0, -1
-; GFX8-NEXT:    s_sub_i32 s8, s8, 0x7fffffff
+; GFX8-NEXT:    s_add_i32 s8, s8, 0x80000001
 ; GFX8-NEXT:    s_min_i32 s9, s0, -1
-; GFX8-NEXT:    s_sub_i32 s9, s9, 0x80000000
+; GFX8-NEXT:    s_add_i32 s9, s9, 0x80000000
 ; GFX8-NEXT:    s_max_i32 s4, s8, s4
 ; GFX8-NEXT:    s_min_i32 s4, s4, s9
 ; GFX8-NEXT:    s_sub_i32 s0, s0, s4
 ; GFX8-NEXT:    s_max_i32 s4, s1, -1
-; GFX8-NEXT:    s_sub_i32 s4, s4, 0x7fffffff
+; GFX8-NEXT:    s_add_i32 s4, s4, 0x80000001
 ; GFX8-NEXT:    s_min_i32 s8, s1, -1
-; GFX8-NEXT:    s_sub_i32 s8, s8, 0x80000000
+; GFX8-NEXT:    s_add_i32 s8, s8, 0x80000000
 ; GFX8-NEXT:    s_max_i32 s4, s4, s5
 ; GFX8-NEXT:    s_min_i32 s4, s4, s8
 ; GFX8-NEXT:    s_sub_i32 s1, s1, s4
 ; GFX8-NEXT:    s_max_i32 s4, s2, -1
-; GFX8-NEXT:    s_sub_i32 s4, s4, 0x7fffffff
+; GFX8-NEXT:    s_add_i32 s4, s4, 0x80000001
 ; GFX8-NEXT:    s_min_i32 s5, s2, -1
-; GFX8-NEXT:    s_sub_i32 s5, s5, 0x80000000
+; GFX8-NEXT:    s_add_i32 s5, s5, 0x80000000
 ; GFX8-NEXT:    s_max_i32 s4, s4, s6
 ; GFX8-NEXT:    s_min_i32 s4, s4, s5
 ; GFX8-NEXT:    s_sub_i32 s2, s2, s4
 ; GFX8-NEXT:    s_max_i32 s4, s3, -1
-; GFX8-NEXT:    s_sub_i32 s4, s4, 0x7fffffff
+; GFX8-NEXT:    s_add_i32 s4, s4, 0x80000001
 ; GFX8-NEXT:    s_min_i32 s5, s3, -1
-; GFX8-NEXT:    s_sub_i32 s5, s5, 0x80000000
+; GFX8-NEXT:    s_add_i32 s5, s5, 0x80000000
 ; GFX8-NEXT:    s_max_i32 s4, s4, s7
 ; GFX8-NEXT:    s_min_i32 s4, s4, s5
 ; GFX8-NEXT:    s_sub_i32 s3, s3, s4
@@ -1715,39 +1718,39 @@ define <5 x i32> @v_ssubsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) {
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    v_max_i32_e32 v10, -1, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v10, vcc, 0x7fffffff, v10
+; GFX6-NEXT:    v_add_i32_e32 v10, vcc, 0x80000001, v10
 ; GFX6-NEXT:    v_min_i32_e32 v12, -1, v0
 ; GFX6-NEXT:    v_bfrev_b32_e32 v13, 1
-; GFX6-NEXT:    v_sub_i32_e32 v12, vcc, v12, v13
+; GFX6-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
 ; GFX6-NEXT:    v_max_i32_e32 v5, v10, v5
 ; GFX6-NEXT:    v_min_i32_e32 v5, v5, v12
-; GFX6-NEXT:    v_bfrev_b32_e32 v11, -2
+; GFX6-NEXT:    v_mov_b32_e32 v11, 0x80000001
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
 ; GFX6-NEXT:    v_max_i32_e32 v5, -1, v1
-; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, v5, v11
+; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v5, v11
 ; GFX6-NEXT:    v_min_i32_e32 v10, -1, v1
-; GFX6-NEXT:    v_sub_i32_e32 v10, vcc, v10, v13
+; GFX6-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
 ; GFX6-NEXT:    v_max_i32_e32 v5, v5, v6
 ; GFX6-NEXT:    v_min_i32_e32 v5, v5, v10
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
 ; GFX6-NEXT:    v_max_i32_e32 v5, -1, v2
-; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, v5, v11
+; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v5, v11
 ; GFX6-NEXT:    v_min_i32_e32 v6, -1, v2
-; GFX6-NEXT:    v_subrev_i32_e32 v6, vcc, 0x80000000, v6
+; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v6, v13
 ; GFX6-NEXT:    v_max_i32_e32 v5, v5, v7
 ; GFX6-NEXT:    v_min_i32_e32 v5, v5, v6
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v5
 ; GFX6-NEXT:    v_max_i32_e32 v5, -1, v3
-; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, v5, v11
+; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v5, v11
 ; GFX6-NEXT:    v_min_i32_e32 v6, -1, v3
-; GFX6-NEXT:    v_subrev_i32_e32 v6, vcc, 0x80000000, v6
+; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v6, v13
 ; GFX6-NEXT:    v_max_i32_e32 v5, v5, v8
 ; GFX6-NEXT:    v_min_i32_e32 v5, v5, v6
 ; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, v3, v5
 ; GFX6-NEXT:    v_max_i32_e32 v5, -1, v4
-; GFX6-NEXT:    v_subrev_i32_e32 v5, vcc, 0x7fffffff, v5
+; GFX6-NEXT:    v_add_i32_e32 v5, vcc, 0x80000001, v5
 ; GFX6-NEXT:    v_min_i32_e32 v6, -1, v4
-; GFX6-NEXT:    v_subrev_i32_e32 v6, vcc, 0x80000000, v6
+; GFX6-NEXT:    v_add_i32_e32 v6, vcc, 0x80000000, v6
 ; GFX6-NEXT:    v_max_i32_e32 v5, v5, v9
 ; GFX6-NEXT:    v_min_i32_e32 v5, v5, v6
 ; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, v4, v5
@@ -1757,39 +1760,39 @@ define <5 x i32> @v_ssubsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) {
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_max_i32_e32 v10, -1, v0
-; GFX8-NEXT:    v_subrev_u32_e32 v10, vcc, 0x7fffffff, v10
+; GFX8-NEXT:    v_add_u32_e32 v10, vcc, 0x80000001, v10
 ; GFX8-NEXT:    v_min_i32_e32 v12, -1, v0
 ; GFX8-NEXT:    v_bfrev_b32_e32 v13, 1
-; GFX8-NEXT:    v_sub_u32_e32 v12, vcc, v12, v13
+; GFX8-NEXT:    v_add_u32_e32 v12, vcc, v12, v13
 ; GFX8-NEXT:    v_max_i32_e32 v5, v10, v5
 ; GFX8-NEXT:    v_min_i32_e32 v5, v5, v12
-; GFX8-NEXT:    v_bfrev_b32_e32 v11, -2
+; GFX8-NEXT:    v_mov_b32_e32 v11, 0x80000001
 ; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v0, v5
 ; GFX8-NEXT:    v_max_i32_e32 v5, -1, v1
-; GFX8-NEXT:    v_sub_u32_e32 v5, vcc, v5, v11
+; GFX8-NEXT:    v_add_u32_e32 v5, vcc, v5, v11
 ; GFX8-NEXT:    v_min_i32_e32 v10, -1, v1
-; GFX8-NEXT:    v_sub_u32_e32 v10, vcc, v10, v13
+; GFX8-NEXT:    v_add_u32_e32 v10, vcc, v10, v13
 ; GFX8-NEXT:    v_max_i32_e32 v5, v5, v6
 ; GFX8-NEXT:    v_min_i32_e32 v5, v5, v10
 ; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, v1, v5
 ; GFX8-NEXT:    v_max_i32_e32 v5, -1, v2
-; GFX8-NEXT:    v_sub_u32_e32 v5, vcc, v5, v11
+; GFX8-NEXT:    v_add_u32_e32 v5, vcc, v5, v11
 ; GFX8-NEXT:    v_min_i32_e32 v6, -1, v2
-; GFX8-NEXT:    v_subrev_u32_e32 v6, vcc, 0x80000000, v6
+; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v13
 ; GFX8-NEXT:    v_max_i32_e32 v5, v5, v7
 ; GFX8-NEXT:    v_min_i32_e32 v5, v5, v6
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v2, v5
 ; GFX8-NEXT:    v_max_i32_e32 v5, -1, v3
-; GFX8-NEXT:    v_sub_u32_e32 v5, vcc, v5, v11
+; GFX8-NEXT:    v_add_u32_e32 v5, vcc, v5, v11
 ; GFX8-NEXT:    v_min_i32_e32 v6, -1, v3
-; GFX8-NEXT:    v_subrev_u32_e32 v6, vcc, 0x80000000, v6
+; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v13
 ; GFX8-NEXT:    v_max_i32_e32 v5, v5, v8
 ; GFX8-NEXT:    v_min_i32_e32 v5, v5, v6
 ; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, v3, v5
 ; GFX8-NEXT:    v_max_i32_e32 v5, -1, v4
-; GFX8-NEXT:    v_subrev_u32_e32 v5, vcc, 0x7fffffff, v5
+; GFX8-NEXT:    v_add_u32_e32 v5, vcc, 0x80000001, v5
 ; GFX8-NEXT:    v_min_i32_e32 v6, -1, v4
-; GFX8-NEXT:    v_subrev_u32_e32 v6, vcc, 0x80000000, v6
+; GFX8-NEXT:    v_add_u32_e32 v6, vcc, 0x80000000, v6
 ; GFX8-NEXT:    v_max_i32_e32 v5, v5, v9
 ; GFX8-NEXT:    v_min_i32_e32 v5, v5, v6
 ; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, v4, v5
@@ -1822,37 +1825,37 @@ define amdgpu_ps <5 x i32> @s_ssubsat_v5i32(<5 x i32> inreg %lhs, <5 x i32> inre
 ; GFX6-LABEL: s_ssubsat_v5i32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_max_i32 s10, s0, -1
-; GFX6-NEXT:    s_sub_i32 s10, s10, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s10, s10, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s11, s0, -1
-; GFX6-NEXT:    s_sub_i32 s11, s11, 0x80000000
+; GFX6-NEXT:    s_add_i32 s11, s11, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s5, s10, s5
 ; GFX6-NEXT:    s_min_i32 s5, s5, s11
 ; GFX6-NEXT:    s_sub_i32 s0, s0, s5
 ; GFX6-NEXT:    s_max_i32 s5, s1, -1
-; GFX6-NEXT:    s_sub_i32 s5, s5, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s5, s5, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s10, s1, -1
-; GFX6-NEXT:    s_sub_i32 s10, s10, 0x80000000
+; GFX6-NEXT:    s_add_i32 s10, s10, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s5, s5, s6
 ; GFX6-NEXT:    s_min_i32 s5, s5, s10
 ; GFX6-NEXT:    s_sub_i32 s1, s1, s5
 ; GFX6-NEXT:    s_max_i32 s5, s2, -1
-; GFX6-NEXT:    s_sub_i32 s5, s5, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s5, s5, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s6, s2, -1
-; GFX6-NEXT:    s_sub_i32 s6, s6, 0x80000000
+; GFX6-NEXT:    s_add_i32 s6, s6, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s5, s5, s7
 ; GFX6-NEXT:    s_min_i32 s5, s5, s6
 ; GFX6-NEXT:    s_sub_i32 s2, s2, s5
 ; GFX6-NEXT:    s_max_i32 s5, s3, -1
-; GFX6-NEXT:    s_sub_i32 s5, s5, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s5, s5, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s6, s3, -1
-; GFX6-NEXT:    s_sub_i32 s6, s6, 0x80000000
+; GFX6-NEXT:    s_add_i32 s6, s6, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s5, s5, s8
 ; GFX6-NEXT:    s_min_i32 s5, s5, s6
 ; GFX6-NEXT:    s_sub_i32 s3, s3, s5
 ; GFX6-NEXT:    s_max_i32 s5, s4, -1
-; GFX6-NEXT:    s_sub_i32 s5, s5, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s5, s5, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s6, s4, -1
-; GFX6-NEXT:    s_sub_i32 s6, s6, 0x80000000
+; GFX6-NEXT:    s_add_i32 s6, s6, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s5, s5, s9
 ; GFX6-NEXT:    s_min_i32 s5, s5, s6
 ; GFX6-NEXT:    s_sub_i32 s4, s4, s5
@@ -1861,37 +1864,37 @@ define amdgpu_ps <5 x i32> @s_ssubsat_v5i32(<5 x i32> inreg %lhs, <5 x i32> inre
 ; GFX8-LABEL: s_ssubsat_v5i32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_max_i32 s10, s0, -1
-; GFX8-NEXT:    s_sub_i32 s10, s10, 0x7fffffff
+; GFX8-NEXT:    s_add_i32 s10, s10, 0x80000001
 ; GFX8-NEXT:    s_min_i32 s11, s0, -1
-; GFX8-NEXT:    s_sub_i32 s11, s11, 0x80000000
+; GFX8-NEXT:    s_add_i32 s11, s11, 0x80000000
 ; GFX8-NEXT:    s_max_i32 s5, s10, s5
 ; GFX8-NEXT:    s_min_i32 s5, s5, s11
 ; GFX8-NEXT:    s_sub_i32 s0, s0, s5
 ; GFX8-NEXT:    s_max_i32 s5, s1, -1
-; GFX8-NEXT:    s_sub_i32 s5, s5, 0x7fffffff
+; GFX8-NEXT:    s_add_i32 s5, s5, 0x80000001
 ; GFX8-NEXT:    s_min_i32 s10, s1, -1
-; GFX8-NEXT:    s_sub_i32 s10, s10, 0x80000000
+; GFX8-NEXT:    s_add_i32 s10, s10, 0x80000000
 ; GFX8-NEXT:    s_max_i32 s5, s5, s6
 ; GFX8-NEXT:    s_min_i32 s5, s5, s10
 ; GFX8-NEXT:    s_sub_i32 s1, s1, s5
 ; GFX8-NEXT:    s_max_i32 s5, s2, -1
-; GFX8-NEXT:    s_sub_i32 s5, s5, 0x7fffffff
+; GFX8-NEXT:    s_add_i32 s5, s5, 0x80000001
 ; GFX8-NEXT:    s_min_i32 s6, s2, -1
-; GFX8-NEXT:    s_sub_i32 s6, s6, 0x80000000
+; GFX8-NEXT:    s_add_i32 s6, s6, 0x80000000
 ; GFX8-NEXT:    s_max_i32 s5, s5, s7
 ; GFX8-NEXT:    s_min_i32 s5, s5, s6
 ; GFX8-NEXT:    s_sub_i32 s2, s2, s5
 ; GFX8-NEXT:    s_max_i32 s5, s3, -1
-; GFX8-NEXT:    s_sub_i32 s5, s5, 0x7fffffff
+; GFX8-NEXT:    s_add_i32 s5, s5, 0x80000001
 ; GFX8-NEXT:    s_min_i32 s6, s3, -1
-; GFX8-NEXT:    s_sub_i32 s6, s6, 0x80000000
+; GFX8-NEXT:    s_add_i32 s6, s6, 0x80000000
 ; GFX8-NEXT:    s_max_i32 s5, s5, s8
 ; GFX8-NEXT:    s_min_i32 s5, s5, s6
 ; GFX8-NEXT:    s_sub_i32 s3, s3, s5
 ; GFX8-NEXT:    s_max_i32 s5, s4, -1
-; GFX8-NEXT:    s_sub_i32 s5, s5, 0x7fffffff
+; GFX8-NEXT:    s_add_i32 s5, s5, 0x80000001
 ; GFX8-NEXT:    s_min_i32 s6, s4, -1
-; GFX8-NEXT:    s_sub_i32 s6, s6, 0x80000000
+; GFX8-NEXT:    s_add_i32 s6, s6, 0x80000000
 ; GFX8-NEXT:    s_max_i32 s5, s5, s9
 ; GFX8-NEXT:    s_min_i32 s5, s5, s6
 ; GFX8-NEXT:    s_sub_i32 s4, s4, s5
@@ -1938,117 +1941,117 @@ define <16 x i32> @v_ssubsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    v_max_i32_e32 v32, -1, v0
-; GFX6-NEXT:    v_bfrev_b32_e32 v31, -2
-; GFX6-NEXT:    v_sub_i32_e32 v32, vcc, v32, v31
+; GFX6-NEXT:    v_mov_b32_e32 v31, 0x80000001
+; GFX6-NEXT:    v_add_i32_e32 v32, vcc, v32, v31
 ; GFX6-NEXT:    v_max_i32_e32 v32, v32, v16
 ; GFX6-NEXT:    v_min_i32_e32 v33, -1, v0
 ; GFX6-NEXT:    v_bfrev_b32_e32 v16, 1
-; GFX6-NEXT:    v_sub_i32_e32 v33, vcc, v33, v16
+; GFX6-NEXT:    v_add_i32_e32 v33, vcc, v33, v16
 ; GFX6-NEXT:    v_min_i32_e32 v32, v32, v33
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v32
 ; GFX6-NEXT:    v_max_i32_e32 v32, -1, v1
-; GFX6-NEXT:    v_sub_i32_e32 v32, vcc, v32, v31
+; GFX6-NEXT:    v_add_i32_e32 v32, vcc, v32, v31
 ; GFX6-NEXT:    v_max_i32_e32 v17, v32, v17
 ; GFX6-NEXT:    v_min_i32_e32 v32, -1, v1
-; GFX6-NEXT:    v_sub_i32_e32 v32, vcc, v32, v16
+; GFX6-NEXT:    v_add_i32_e32 v32, vcc, v32, v16
 ; GFX6-NEXT:    v_min_i32_e32 v17, v17, v32
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v17
 ; GFX6-NEXT:    v_max_i32_e32 v17, -1, v2
-; GFX6-NEXT:    v_sub_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT:    v_add_i32_e32 v17, vcc, v17, v31
 ; GFX6-NEXT:    v_max_i32_e32 v17, v17, v18
 ; GFX6-NEXT:    v_min_i32_e32 v18, -1, v2
-; GFX6-NEXT:    v_sub_i32_e32 v18, vcc, v18, v16
+; GFX6-NEXT:    v_add_i32_e32 v18, vcc, v18, v16
 ; GFX6-NEXT:    v_min_i32_e32 v17, v17, v18
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v17
 ; GFX6-NEXT:    v_max_i32_e32 v17, -1, v3
-; GFX6-NEXT:    v_sub_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT:    v_add_i32_e32 v17, vcc, v17, v31
 ; GFX6-NEXT:    v_min_i32_e32 v18, -1, v3
 ; GFX6-NEXT:    v_max_i32_e32 v17, v17, v19
-; GFX6-NEXT:    v_sub_i32_e32 v18, vcc, v18, v16
+; GFX6-NEXT:    v_add_i32_e32 v18, vcc, v18, v16
 ; GFX6-NEXT:    v_min_i32_e32 v17, v17, v18
 ; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, v3, v17
 ; GFX6-NEXT:    v_max_i32_e32 v17, -1, v4
-; GFX6-NEXT:    v_sub_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT:    v_add_i32_e32 v17, vcc, v17, v31
 ; GFX6-NEXT:    v_min_i32_e32 v18, -1, v4
 ; GFX6-NEXT:    v_max_i32_e32 v17, v17, v20
-; GFX6-NEXT:    v_sub_i32_e32 v18, vcc, v18, v16
+; GFX6-NEXT:    v_add_i32_e32 v18, vcc, v18, v16
 ; GFX6-NEXT:    v_min_i32_e32 v17, v17, v18
 ; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, v4, v17
 ; GFX6-NEXT:    v_max_i32_e32 v17, -1, v5
-; GFX6-NEXT:    v_sub_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT:    v_add_i32_e32 v17, vcc, v17, v31
 ; GFX6-NEXT:    v_min_i32_e32 v18, -1, v5
 ; GFX6-NEXT:    v_max_i32_e32 v17, v17, v21
-; GFX6-NEXT:    v_sub_i32_e32 v18, vcc, v18, v16
+; GFX6-NEXT:    v_add_i32_e32 v18, vcc, v18, v16
 ; GFX6-NEXT:    v_min_i32_e32 v17, v17, v18
 ; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, v5, v17
 ; GFX6-NEXT:    v_max_i32_e32 v17, -1, v6
-; GFX6-NEXT:    v_sub_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT:    v_add_i32_e32 v17, vcc, v17, v31
 ; GFX6-NEXT:    v_min_i32_e32 v18, -1, v6
 ; GFX6-NEXT:    v_max_i32_e32 v17, v17, v22
-; GFX6-NEXT:    v_sub_i32_e32 v18, vcc, v18, v16
+; GFX6-NEXT:    v_add_i32_e32 v18, vcc, v18, v16
 ; GFX6-NEXT:    v_min_i32_e32 v17, v17, v18
 ; GFX6-NEXT:    buffer_load_dword v18, off, s[0:3], s32
 ; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, v6, v17
 ; GFX6-NEXT:    v_max_i32_e32 v17, -1, v7
-; GFX6-NEXT:    v_sub_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT:    v_add_i32_e32 v17, vcc, v17, v31
 ; GFX6-NEXT:    v_min_i32_e32 v19, -1, v7
 ; GFX6-NEXT:    v_max_i32_e32 v17, v17, v23
-; GFX6-NEXT:    v_sub_i32_e32 v19, vcc, v19, v16
+; GFX6-NEXT:    v_add_i32_e32 v19, vcc, v19, v16
 ; GFX6-NEXT:    v_min_i32_e32 v17, v17, v19
 ; GFX6-NEXT:    v_sub_i32_e32 v7, vcc, v7, v17
 ; GFX6-NEXT:    v_max_i32_e32 v17, -1, v8
-; GFX6-NEXT:    v_sub_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT:    v_add_i32_e32 v17, vcc, v17, v31
 ; GFX6-NEXT:    v_min_i32_e32 v19, -1, v8
-; GFX6-NEXT:    v_sub_i32_e32 v19, vcc, v19, v16
+; GFX6-NEXT:    v_add_i32_e32 v19, vcc, v19, v16
 ; GFX6-NEXT:    v_max_i32_e32 v17, v17, v24
 ; GFX6-NEXT:    v_min_i32_e32 v17, v17, v19
 ; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, v8, v17
 ; GFX6-NEXT:    v_max_i32_e32 v17, -1, v9
-; GFX6-NEXT:    v_sub_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT:    v_add_i32_e32 v17, vcc, v17, v31
 ; GFX6-NEXT:    v_min_i32_e32 v19, -1, v9
-; GFX6-NEXT:    v_sub_i32_e32 v19, vcc, v19, v16
+; GFX6-NEXT:    v_add_i32_e32 v19, vcc, v19, v16
 ; GFX6-NEXT:    v_max_i32_e32 v17, v17, v25
 ; GFX6-NEXT:    v_min_i32_e32 v17, v17, v19
 ; GFX6-NEXT:    v_sub_i32_e32 v9, vcc, v9, v17
 ; GFX6-NEXT:    v_max_i32_e32 v17, -1, v10
-; GFX6-NEXT:    v_sub_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT:    v_add_i32_e32 v17, vcc, v17, v31
 ; GFX6-NEXT:    v_min_i32_e32 v19, -1, v10
-; GFX6-NEXT:    v_sub_i32_e32 v19, vcc, v19, v16
+; GFX6-NEXT:    v_add_i32_e32 v19, vcc, v19, v16
 ; GFX6-NEXT:    v_max_i32_e32 v17, v17, v26
 ; GFX6-NEXT:    v_min_i32_e32 v17, v17, v19
 ; GFX6-NEXT:    v_sub_i32_e32 v10, vcc, v10, v17
 ; GFX6-NEXT:    v_max_i32_e32 v17, -1, v11
-; GFX6-NEXT:    v_sub_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT:    v_add_i32_e32 v17, vcc, v17, v31
 ; GFX6-NEXT:    v_min_i32_e32 v19, -1, v11
-; GFX6-NEXT:    v_sub_i32_e32 v19, vcc, v19, v16
+; GFX6-NEXT:    v_add_i32_e32 v19, vcc, v19, v16
 ; GFX6-NEXT:    v_max_i32_e32 v17, v17, v27
 ; GFX6-NEXT:    v_min_i32_e32 v17, v17, v19
 ; GFX6-NEXT:    v_sub_i32_e32 v11, vcc, v11, v17
 ; GFX6-NEXT:    v_max_i32_e32 v17, -1, v12
-; GFX6-NEXT:    v_sub_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT:    v_add_i32_e32 v17, vcc, v17, v31
 ; GFX6-NEXT:    v_min_i32_e32 v19, -1, v12
-; GFX6-NEXT:    v_sub_i32_e32 v19, vcc, v19, v16
+; GFX6-NEXT:    v_add_i32_e32 v19, vcc, v19, v16
 ; GFX6-NEXT:    v_max_i32_e32 v17, v17, v28
 ; GFX6-NEXT:    v_min_i32_e32 v17, v17, v19
 ; GFX6-NEXT:    v_sub_i32_e32 v12, vcc, v12, v17
 ; GFX6-NEXT:    v_max_i32_e32 v17, -1, v13
-; GFX6-NEXT:    v_sub_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT:    v_add_i32_e32 v17, vcc, v17, v31
 ; GFX6-NEXT:    v_min_i32_e32 v19, -1, v13
-; GFX6-NEXT:    v_sub_i32_e32 v19, vcc, v19, v16
+; GFX6-NEXT:    v_add_i32_e32 v19, vcc, v19, v16
 ; GFX6-NEXT:    v_max_i32_e32 v17, v17, v29
 ; GFX6-NEXT:    v_min_i32_e32 v17, v17, v19
 ; GFX6-NEXT:    v_sub_i32_e32 v13, vcc, v13, v17
 ; GFX6-NEXT:    v_max_i32_e32 v17, -1, v14
-; GFX6-NEXT:    v_sub_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT:    v_add_i32_e32 v17, vcc, v17, v31
 ; GFX6-NEXT:    v_min_i32_e32 v19, -1, v14
-; GFX6-NEXT:    v_sub_i32_e32 v19, vcc, v19, v16
+; GFX6-NEXT:    v_add_i32_e32 v19, vcc, v19, v16
 ; GFX6-NEXT:    v_max_i32_e32 v17, v17, v30
 ; GFX6-NEXT:    v_min_i32_e32 v17, v17, v19
 ; GFX6-NEXT:    v_sub_i32_e32 v14, vcc, v14, v17
 ; GFX6-NEXT:    v_max_i32_e32 v17, -1, v15
-; GFX6-NEXT:    v_sub_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT:    v_add_i32_e32 v17, vcc, v17, v31
 ; GFX6-NEXT:    v_min_i32_e32 v19, -1, v15
-; GFX6-NEXT:    v_sub_i32_e32 v16, vcc, v19, v16
+; GFX6-NEXT:    v_add_i32_e32 v16, vcc, v19, v16
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    v_max_i32_e32 v17, v17, v18
 ; GFX6-NEXT:    v_min_i32_e32 v16, v17, v16
@@ -2059,117 +2062,117 @@ define <16 x i32> @v_ssubsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_max_i32_e32 v32, -1, v0
-; GFX8-NEXT:    v_bfrev_b32_e32 v31, -2
-; GFX8-NEXT:    v_sub_u32_e32 v32, vcc, v32, v31
+; GFX8-NEXT:    v_mov_b32_e32 v31, 0x80000001
+; GFX8-NEXT:    v_add_u32_e32 v32, vcc, v32, v31
 ; GFX8-NEXT:    v_max_i32_e32 v32, v32, v16
 ; GFX8-NEXT:    v_min_i32_e32 v33, -1, v0
 ; GFX8-NEXT:    v_bfrev_b32_e32 v16, 1
-; GFX8-NEXT:    v_sub_u32_e32 v33, vcc, v33, v16
+; GFX8-NEXT:    v_add_u32_e32 v33, vcc, v33, v16
 ; GFX8-NEXT:    v_min_i32_e32 v32, v32, v33
 ; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v0, v32
 ; GFX8-NEXT:    v_max_i32_e32 v32, -1, v1
-; GFX8-NEXT:    v_sub_u32_e32 v32, vcc, v32, v31
+; GFX8-NEXT:    v_add_u32_e32 v32, vcc, v32, v31
 ; GFX8-NEXT:    v_max_i32_e32 v17, v32, v17
 ; GFX8-NEXT:    v_min_i32_e32 v32, -1, v1
-; GFX8-NEXT:    v_sub_u32_e32 v32, vcc, v32, v16
+; GFX8-NEXT:    v_add_u32_e32 v32, vcc, v32, v16
 ; GFX8-NEXT:    v_min_i32_e32 v17, v17, v32
 ; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, v1, v17
 ; GFX8-NEXT:    v_max_i32_e32 v17, -1, v2
-; GFX8-NEXT:    v_sub_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT:    v_add_u32_e32 v17, vcc, v17, v31
 ; GFX8-NEXT:    v_max_i32_e32 v17, v17, v18
 ; GFX8-NEXT:    v_min_i32_e32 v18, -1, v2
-; GFX8-NEXT:    v_sub_u32_e32 v18, vcc, v18, v16
+; GFX8-NEXT:    v_add_u32_e32 v18, vcc, v18, v16
 ; GFX8-NEXT:    v_min_i32_e32 v17, v17, v18
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v2, v17
 ; GFX8-NEXT:    v_max_i32_e32 v17, -1, v3
-; GFX8-NEXT:    v_sub_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT:    v_add_u32_e32 v17, vcc, v17, v31
 ; GFX8-NEXT:    v_min_i32_e32 v18, -1, v3
 ; GFX8-NEXT:    v_max_i32_e32 v17, v17, v19
-; GFX8-NEXT:    v_sub_u32_e32 v18, vcc, v18, v16
+; GFX8-NEXT:    v_add_u32_e32 v18, vcc, v18, v16
 ; GFX8-NEXT:    v_min_i32_e32 v17, v17, v18
 ; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, v3, v17
 ; GFX8-NEXT:    v_max_i32_e32 v17, -1, v4
-; GFX8-NEXT:    v_sub_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT:    v_add_u32_e32 v17, vcc, v17, v31
 ; GFX8-NEXT:    v_min_i32_e32 v18, -1, v4
 ; GFX8-NEXT:    v_max_i32_e32 v17, v17, v20
-; GFX8-NEXT:    v_sub_u32_e32 v18, vcc, v18, v16
+; GFX8-NEXT:    v_add_u32_e32 v18, vcc, v18, v16
 ; GFX8-NEXT:    v_min_i32_e32 v17, v17, v18
 ; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, v4, v17
 ; GFX8-NEXT:    v_max_i32_e32 v17, -1, v5
-; GFX8-NEXT:    v_sub_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT:    v_add_u32_e32 v17, vcc, v17, v31
 ; GFX8-NEXT:    v_min_i32_e32 v18, -1, v5
 ; GFX8-NEXT:    v_max_i32_e32 v17, v17, v21
-; GFX8-NEXT:    v_sub_u32_e32 v18, vcc, v18, v16
+; GFX8-NEXT:    v_add_u32_e32 v18, vcc, v18, v16
 ; GFX8-NEXT:    v_min_i32_e32 v17, v17, v18
 ; GFX8-NEXT:    v_sub_u32_e32 v5, vcc, v5, v17
 ; GFX8-NEXT:    v_max_i32_e32 v17, -1, v6
-; GFX8-NEXT:    v_sub_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT:    v_add_u32_e32 v17, vcc, v17, v31
 ; GFX8-NEXT:    v_min_i32_e32 v18, -1, v6
 ; GFX8-NEXT:    v_max_i32_e32 v17, v17, v22
-; GFX8-NEXT:    v_sub_u32_e32 v18, vcc, v18, v16
+; GFX8-NEXT:    v_add_u32_e32 v18, vcc, v18, v16
 ; GFX8-NEXT:    v_min_i32_e32 v17, v17, v18
 ; GFX8-NEXT:    buffer_load_dword v18, off, s[0:3], s32
 ; GFX8-NEXT:    v_sub_u32_e32 v6, vcc, v6, v17
 ; GFX8-NEXT:    v_max_i32_e32 v17, -1, v7
-; GFX8-NEXT:    v_sub_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT:    v_add_u32_e32 v17, vcc, v17, v31
 ; GFX8-NEXT:    v_min_i32_e32 v19, -1, v7
 ; GFX8-NEXT:    v_max_i32_e32 v17, v17, v23
-; GFX8-NEXT:    v_sub_u32_e32 v19, vcc, v19, v16
+; GFX8-NEXT:    v_add_u32_e32 v19, vcc, v19, v16
 ; GFX8-NEXT:    v_min_i32_e32 v17, v17, v19
 ; GFX8-NEXT:    v_sub_u32_e32 v7, vcc, v7, v17
 ; GFX8-NEXT:    v_max_i32_e32 v17, -1, v8
-; GFX8-NEXT:    v_sub_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT:    v_add_u32_e32 v17, vcc, v17, v31
 ; GFX8-NEXT:    v_min_i32_e32 v19, -1, v8
-; GFX8-NEXT:    v_sub_u32_e32 v19, vcc, v19, v16
+; GFX8-NEXT:    v_add_u32_e32 v19, vcc, v19, v16
 ; GFX8-NEXT:    v_max_i32_e32 v17, v17, v24
 ; GFX8-NEXT:    v_min_i32_e32 v17, v17, v19
 ; GFX8-NEXT:    v_sub_u32_e32 v8, vcc, v8, v17
 ; GFX8-NEXT:    v_max_i32_e32 v17, -1, v9
-; GFX8-NEXT:    v_sub_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT:    v_add_u32_e32 v17, vcc, v17, v31
 ; GFX8-NEXT:    v_min_i32_e32 v19, -1, v9
-; GFX8-NEXT:    v_sub_u32_e32 v19, vcc, v19, v16
+; GFX8-NEXT:    v_add_u32_e32 v19, vcc, v19, v16
 ; GFX8-NEXT:    v_max_i32_e32 v17, v17, v25
 ; GFX8-NEXT:    v_min_i32_e32 v17, v17, v19
 ; GFX8-NEXT:    v_sub_u32_e32 v9, vcc, v9, v17
 ; GFX8-NEXT:    v_max_i32_e32 v17, -1, v10
-; GFX8-NEXT:    v_sub_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT:    v_add_u32_e32 v17, vcc, v17, v31
 ; GFX8-NEXT:    v_min_i32_e32 v19, -1, v10
-; GFX8-NEXT:    v_sub_u32_e32 v19, vcc, v19, v16
+; GFX8-NEXT:    v_add_u32_e32 v19, vcc, v19, v16
 ; GFX8-NEXT:    v_max_i32_e32 v17, v17, v26
 ; GFX8-NEXT:    v_min_i32_e32 v17, v17, v19
 ; GFX8-NEXT:    v_sub_u32_e32 v10, vcc, v10, v17
 ; GFX8-NEXT:    v_max_i32_e32 v17, -1, v11
-; GFX8-NEXT:    v_sub_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT:    v_add_u32_e32 v17, vcc, v17, v31
 ; GFX8-NEXT:    v_min_i32_e32 v19, -1, v11
-; GFX8-NEXT:    v_sub_u32_e32 v19, vcc, v19, v16
+; GFX8-NEXT:    v_add_u32_e32 v19, vcc, v19, v16
 ; GFX8-NEXT:    v_max_i32_e32 v17, v17, v27
 ; GFX8-NEXT:    v_min_i32_e32 v17, v17, v19
 ; GFX8-NEXT:    v_sub_u32_e32 v11, vcc, v11, v17
 ; GFX8-NEXT:    v_max_i32_e32 v17, -1, v12
-; GFX8-NEXT:    v_sub_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT:    v_add_u32_e32 v17, vcc, v17, v31
 ; GFX8-NEXT:    v_min_i32_e32 v19, -1, v12
-; GFX8-NEXT:    v_sub_u32_e32 v19, vcc, v19, v16
+; GFX8-NEXT:    v_add_u32_e32 v19, vcc, v19, v16
 ; GFX8-NEXT:    v_max_i32_e32 v17, v17, v28
 ; GFX8-NEXT:    v_min_i32_e32 v17, v17, v19
 ; GFX8-NEXT:    v_sub_u32_e32 v12, vcc, v12, v17
 ; GFX8-NEXT:    v_max_i32_e32 v17, -1, v13
-; GFX8-NEXT:    v_sub_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT:    v_add_u32_e32 v17, vcc, v17, v31
 ; GFX8-NEXT:    v_min_i32_e32 v19, -1, v13
-; GFX8-NEXT:    v_sub_u32_e32 v19, vcc, v19, v16
+; GFX8-NEXT:    v_add_u32_e32 v19, vcc, v19, v16
 ; GFX8-NEXT:    v_max_i32_e32 v17, v17, v29
 ; GFX8-NEXT:    v_min_i32_e32 v17, v17, v19
 ; GFX8-NEXT:    v_sub_u32_e32 v13, vcc, v13, v17
 ; GFX8-NEXT:    v_max_i32_e32 v17, -1, v14
-; GFX8-NEXT:    v_sub_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT:    v_add_u32_e32 v17, vcc, v17, v31
 ; GFX8-NEXT:    v_min_i32_e32 v19, -1, v14
-; GFX8-NEXT:    v_sub_u32_e32 v19, vcc, v19, v16
+; GFX8-NEXT:    v_add_u32_e32 v19, vcc, v19, v16
 ; GFX8-NEXT:    v_max_i32_e32 v17, v17, v30
 ; GFX8-NEXT:    v_min_i32_e32 v17, v17, v19
 ; GFX8-NEXT:    v_sub_u32_e32 v14, vcc, v14, v17
 ; GFX8-NEXT:    v_max_i32_e32 v17, -1, v15
-; GFX8-NEXT:    v_sub_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT:    v_add_u32_e32 v17, vcc, v17, v31
 ; GFX8-NEXT:    v_min_i32_e32 v19, -1, v15
-; GFX8-NEXT:    v_sub_u32_e32 v16, vcc, v19, v16
+; GFX8-NEXT:    v_add_u32_e32 v16, vcc, v19, v16
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    v_max_i32_e32 v17, v17, v18
 ; GFX8-NEXT:    v_min_i32_e32 v16, v17, v16
@@ -2252,114 +2255,114 @@ define amdgpu_ps <16 x i32> @s_ssubsat_v16i32(<16 x i32> inreg %lhs, <16 x i32>
 ; GFX6-LABEL: s_ssubsat_v16i32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_max_i32 s32, s0, -1
-; GFX6-NEXT:    s_sub_i32 s32, s32, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s32, s32, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s33, s0, -1
-; GFX6-NEXT:    s_sub_i32 s33, s33, 0x80000000
+; GFX6-NEXT:    s_add_i32 s33, s33, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s16, s32, s16
 ; GFX6-NEXT:    s_min_i32 s16, s16, s33
 ; GFX6-NEXT:    s_sub_i32 s0, s0, s16
 ; GFX6-NEXT:    s_max_i32 s16, s1, -1
-; GFX6-NEXT:    s_sub_i32 s16, s16, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s16, s16, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s32, s1, -1
-; GFX6-NEXT:    s_sub_i32 s32, s32, 0x80000000
+; GFX6-NEXT:    s_add_i32 s32, s32, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s16, s16, s17
 ; GFX6-NEXT:    s_min_i32 s16, s16, s32
 ; GFX6-NEXT:    s_sub_i32 s1, s1, s16
 ; GFX6-NEXT:    s_max_i32 s16, s2, -1
-; GFX6-NEXT:    s_sub_i32 s16, s16, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s16, s16, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s17, s2, -1
-; GFX6-NEXT:    s_sub_i32 s17, s17, 0x80000000
+; GFX6-NEXT:    s_add_i32 s17, s17, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s16, s16, s18
 ; GFX6-NEXT:    s_min_i32 s16, s16, s17
 ; GFX6-NEXT:    s_sub_i32 s2, s2, s16
 ; GFX6-NEXT:    s_max_i32 s16, s3, -1
-; GFX6-NEXT:    s_sub_i32 s16, s16, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s16, s16, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s17, s3, -1
-; GFX6-NEXT:    s_sub_i32 s17, s17, 0x80000000
+; GFX6-NEXT:    s_add_i32 s17, s17, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s16, s16, s19
 ; GFX6-NEXT:    s_min_i32 s16, s16, s17
 ; GFX6-NEXT:    s_sub_i32 s3, s3, s16
 ; GFX6-NEXT:    s_max_i32 s16, s4, -1
-; GFX6-NEXT:    s_sub_i32 s16, s16, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s16, s16, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s17, s4, -1
-; GFX6-NEXT:    s_sub_i32 s17, s17, 0x80000000
+; GFX6-NEXT:    s_add_i32 s17, s17, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s16, s16, s20
 ; GFX6-NEXT:    s_min_i32 s16, s16, s17
 ; GFX6-NEXT:    s_sub_i32 s4, s4, s16
 ; GFX6-NEXT:    s_max_i32 s16, s5, -1
-; GFX6-NEXT:    s_sub_i32 s16, s16, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s16, s16, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s17, s5, -1
-; GFX6-NEXT:    s_sub_i32 s17, s17, 0x80000000
+; GFX6-NEXT:    s_add_i32 s17, s17, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s16, s16, s21
 ; GFX6-NEXT:    s_min_i32 s16, s16, s17
 ; GFX6-NEXT:    s_sub_i32 s5, s5, s16
 ; GFX6-NEXT:    s_max_i32 s16, s6, -1
-; GFX6-NEXT:    s_sub_i32 s16, s16, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s16, s16, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s17, s6, -1
-; GFX6-NEXT:    s_sub_i32 s17, s17, 0x80000000
+; GFX6-NEXT:    s_add_i32 s17, s17, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s16, s16, s22
 ; GFX6-NEXT:    s_min_i32 s16, s16, s17
 ; GFX6-NEXT:    s_sub_i32 s6, s6, s16
 ; GFX6-NEXT:    s_max_i32 s16, s7, -1
-; GFX6-NEXT:    s_sub_i32 s16, s16, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s16, s16, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s17, s7, -1
-; GFX6-NEXT:    s_sub_i32 s17, s17, 0x80000000
+; GFX6-NEXT:    s_add_i32 s17, s17, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s16, s16, s23
 ; GFX6-NEXT:    s_min_i32 s16, s16, s17
 ; GFX6-NEXT:    s_sub_i32 s7, s7, s16
 ; GFX6-NEXT:    s_max_i32 s16, s8, -1
-; GFX6-NEXT:    s_sub_i32 s16, s16, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s16, s16, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s17, s8, -1
-; GFX6-NEXT:    s_sub_i32 s17, s17, 0x80000000
+; GFX6-NEXT:    s_add_i32 s17, s17, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s16, s16, s24
 ; GFX6-NEXT:    s_min_i32 s16, s16, s17
 ; GFX6-NEXT:    s_sub_i32 s8, s8, s16
 ; GFX6-NEXT:    s_max_i32 s16, s9, -1
-; GFX6-NEXT:    s_sub_i32 s16, s16, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s16, s16, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s17, s9, -1
-; GFX6-NEXT:    s_sub_i32 s17, s17, 0x80000000
+; GFX6-NEXT:    s_add_i32 s17, s17, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s16, s16, s25
 ; GFX6-NEXT:    s_min_i32 s16, s16, s17
 ; GFX6-NEXT:    s_sub_i32 s9, s9, s16
 ; GFX6-NEXT:    s_max_i32 s16, s10, -1
-; GFX6-NEXT:    s_sub_i32 s16, s16, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s16, s16, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s17, s10, -1
-; GFX6-NEXT:    s_sub_i32 s17, s17, 0x80000000
+; GFX6-NEXT:    s_add_i32 s17, s17, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s16, s16, s26
 ; GFX6-NEXT:    s_min_i32 s16, s16, s17
 ; GFX6-NEXT:    s_sub_i32 s10, s10, s16
 ; GFX6-NEXT:    s_max_i32 s16, s11, -1
-; GFX6-NEXT:    s_sub_i32 s16, s16, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s16, s16, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s17, s11, -1
-; GFX6-NEXT:    s_sub_i32 s17, s17, 0x80000000
+; GFX6-NEXT:    s_add_i32 s17, s17, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s16, s16, s27
 ; GFX6-NEXT:    s_min_i32 s16, s16, s17
 ; GFX6-NEXT:    s_sub_i32 s11, s11, s16
 ; GFX6-NEXT:    s_max_i32 s16, s12, -1
-; GFX6-NEXT:    s_sub_i32 s16, s16, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s16, s16, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s17, s12, -1
-; GFX6-NEXT:    s_sub_i32 s17, s17, 0x80000000
+; GFX6-NEXT:    s_add_i32 s17, s17, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s16, s16, s28
 ; GFX6-NEXT:    s_min_i32 s16, s16, s17
 ; GFX6-NEXT:    s_sub_i32 s12, s12, s16
 ; GFX6-NEXT:    s_max_i32 s16, s13, -1
-; GFX6-NEXT:    s_sub_i32 s16, s16, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s16, s16, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s17, s13, -1
-; GFX6-NEXT:    s_sub_i32 s17, s17, 0x80000000
+; GFX6-NEXT:    s_add_i32 s17, s17, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s16, s16, s29
 ; GFX6-NEXT:    s_min_i32 s16, s16, s17
 ; GFX6-NEXT:    s_sub_i32 s13, s13, s16
 ; GFX6-NEXT:    s_max_i32 s16, s14, -1
-; GFX6-NEXT:    s_sub_i32 s16, s16, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s16, s16, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s17, s14, -1
-; GFX6-NEXT:    s_sub_i32 s17, s17, 0x80000000
+; GFX6-NEXT:    s_add_i32 s17, s17, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s16, s16, s30
 ; GFX6-NEXT:    s_min_i32 s16, s16, s17
 ; GFX6-NEXT:    s_sub_i32 s14, s14, s16
 ; GFX6-NEXT:    s_max_i32 s16, s15, -1
-; GFX6-NEXT:    s_sub_i32 s16, s16, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s16, s16, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s17, s15, -1
-; GFX6-NEXT:    s_sub_i32 s17, s17, 0x80000000
+; GFX6-NEXT:    s_add_i32 s17, s17, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s16, s16, s31
 ; GFX6-NEXT:    s_min_i32 s16, s16, s17
 ; GFX6-NEXT:    s_sub_i32 s15, s15, s16
@@ -2368,114 +2371,114 @@ define amdgpu_ps <16 x i32> @s_ssubsat_v16i32(<16 x i32> inreg %lhs, <16 x i32>
 ; GFX8-LABEL: s_ssubsat_v16i32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_max_i32 s32, s0, -1
-; GFX8-NEXT:    s_sub_i32 s32, s32, 0x7fffffff
+; GFX8-NEXT:    s_add_i32 s32, s32, 0x80000001
 ; GFX8-NEXT:    s_min_i32 s33, s0, -1
-; GFX8-NEXT:    s_sub_i32 s33, s33, 0x80000000
+; GFX8-NEXT:    s_add_i32 s33, s33, 0x80000000
 ; GFX8-NEXT:    s_max_i32 s16, s32, s16
 ; GFX8-NEXT:    s_min_i32 s16, s16, s33
 ; GFX8-NEXT:    s_sub_i32 s0, s0, s16
 ; GFX8-NEXT:    s_max_i32 s16, s1, -1
-; GFX8-NEXT:    s_sub_i32 s16, s16, 0x7fffffff
+; GFX8-NEXT:    s_add_i32 s16, s16, 0x80000001
 ; GFX8-NEXT:    s_min_i32 s32, s1, -1
-; GFX8-NEXT:    s_sub_i32 s32, s32, 0x80000000
+; GFX8-NEXT:    s_add_i32 s32, s32, 0x80000000
 ; GFX8-NEXT:    s_max_i32 s16, s16, s17
 ; GFX8-NEXT:    s_min_i32 s16, s16, s32
 ; GFX8-NEXT:    s_sub_i32 s1, s1, s16
 ; GFX8-NEXT:    s_max_i32 s16, s2, -1
-; GFX8-NEXT:    s_sub_i32 s16, s16, 0x7fffffff
+; GFX8-NEXT:    s_add_i32 s16, s16, 0x80000001
 ; GFX8-NEXT:    s_min_i32 s17, s2, -1
-; GFX8-NEXT:    s_sub_i32 s17, s17, 0x80000000
+; GFX8-NEXT:    s_add_i32 s17, s17, 0x80000000
 ; GFX8-NEXT:    s_max_i32 s16, s16, s18
 ; GFX8-NEXT:    s_min_i32 s16, s16, s17
 ; GFX8-NEXT:    s_sub_i32 s2, s2, s16
 ; GFX8-NEXT:    s_max_i32 s16, s3, -1
-; GFX8-NEXT:    s_sub_i32 s16, s16, 0x7fffffff
+; GFX8-NEXT:    s_add_i32 s16, s16, 0x80000001
 ; GFX8-NEXT:    s_min_i32 s17, s3, -1
-; GFX8-NEXT:    s_sub_i32 s17, s17, 0x80000000
+; GFX8-NEXT:    s_add_i32 s17, s17, 0x80000000
 ; GFX8-NEXT:    s_max_i32 s16, s16, s19
 ; GFX8-NEXT:    s_min_i32 s16, s16, s17
 ; GFX8-NEXT:    s_sub_i32 s3, s3, s16
 ; GFX8-NEXT:    s_max_i32 s16, s4, -1
-; GFX8-NEXT:    s_sub_i32 s16, s16, 0x7fffffff
+; GFX8-NEXT:    s_add_i32 s16, s16, 0x80000001
 ; GFX8-NEXT:    s_min_i32 s17, s4, -1
-; GFX8-NEXT:    s_sub_i32 s17, s17, 0x80000000
+; GFX8-NEXT:    s_add_i32 s17, s17, 0x80000000
 ; GFX8-NEXT:    s_max_i32 s16, s16, s20
 ; GFX8-NEXT:    s_min_i32 s16, s16, s17
 ; GFX8-NEXT:    s_sub_i32 s4, s4, s16
 ; GFX8-NEXT:    s_max_i32 s16, s5, -1
-; GFX8-NEXT:    s_sub_i32 s16, s16, 0x7fffffff
+; GFX8-NEXT:    s_add_i32 s16, s16, 0x80000001
 ; GFX8-NEXT:    s_min_i32 s17, s5, -1
-; GFX8-NEXT:    s_sub_i32 s17, s17, 0x80000000
+; GFX8-NEXT:    s_add_i32 s17, s17, 0x80000000
 ; GFX8-NEXT:    s_max_i32 s16, s16, s21
 ; GFX8-NEXT:    s_min_i32 s16, s16, s17
 ; GFX8-NEXT:    s_sub_i32 s5, s5, s16
 ; GFX8-NEXT:    s_max_i32 s16, s6, -1
-; GFX8-NEXT:    s_sub_i32 s16, s16, 0x7fffffff
+; GFX8-NEXT:    s_add_i32 s16, s16, 0x80000001
 ; GFX8-NEXT:    s_min_i32 s17, s6, -1
-; GFX8-NEXT:    s_sub_i32 s17, s17, 0x80000000
+; GFX8-NEXT:    s_add_i32 s17, s17, 0x80000000
 ; GFX8-NEXT:    s_max_i32 s16, s16, s22
 ; GFX8-NEXT:    s_min_i32 s16, s16, s17
 ; GFX8-NEXT:    s_sub_i32 s6, s6, s16
 ; GFX8-NEXT:    s_max_i32 s16, s7, -1
-; GFX8-NEXT:    s_sub_i32 s16, s16, 0x7fffffff
+; GFX8-NEXT:    s_add_i32 s16, s16, 0x80000001
 ; GFX8-NEXT:    s_min_i32 s17, s7, -1
-; GFX8-NEXT:    s_sub_i32 s17, s17, 0x80000000
+; GFX8-NEXT:    s_add_i32 s17, s17, 0x80000000
 ; GFX8-NEXT:    s_max_i32 s16, s16, s23
 ; GFX8-NEXT:    s_min_i32 s16, s16, s17
 ; GFX8-NEXT:    s_sub_i32 s7, s7, s16
 ; GFX8-NEXT:    s_max_i32 s16, s8, -1
-; GFX8-NEXT:    s_sub_i32 s16, s16, 0x7fffffff
+; GFX8-NEXT:    s_add_i32 s16, s16, 0x80000001
 ; GFX8-NEXT:    s_min_i32 s17, s8, -1
-; GFX8-NEXT:    s_sub_i32 s17, s17, 0x80000000
+; GFX8-NEXT:    s_add_i32 s17, s17, 0x80000000
 ; GFX8-NEXT:    s_max_i32 s16, s16, s24
 ; GFX8-NEXT:    s_min_i32 s16, s16, s17
 ; GFX8-NEXT:    s_sub_i32 s8, s8, s16
 ; GFX8-NEXT:    s_max_i32 s16, s9, -1
-; GFX8-NEXT:    s_sub_i32 s16, s16, 0x7fffffff
+; GFX8-NEXT:    s_add_i32 s16, s16, 0x80000001
 ; GFX8-NEXT:    s_min_i32 s17, s9, -1
-; GFX8-NEXT:    s_sub_i32 s17, s17, 0x80000000
+; GFX8-NEXT:    s_add_i32 s17, s17, 0x80000000
 ; GFX8-NEXT:    s_max_i32 s16, s16, s25
 ; GFX8-NEXT:    s_min_i32 s16, s16, s17
 ; GFX8-NEXT:    s_sub_i32 s9, s9, s16
 ; GFX8-NEXT:    s_max_i32 s16, s10, -1
-; GFX8-NEXT:    s_sub_i32 s16, s16, 0x7fffffff
+; GFX8-NEXT:    s_add_i32 s16, s16, 0x80000001
 ; GFX8-NEXT:    s_min_i32 s17, s10, -1
-; GFX8-NEXT:    s_sub_i32 s17, s17, 0x80000000
+; GFX8-NEXT:    s_add_i32 s17, s17, 0x80000000
 ; GFX8-NEXT:    s_max_i32 s16, s16, s26
 ; GFX8-NEXT:    s_min_i32 s16, s16, s17
 ; GFX8-NEXT:    s_sub_i32 s10, s10, s16
 ; GFX8-NEXT:    s_max_i32 s16, s11, -1
-; GFX8-NEXT:    s_sub_i32 s16, s16, 0x7fffffff
+; GFX8-NEXT:    s_add_i32 s16, s16, 0x80000001
 ; GFX8-NEXT:    s_min_i32 s17, s11, -1
-; GFX8-NEXT:    s_sub_i32 s17, s17, 0x80000000
+; GFX8-NEXT:    s_add_i32 s17, s17, 0x80000000
 ; GFX8-NEXT:    s_max_i32 s16, s16, s27
 ; GFX8-NEXT:    s_min_i32 s16, s16, s17
 ; GFX8-NEXT:    s_sub_i32 s11, s11, s16
 ; GFX8-NEXT:    s_max_i32 s16, s12, -1
-; GFX8-NEXT:    s_sub_i32 s16, s16, 0x7fffffff
+; GFX8-NEXT:    s_add_i32 s16, s16, 0x80000001
 ; GFX8-NEXT:    s_min_i32 s17, s12, -1
-; GFX8-NEXT:    s_sub_i32 s17, s17, 0x80000000
+; GFX8-NEXT:    s_add_i32 s17, s17, 0x80000000
 ; GFX8-NEXT:    s_max_i32 s16, s16, s28
 ; GFX8-NEXT:    s_min_i32 s16, s16, s17
 ; GFX8-NEXT:    s_sub_i32 s12, s12, s16
 ; GFX8-NEXT:    s_max_i32 s16, s13, -1
-; GFX8-NEXT:    s_sub_i32 s16, s16, 0x7fffffff
+; GFX8-NEXT:    s_add_i32 s16, s16, 0x80000001
 ; GFX8-NEXT:    s_min_i32 s17, s13, -1
-; GFX8-NEXT:    s_sub_i32 s17, s17, 0x80000000
+; GFX8-NEXT:    s_add_i32 s17, s17, 0x80000000
 ; GFX8-NEXT:    s_max_i32 s16, s16, s29
 ; GFX8-NEXT:    s_min_i32 s16, s16, s17
 ; GFX8-NEXT:    s_sub_i32 s13, s13, s16
 ; GFX8-NEXT:    s_max_i32 s16, s14, -1
-; GFX8-NEXT:    s_sub_i32 s16, s16, 0x7fffffff
+; GFX8-NEXT:    s_add_i32 s16, s16, 0x80000001
 ; GFX8-NEXT:    s_min_i32 s17, s14, -1
-; GFX8-NEXT:    s_sub_i32 s17, s17, 0x80000000
+; GFX8-NEXT:    s_add_i32 s17, s17, 0x80000000
 ; GFX8-NEXT:    s_max_i32 s16, s16, s30
 ; GFX8-NEXT:    s_min_i32 s16, s16, s17
 ; GFX8-NEXT:    s_sub_i32 s14, s14, s16
 ; GFX8-NEXT:    s_max_i32 s16, s15, -1
-; GFX8-NEXT:    s_sub_i32 s16, s16, 0x7fffffff
+; GFX8-NEXT:    s_add_i32 s16, s16, 0x80000001
 ; GFX8-NEXT:    s_min_i32 s17, s15, -1
-; GFX8-NEXT:    s_sub_i32 s17, s17, 0x80000000
+; GFX8-NEXT:    s_add_i32 s17, s17, 0x80000000
 ; GFX8-NEXT:    s_max_i32 s16, s16, s31
 ; GFX8-NEXT:    s_min_i32 s16, s16, s17
 ; GFX8-NEXT:    s_sub_i32 s15, s15, s16
@@ -2579,9 +2582,9 @@ define i16 @v_ssubsat_i16(i16 %lhs, i16 %rhs) {
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX6-NEXT:    v_max_i32_e32 v2, -1, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2
+; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 0x80000001, v2
 ; GFX6-NEXT:    v_min_i32_e32 v3, -1, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 0x80000000, v3
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 0x80000000, v3
 ; GFX6-NEXT:    v_max_i32_e32 v1, v2, v1
 ; GFX6-NEXT:    v_min_i32_e32 v1, v1, v3
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
@@ -2592,9 +2595,9 @@ define i16 @v_ssubsat_i16(i16 %lhs, i16 %rhs) {
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_max_i16_e32 v2, -1, v0
-; GFX8-NEXT:    v_subrev_u16_e32 v2, 0x7fff, v2
+; GFX8-NEXT:    v_add_u16_e32 v2, 0x8001, v2
 ; GFX8-NEXT:    v_min_i16_e32 v3, -1, v0
-; GFX8-NEXT:    v_subrev_u16_e32 v3, 0x8000, v3
+; GFX8-NEXT:    v_add_u16_e32 v3, 0x8000, v3
 ; GFX8-NEXT:    v_max_i16_e32 v1, v2, v1
 ; GFX8-NEXT:    v_min_i16_e32 v1, v1, v3
 ; GFX8-NEXT:    v_sub_u16_e32 v0, v0, v1
@@ -2621,9 +2624,9 @@ define amdgpu_ps i16 @s_ssubsat_i16(i16 inreg %lhs, i16 inreg %rhs) {
 ; GFX6-NEXT:    s_lshl_b32 s0, s0, 16
 ; GFX6-NEXT:    s_max_i32 s2, s0, -1
 ; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
-; GFX6-NEXT:    s_sub_i32 s2, s2, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s2, s2, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s3, s0, -1
-; GFX6-NEXT:    s_sub_i32 s3, s3, 0x80000000
+; GFX6-NEXT:    s_add_i32 s3, s3, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s1, s2, s1
 ; GFX6-NEXT:    s_min_i32 s1, s1, s3
 ; GFX6-NEXT:    s_sub_i32 s0, s0, s1
@@ -2635,11 +2638,11 @@ define amdgpu_ps i16 @s_ssubsat_i16(i16 inreg %lhs, i16 inreg %rhs) {
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s0
 ; GFX8-NEXT:    s_sext_i32_i16 s3, -1
 ; GFX8-NEXT:    s_max_i32 s4, s2, s3
-; GFX8-NEXT:    s_sub_i32 s4, s4, 0x7fff
+; GFX8-NEXT:    s_addk_i32 s4, 0x8001
 ; GFX8-NEXT:    s_min_i32 s2, s2, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_sub_i32 s2, s2, 0xffff8000
+; GFX8-NEXT:    s_addk_i32 s2, 0x8000
 ; GFX8-NEXT:    s_max_i32 s1, s3, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
@@ -2669,9 +2672,9 @@ define amdgpu_ps half @ssubsat_i16_sv(i16 inreg %lhs, i16 %rhs) {
 ; GFX6-NEXT:    s_lshl_b32 s0, s0, 16
 ; GFX6-NEXT:    s_max_i32 s1, s0, -1
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX6-NEXT:    s_sub_i32 s1, s1, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s1, s1, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s2, s0, -1
-; GFX6-NEXT:    s_sub_i32 s2, s2, 0x80000000
+; GFX6-NEXT:    s_add_i32 s2, s2, 0x80000000
 ; GFX6-NEXT:    v_max_i32_e32 v0, s1, v0
 ; GFX6-NEXT:    v_min_i32_e32 v0, s2, v0
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s0, v0
@@ -2683,9 +2686,9 @@ define amdgpu_ps half @ssubsat_i16_sv(i16 inreg %lhs, i16 %rhs) {
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s0
 ; GFX8-NEXT:    s_sext_i32_i16 s2, -1
 ; GFX8-NEXT:    s_max_i32 s3, s1, s2
-; GFX8-NEXT:    s_sub_i32 s3, s3, 0x7fff
+; GFX8-NEXT:    s_addk_i32 s3, 0x8001
 ; GFX8-NEXT:    s_min_i32 s1, s1, s2
-; GFX8-NEXT:    s_sub_i32 s1, s1, 0xffff8000
+; GFX8-NEXT:    s_addk_i32 s1, 0x8000
 ; GFX8-NEXT:    v_max_i16_e32 v0, s3, v0
 ; GFX8-NEXT:    v_min_i16_e32 v0, s1, v0
 ; GFX8-NEXT:    v_sub_u16_e32 v0, s0, v0
@@ -2711,9 +2714,9 @@ define amdgpu_ps half @ssubsat_i16_vs(i16 %lhs, i16 inreg %rhs) {
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX6-NEXT:    v_max_i32_e32 v1, -1, v0
 ; GFX6-NEXT:    s_lshl_b32 s0, s0, 16
-; GFX6-NEXT:    v_subrev_i32_e32 v1, vcc, 0x7fffffff, v1
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, 0x80000001, v1
 ; GFX6-NEXT:    v_min_i32_e32 v2, -1, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, 0x80000000, v2
+; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 0x80000000, v2
 ; GFX6-NEXT:    v_max_i32_e32 v1, s0, v1
 ; GFX6-NEXT:    v_min_i32_e32 v1, v1, v2
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
@@ -2723,9 +2726,9 @@ define amdgpu_ps half @ssubsat_i16_vs(i16 %lhs, i16 inreg %rhs) {
 ; GFX8-LABEL: ssubsat_i16_vs:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    v_max_i16_e32 v1, -1, v0
-; GFX8-NEXT:    v_subrev_u16_e32 v1, 0x7fff, v1
+; GFX8-NEXT:    v_add_u16_e32 v1, 0x8001, v1
 ; GFX8-NEXT:    v_min_i16_e32 v2, -1, v0
-; GFX8-NEXT:    v_subrev_u16_e32 v2, 0x8000, v2
+; GFX8-NEXT:    v_add_u16_e32 v2, 0x8000, v2
 ; GFX8-NEXT:    v_max_i16_e32 v1, s0, v1
 ; GFX8-NEXT:    v_min_i16_e32 v1, v1, v2
 ; GFX8-NEXT:    v_sub_u16_e32 v0, v0, v1
@@ -2752,18 +2755,19 @@ define <2 x i16> @v_ssubsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX6-NEXT:    v_max_i32_e32 v4, -1, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, 0x7fffffff, v4
+; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 0x80000001, v4
 ; GFX6-NEXT:    v_min_i32_e32 v5, -1, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v5, vcc, 0x80000000, v5
+; GFX6-NEXT:    v_bfrev_b32_e32 v6, 1
+; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
 ; GFX6-NEXT:    v_max_i32_e32 v2, v4, v2
 ; GFX6-NEXT:    v_min_i32_e32 v2, v2, v5
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
 ; GFX6-NEXT:    v_max_i32_e32 v3, -1, v1
-; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 0x7fffffff, v3
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 0x80000001, v3
 ; GFX6-NEXT:    v_min_i32_e32 v4, -1, v1
-; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, 0x80000000, v4
+; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 0x80000000, v4
 ; GFX6-NEXT:    v_max_i32_e32 v2, v3, v2
 ; GFX6-NEXT:    v_min_i32_e32 v2, v2, v4
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v2
@@ -2775,16 +2779,16 @@ define <2 x i16> @v_ssubsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_max_i16_e32 v2, -1, v0
-; GFX8-NEXT:    v_subrev_u16_e32 v2, 0x7fff, v2
+; GFX8-NEXT:    v_add_u16_e32 v2, 0x8001, v2
 ; GFX8-NEXT:    v_min_i16_e32 v3, -1, v0
-; GFX8-NEXT:    v_subrev_u16_e32 v3, 0x8000, v3
+; GFX8-NEXT:    v_add_u16_e32 v3, 0x8000, v3
 ; GFX8-NEXT:    v_max_i16_e32 v2, v2, v1
 ; GFX8-NEXT:    v_min_i16_e32 v2, v2, v3
 ; GFX8-NEXT:    v_mov_b32_e32 v3, -1
 ; GFX8-NEXT:    v_max_i16_sdwa v4, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT:    v_subrev_u16_e32 v4, 0x7fff, v4
+; GFX8-NEXT:    v_add_u16_e32 v4, 0x8001, v4
 ; GFX8-NEXT:    v_min_i16_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT:    v_subrev_u16_e32 v3, 0x8000, v3
+; GFX8-NEXT:    v_add_u16_e32 v3, 0x8000, v3
 ; GFX8-NEXT:    v_max_i16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX8-NEXT:    v_min_i16_e32 v1, v1, v3
 ; GFX8-NEXT:    v_sub_u16_e32 v2, v0, v2
@@ -2813,18 +2817,18 @@ define amdgpu_ps i32 @s_ssubsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
 ; GFX6-NEXT:    s_lshl_b32 s0, s0, 16
 ; GFX6-NEXT:    s_max_i32 s4, s0, -1
 ; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
-; GFX6-NEXT:    s_sub_i32 s4, s4, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s4, s4, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s5, s0, -1
-; GFX6-NEXT:    s_sub_i32 s5, s5, 0x80000000
+; GFX6-NEXT:    s_add_i32 s5, s5, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s2, s4, s2
 ; GFX6-NEXT:    s_min_i32 s2, s2, s5
 ; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
 ; GFX6-NEXT:    s_sub_i32 s0, s0, s2
 ; GFX6-NEXT:    s_lshl_b32 s2, s3, 16
 ; GFX6-NEXT:    s_max_i32 s3, s1, -1
-; GFX6-NEXT:    s_sub_i32 s3, s3, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s3, s3, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s4, s1, -1
-; GFX6-NEXT:    s_sub_i32 s4, s4, 0x80000000
+; GFX6-NEXT:    s_add_i32 s4, s4, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s2, s3, s2
 ; GFX6-NEXT:    s_min_i32 s2, s2, s4
 ; GFX6-NEXT:    s_sub_i32 s1, s1, s2
@@ -2841,12 +2845,12 @@ define amdgpu_ps i32 @s_ssubsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s0
 ; GFX8-NEXT:    s_sext_i32_i16 s5, -1
 ; GFX8-NEXT:    s_max_i32 s6, s4, s5
-; GFX8-NEXT:    s_sub_i32 s6, s6, 0x7fff
+; GFX8-NEXT:    s_addk_i32 s6, 0x8001
 ; GFX8-NEXT:    s_lshr_b32 s3, s1, 16
 ; GFX8-NEXT:    s_min_i32 s4, s4, s5
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_sub_i32 s4, s4, 0xffff8000
+; GFX8-NEXT:    s_addk_i32 s4, 0x8000
 ; GFX8-NEXT:    s_max_i32 s1, s6, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
@@ -2855,11 +2859,11 @@ define amdgpu_ps i32 @s_ssubsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
 ; GFX8-NEXT:    s_sub_i32 s0, s0, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s2
 ; GFX8-NEXT:    s_max_i32 s4, s1, s5
-; GFX8-NEXT:    s_sub_i32 s4, s4, 0x7fff
+; GFX8-NEXT:    s_addk_i32 s4, 0x8001
 ; GFX8-NEXT:    s_min_i32 s1, s1, s5
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
-; GFX8-NEXT:    s_sub_i32 s1, s1, 0xffff8000
+; GFX8-NEXT:    s_addk_i32 s1, 0x8000
 ; GFX8-NEXT:    s_max_i32 s3, s4, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
@@ -2894,18 +2898,18 @@ define amdgpu_ps float @ssubsat_v2i16_sv(<2 x i16> inreg %lhs, <2 x i16> %rhs) {
 ; GFX6-NEXT:    s_lshl_b32 s0, s0, 16
 ; GFX6-NEXT:    s_max_i32 s2, s0, -1
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX6-NEXT:    s_sub_i32 s2, s2, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s2, s2, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s3, s0, -1
-; GFX6-NEXT:    s_sub_i32 s3, s3, 0x80000000
+; GFX6-NEXT:    s_add_i32 s3, s3, 0x80000000
 ; GFX6-NEXT:    v_max_i32_e32 v0, s2, v0
 ; GFX6-NEXT:    v_min_i32_e32 v0, s3, v0
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s0, v0
 ; GFX6-NEXT:    s_lshl_b32 s0, s1, 16
 ; GFX6-NEXT:    s_max_i32 s1, s0, -1
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT:    s_sub_i32 s1, s1, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s1, s1, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s2, s0, -1
-; GFX6-NEXT:    s_sub_i32 s2, s2, 0x80000000
+; GFX6-NEXT:    s_add_i32 s2, s2, 0x80000000
 ; GFX6-NEXT:    v_max_i32_e32 v1, s1, v1
 ; GFX6-NEXT:    v_min_i32_e32 v1, s2, v1
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, s0, v1
@@ -2922,18 +2926,18 @@ define amdgpu_ps float @ssubsat_v2i16_sv(<2 x i16> inreg %lhs, <2 x i16> %rhs) {
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s0
 ; GFX8-NEXT:    s_sext_i32_i16 s3, -1
 ; GFX8-NEXT:    s_max_i32 s4, s2, s3
-; GFX8-NEXT:    s_sub_i32 s4, s4, 0x7fff
+; GFX8-NEXT:    s_addk_i32 s4, 0x8001
 ; GFX8-NEXT:    s_min_i32 s2, s2, s3
 ; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
-; GFX8-NEXT:    s_sub_i32 s2, s2, 0xffff8000
+; GFX8-NEXT:    s_addk_i32 s2, 0x8000
 ; GFX8-NEXT:    v_max_i16_e32 v1, s4, v0
 ; GFX8-NEXT:    v_min_i16_e32 v1, s2, v1
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s1
 ; GFX8-NEXT:    s_max_i32 s4, s2, s3
-; GFX8-NEXT:    s_sub_i32 s4, s4, 0x7fff
+; GFX8-NEXT:    s_addk_i32 s4, 0x8001
 ; GFX8-NEXT:    s_min_i32 s2, s2, s3
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s4
-; GFX8-NEXT:    s_sub_i32 s2, s2, 0xffff8000
+; GFX8-NEXT:    s_addk_i32 s2, 0x8000
 ; GFX8-NEXT:    v_max_i16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX8-NEXT:    v_min_i16_e32 v0, s2, v0
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s1
@@ -2962,18 +2966,20 @@ define amdgpu_ps float @ssubsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) {
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX6-NEXT:    v_max_i32_e32 v2, -1, v0
 ; GFX6-NEXT:    s_lshl_b32 s0, s0, 16
-; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2
-; GFX6-NEXT:    v_min_i32_e32 v3, -1, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 0x80000000, v3
+; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 0x80000001, v2
+; GFX6-NEXT:    v_min_i32_e32 v4, -1, v0
+; GFX6-NEXT:    v_bfrev_b32_e32 v5, 1
+; GFX6-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
 ; GFX6-NEXT:    v_max_i32_e32 v2, s0, v2
-; GFX6-NEXT:    v_min_i32_e32 v2, v2, v3
+; GFX6-NEXT:    v_min_i32_e32 v2, v2, v4
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
+; GFX6-NEXT:    v_mov_b32_e32 v3, 0x80000001
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
 ; GFX6-NEXT:    v_max_i32_e32 v2, -1, v1
 ; GFX6-NEXT:    s_lshl_b32 s0, s1, 16
-; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2
+; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
 ; GFX6-NEXT:    v_min_i32_e32 v3, -1, v1
-; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 0x80000000, v3
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 0x80000000, v3
 ; GFX6-NEXT:    v_max_i32_e32 v2, s0, v2
 ; GFX6-NEXT:    v_min_i32_e32 v2, v2, v3
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v2
@@ -2988,17 +2994,17 @@ define amdgpu_ps float @ssubsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) {
 ; GFX8-LABEL: ssubsat_v2i16_vs:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    v_max_i16_e32 v1, -1, v0
-; GFX8-NEXT:    v_subrev_u16_e32 v1, 0x7fff, v1
+; GFX8-NEXT:    v_add_u16_e32 v1, 0x8001, v1
 ; GFX8-NEXT:    v_min_i16_e32 v2, -1, v0
-; GFX8-NEXT:    v_subrev_u16_e32 v2, 0x8000, v2
+; GFX8-NEXT:    v_add_u16_e32 v2, 0x8000, v2
 ; GFX8-NEXT:    v_max_i16_e32 v1, s0, v1
 ; GFX8-NEXT:    v_min_i16_e32 v1, v1, v2
 ; GFX8-NEXT:    v_mov_b32_e32 v2, -1
 ; GFX8-NEXT:    v_max_i16_sdwa v3, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
-; GFX8-NEXT:    v_subrev_u16_e32 v3, 0x7fff, v3
+; GFX8-NEXT:    v_add_u16_e32 v3, 0x8001, v3
 ; GFX8-NEXT:    v_min_i16_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT:    v_subrev_u16_e32 v2, 0x8000, v2
+; GFX8-NEXT:    v_add_u16_e32 v2, 0x8000, v2
 ; GFX8-NEXT:    v_max_i16_e32 v3, s1, v3
 ; GFX8-NEXT:    v_min_i16_e32 v2, v3, v2
 ; GFX8-NEXT:    v_sub_u16_e32 v1, v0, v1
@@ -3038,38 +3044,38 @@ define <2 x float> @v_ssubsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX6-NEXT:    v_max_i32_e32 v8, -1, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
-; GFX6-NEXT:    v_subrev_i32_e32 v8, vcc, 0x7fffffff, v8
+; GFX6-NEXT:    v_add_i32_e32 v8, vcc, 0x80000001, v8
 ; GFX6-NEXT:    v_min_i32_e32 v10, -1, v0
 ; GFX6-NEXT:    v_bfrev_b32_e32 v11, 1
-; GFX6-NEXT:    v_sub_i32_e32 v10, vcc, v10, v11
+; GFX6-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
 ; GFX6-NEXT:    v_max_i32_e32 v4, v8, v4
 ; GFX6-NEXT:    v_min_i32_e32 v4, v4, v10
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT:    v_bfrev_b32_e32 v9, -2
+; GFX6-NEXT:    v_mov_b32_e32 v9, 0x80000001
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 16, v5
 ; GFX6-NEXT:    v_max_i32_e32 v5, -1, v1
-; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, v5, v9
+; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
 ; GFX6-NEXT:    v_min_i32_e32 v8, -1, v1
-; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, v8, v11
+; GFX6-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
 ; GFX6-NEXT:    v_max_i32_e32 v4, v5, v4
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
 ; GFX6-NEXT:    v_min_i32_e32 v4, v4, v8
 ; GFX6-NEXT:    v_max_i32_e32 v5, -1, v2
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 16, v6
-; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, v5, v9
+; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
 ; GFX6-NEXT:    v_min_i32_e32 v6, -1, v2
-; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, v6, v11
+; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v6, v11
 ; GFX6-NEXT:    v_max_i32_e32 v4, v5, v4
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
 ; GFX6-NEXT:    v_min_i32_e32 v4, v4, v6
 ; GFX6-NEXT:    v_max_i32_e32 v5, -1, v3
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v4
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 16, v7
-; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, v5, v9
+; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
 ; GFX6-NEXT:    v_min_i32_e32 v6, -1, v3
-; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, v6, v11
+; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v6, v11
 ; GFX6-NEXT:    v_max_i32_e32 v4, v5, v4
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 16, v1
 ; GFX6-NEXT:    v_min_i32_e32 v4, v4, v6
@@ -3091,28 +3097,28 @@ define <2 x float> @v_ssubsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_max_i16_e32 v4, -1, v0
-; GFX8-NEXT:    v_subrev_u16_e32 v4, 0x7fff, v4
+; GFX8-NEXT:    v_add_u16_e32 v4, 0x8001, v4
 ; GFX8-NEXT:    v_min_i16_e32 v5, -1, v0
-; GFX8-NEXT:    v_subrev_u16_e32 v5, 0x8000, v5
+; GFX8-NEXT:    v_add_u16_e32 v5, 0x8000, v5
 ; GFX8-NEXT:    v_max_i16_e32 v4, v4, v2
 ; GFX8-NEXT:    v_min_i16_e32 v4, v4, v5
 ; GFX8-NEXT:    v_mov_b32_e32 v5, -1
 ; GFX8-NEXT:    v_max_i16_sdwa v6, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT:    v_subrev_u16_e32 v6, 0x7fff, v6
+; GFX8-NEXT:    v_add_u16_e32 v6, 0x8001, v6
 ; GFX8-NEXT:    v_min_i16_sdwa v7, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT:    v_subrev_u16_e32 v7, 0x8000, v7
+; GFX8-NEXT:    v_add_u16_e32 v7, 0x8000, v7
 ; GFX8-NEXT:    v_max_i16_sdwa v2, v6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX8-NEXT:    v_max_i16_e32 v6, -1, v1
 ; GFX8-NEXT:    v_min_i16_e32 v2, v2, v7
-; GFX8-NEXT:    v_subrev_u16_e32 v6, 0x7fff, v6
+; GFX8-NEXT:    v_add_u16_e32 v6, 0x8001, v6
 ; GFX8-NEXT:    v_min_i16_e32 v7, -1, v1
-; GFX8-NEXT:    v_subrev_u16_e32 v7, 0x8000, v7
+; GFX8-NEXT:    v_add_u16_e32 v7, 0x8000, v7
 ; GFX8-NEXT:    v_max_i16_e32 v6, v6, v3
 ; GFX8-NEXT:    v_min_i16_e32 v6, v6, v7
 ; GFX8-NEXT:    v_max_i16_sdwa v7, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT:    v_subrev_u16_e32 v7, 0x7fff, v7
+; GFX8-NEXT:    v_add_u16_e32 v7, 0x8001, v7
 ; GFX8-NEXT:    v_min_i16_sdwa v5, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT:    v_subrev_u16_e32 v5, 0x8000, v5
+; GFX8-NEXT:    v_add_u16_e32 v5, 0x8000, v5
 ; GFX8-NEXT:    v_max_i16_sdwa v3, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX8-NEXT:    v_min_i16_e32 v3, v3, v5
 ; GFX8-NEXT:    v_sub_u16_e32 v4, v0, v4
@@ -3147,36 +3153,36 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
 ; GFX6-NEXT:    s_lshl_b32 s0, s0, 16
 ; GFX6-NEXT:    s_max_i32 s8, s0, -1
 ; GFX6-NEXT:    s_lshl_b32 s4, s4, 16
-; GFX6-NEXT:    s_sub_i32 s8, s8, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s8, s8, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s9, s0, -1
-; GFX6-NEXT:    s_sub_i32 s9, s9, 0x80000000
+; GFX6-NEXT:    s_add_i32 s9, s9, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s4, s8, s4
 ; GFX6-NEXT:    s_min_i32 s4, s4, s9
 ; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
 ; GFX6-NEXT:    s_sub_i32 s0, s0, s4
 ; GFX6-NEXT:    s_lshl_b32 s4, s5, 16
 ; GFX6-NEXT:    s_max_i32 s5, s1, -1
-; GFX6-NEXT:    s_sub_i32 s5, s5, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s5, s5, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s8, s1, -1
-; GFX6-NEXT:    s_sub_i32 s8, s8, 0x80000000
+; GFX6-NEXT:    s_add_i32 s8, s8, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s4, s5, s4
 ; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
 ; GFX6-NEXT:    s_min_i32 s4, s4, s8
 ; GFX6-NEXT:    s_max_i32 s5, s2, -1
 ; GFX6-NEXT:    s_sub_i32 s1, s1, s4
 ; GFX6-NEXT:    s_lshl_b32 s4, s6, 16
-; GFX6-NEXT:    s_sub_i32 s5, s5, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s5, s5, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s6, s2, -1
-; GFX6-NEXT:    s_sub_i32 s6, s6, 0x80000000
+; GFX6-NEXT:    s_add_i32 s6, s6, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s4, s5, s4
 ; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
 ; GFX6-NEXT:    s_min_i32 s4, s4, s6
 ; GFX6-NEXT:    s_max_i32 s5, s3, -1
 ; GFX6-NEXT:    s_sub_i32 s2, s2, s4
 ; GFX6-NEXT:    s_lshl_b32 s4, s7, 16
-; GFX6-NEXT:    s_sub_i32 s5, s5, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s5, s5, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s6, s3, -1
-; GFX6-NEXT:    s_sub_i32 s6, s6, 0x80000000
+; GFX6-NEXT:    s_add_i32 s6, s6, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s4, s5, s4
 ; GFX6-NEXT:    s_ashr_i32 s1, s1, 16
 ; GFX6-NEXT:    s_min_i32 s4, s4, s6
@@ -3199,12 +3205,12 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s0
 ; GFX8-NEXT:    s_sext_i32_i16 s9, -1
 ; GFX8-NEXT:    s_max_i32 s10, s8, s9
-; GFX8-NEXT:    s_sub_i32 s10, s10, 0x7fff
+; GFX8-NEXT:    s_addk_i32 s10, 0x8001
 ; GFX8-NEXT:    s_lshr_b32 s6, s2, 16
 ; GFX8-NEXT:    s_min_i32 s8, s8, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s10, s10
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
-; GFX8-NEXT:    s_sub_i32 s8, s8, 0xffff8000
+; GFX8-NEXT:    s_addk_i32 s8, 0x8000
 ; GFX8-NEXT:    s_max_i32 s2, s10, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s8
@@ -3213,11 +3219,11 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
 ; GFX8-NEXT:    s_sub_i32 s0, s0, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s4
 ; GFX8-NEXT:    s_max_i32 s8, s2, s9
-; GFX8-NEXT:    s_sub_i32 s8, s8, 0x7fff
+; GFX8-NEXT:    s_addk_i32 s8, 0x8001
 ; GFX8-NEXT:    s_min_i32 s2, s2, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s8
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s6
-; GFX8-NEXT:    s_sub_i32 s2, s2, 0xffff8000
+; GFX8-NEXT:    s_addk_i32 s2, 0x8000
 ; GFX8-NEXT:    s_max_i32 s6, s8, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
@@ -3225,12 +3231,12 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
 ; GFX8-NEXT:    s_sub_i32 s2, s4, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s1
 ; GFX8-NEXT:    s_max_i32 s6, s4, s9
-; GFX8-NEXT:    s_sub_i32 s6, s6, 0x7fff
+; GFX8-NEXT:    s_addk_i32 s6, 0x8001
 ; GFX8-NEXT:    s_lshr_b32 s7, s3, 16
 ; GFX8-NEXT:    s_min_i32 s4, s4, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
-; GFX8-NEXT:    s_sub_i32 s4, s4, 0xffff8000
+; GFX8-NEXT:    s_addk_i32 s4, 0x8000
 ; GFX8-NEXT:    s_max_i32 s3, s6, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
@@ -3239,11 +3245,11 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
 ; GFX8-NEXT:    s_sub_i32 s1, s1, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s5
 ; GFX8-NEXT:    s_max_i32 s4, s3, s9
-; GFX8-NEXT:    s_sub_i32 s4, s4, 0x7fff
+; GFX8-NEXT:    s_addk_i32 s4, 0x8001
 ; GFX8-NEXT:    s_min_i32 s3, s3, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s7
-; GFX8-NEXT:    s_sub_i32 s3, s3, 0xffff8000
+; GFX8-NEXT:    s_addk_i32 s3, 0x8000
 ; GFX8-NEXT:    s_max_i32 s4, s4, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
@@ -3299,57 +3305,57 @@ define <3 x float> @v_ssubsat_v6i16(<6 x i16> %lhs, <6 x i16> %rhs) {
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX6-NEXT:    v_max_i32_e32 v12, -1, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v6, 16, v6
-; GFX6-NEXT:    v_subrev_i32_e32 v12, vcc, 0x7fffffff, v12
+; GFX6-NEXT:    v_add_i32_e32 v12, vcc, 0x80000001, v12
 ; GFX6-NEXT:    v_min_i32_e32 v14, -1, v0
 ; GFX6-NEXT:    v_bfrev_b32_e32 v15, 1
-; GFX6-NEXT:    v_sub_i32_e32 v14, vcc, v14, v15
+; GFX6-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
 ; GFX6-NEXT:    v_max_i32_e32 v6, v12, v6
 ; GFX6-NEXT:    v_min_i32_e32 v6, v6, v14
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT:    v_bfrev_b32_e32 v13, -2
+; GFX6-NEXT:    v_mov_b32_e32 v13, 0x80000001
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v6, 16, v7
 ; GFX6-NEXT:    v_max_i32_e32 v7, -1, v1
-; GFX6-NEXT:    v_sub_i32_e32 v7, vcc, v7, v13
+; GFX6-NEXT:    v_add_i32_e32 v7, vcc, v7, v13
 ; GFX6-NEXT:    v_min_i32_e32 v12, -1, v1
-; GFX6-NEXT:    v_sub_i32_e32 v12, vcc, v12, v15
+; GFX6-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
 ; GFX6-NEXT:    v_max_i32_e32 v6, v7, v6
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
 ; GFX6-NEXT:    v_min_i32_e32 v6, v6, v12
 ; GFX6-NEXT:    v_max_i32_e32 v7, -1, v2
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v6, 16, v8
-; GFX6-NEXT:    v_sub_i32_e32 v7, vcc, v7, v13
+; GFX6-NEXT:    v_add_i32_e32 v7, vcc, v7, v13
 ; GFX6-NEXT:    v_min_i32_e32 v8, -1, v2
-; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, v8, v15
+; GFX6-NEXT:    v_add_i32_e32 v8, vcc, v8, v15
 ; GFX6-NEXT:    v_max_i32_e32 v6, v7, v6
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
 ; GFX6-NEXT:    v_min_i32_e32 v6, v6, v8
 ; GFX6-NEXT:    v_max_i32_e32 v7, -1, v3
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v6
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v6, 16, v9
-; GFX6-NEXT:    v_sub_i32_e32 v7, vcc, v7, v13
+; GFX6-NEXT:    v_add_i32_e32 v7, vcc, v7, v13
 ; GFX6-NEXT:    v_min_i32_e32 v8, -1, v3
-; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, v8, v15
+; GFX6-NEXT:    v_add_i32_e32 v8, vcc, v8, v15
 ; GFX6-NEXT:    v_max_i32_e32 v6, v7, v6
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
 ; GFX6-NEXT:    v_min_i32_e32 v6, v6, v8
 ; GFX6-NEXT:    v_max_i32_e32 v7, -1, v4
 ; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, v3, v6
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v6, 16, v10
-; GFX6-NEXT:    v_sub_i32_e32 v7, vcc, v7, v13
+; GFX6-NEXT:    v_add_i32_e32 v7, vcc, v7, v13
 ; GFX6-NEXT:    v_min_i32_e32 v8, -1, v4
-; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, v8, v15
+; GFX6-NEXT:    v_add_i32_e32 v8, vcc, v8, v15
 ; GFX6-NEXT:    v_max_i32_e32 v6, v7, v6
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
 ; GFX6-NEXT:    v_min_i32_e32 v6, v6, v8
 ; GFX6-NEXT:    v_max_i32_e32 v7, -1, v5
 ; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, v4, v6
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v6, 16, v11
-; GFX6-NEXT:    v_sub_i32_e32 v7, vcc, v7, v13
+; GFX6-NEXT:    v_add_i32_e32 v7, vcc, v7, v13
 ; GFX6-NEXT:    v_min_i32_e32 v8, -1, v5
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 16, v1
-; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, v8, v15
+; GFX6-NEXT:    v_add_i32_e32 v8, vcc, v8, v15
 ; GFX6-NEXT:    v_max_i32_e32 v6, v7, v6
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 16, v0
 ; GFX6-NEXT:    v_min_i32_e32 v6, v6, v8
@@ -3376,40 +3382,40 @@ define <3 x float> @v_ssubsat_v6i16(<6 x i16> %lhs, <6 x i16> %rhs) {
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_max_i16_e32 v6, -1, v0
-; GFX8-NEXT:    v_subrev_u16_e32 v6, 0x7fff, v6
+; GFX8-NEXT:    v_add_u16_e32 v6, 0x8001, v6
 ; GFX8-NEXT:    v_min_i16_e32 v7, -1, v0
-; GFX8-NEXT:    v_subrev_u16_e32 v7, 0x8000, v7
+; GFX8-NEXT:    v_add_u16_e32 v7, 0x8000, v7
 ; GFX8-NEXT:    v_max_i16_e32 v6, v6, v3
 ; GFX8-NEXT:    v_min_i16_e32 v6, v6, v7
 ; GFX8-NEXT:    v_mov_b32_e32 v7, -1
 ; GFX8-NEXT:    v_max_i16_sdwa v8, v0, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT:    v_subrev_u16_e32 v8, 0x7fff, v8
+; GFX8-NEXT:    v_add_u16_e32 v8, 0x8001, v8
 ; GFX8-NEXT:    v_min_i16_sdwa v9, v0, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT:    v_subrev_u16_e32 v9, 0x8000, v9
+; GFX8-NEXT:    v_add_u16_e32 v9, 0x8000, v9
 ; GFX8-NEXT:    v_max_i16_sdwa v3, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX8-NEXT:    v_max_i16_e32 v8, -1, v1
 ; GFX8-NEXT:    v_min_i16_e32 v3, v3, v9
-; GFX8-NEXT:    v_subrev_u16_e32 v8, 0x7fff, v8
+; GFX8-NEXT:    v_add_u16_e32 v8, 0x8001, v8
 ; GFX8-NEXT:    v_min_i16_e32 v9, -1, v1
-; GFX8-NEXT:    v_subrev_u16_e32 v9, 0x8000, v9
+; GFX8-NEXT:    v_add_u16_e32 v9, 0x8000, v9
 ; GFX8-NEXT:    v_max_i16_e32 v8, v8, v4
 ; GFX8-NEXT:    v_min_i16_e32 v8, v8, v9
 ; GFX8-NEXT:    v_max_i16_sdwa v9, v1, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT:    v_subrev_u16_e32 v9, 0x7fff, v9
+; GFX8-NEXT:    v_add_u16_e32 v9, 0x8001, v9
 ; GFX8-NEXT:    v_min_i16_sdwa v10, v1, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT:    v_subrev_u16_e32 v10, 0x8000, v10
+; GFX8-NEXT:    v_add_u16_e32 v10, 0x8000, v10
 ; GFX8-NEXT:    v_max_i16_sdwa v4, v9, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX8-NEXT:    v_max_i16_e32 v9, -1, v2
 ; GFX8-NEXT:    v_min_i16_e32 v4, v4, v10
-; GFX8-NEXT:    v_subrev_u16_e32 v9, 0x7fff, v9
+; GFX8-NEXT:    v_add_u16_e32 v9, 0x8001, v9
 ; GFX8-NEXT:    v_min_i16_e32 v10, -1, v2
-; GFX8-NEXT:    v_subrev_u16_e32 v10, 0x8000, v10
+; GFX8-NEXT:    v_add_u16_e32 v10, 0x8000, v10
 ; GFX8-NEXT:    v_max_i16_e32 v9, v9, v5
 ; GFX8-NEXT:    v_min_i16_e32 v9, v9, v10
 ; GFX8-NEXT:    v_max_i16_sdwa v10, v2, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT:    v_subrev_u16_e32 v10, 0x7fff, v10
+; GFX8-NEXT:    v_add_u16_e32 v10, 0x8001, v10
 ; GFX8-NEXT:    v_min_i16_sdwa v7, v2, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT:    v_subrev_u16_e32 v7, 0x8000, v7
+; GFX8-NEXT:    v_add_u16_e32 v7, 0x8000, v7
 ; GFX8-NEXT:    v_max_i16_sdwa v5, v10, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX8-NEXT:    v_min_i16_e32 v5, v5, v7
 ; GFX8-NEXT:    v_sub_u16_e32 v6, v0, v6
@@ -3449,55 +3455,55 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 ; GFX6-NEXT:    s_lshl_b32 s0, s0, 16
 ; GFX6-NEXT:    s_max_i32 s12, s0, -1
 ; GFX6-NEXT:    s_lshl_b32 s6, s6, 16
-; GFX6-NEXT:    s_sub_i32 s12, s12, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s12, s12, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s13, s0, -1
-; GFX6-NEXT:    s_sub_i32 s13, s13, 0x80000000
+; GFX6-NEXT:    s_add_i32 s13, s13, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s6, s12, s6
 ; GFX6-NEXT:    s_min_i32 s6, s6, s13
 ; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
 ; GFX6-NEXT:    s_sub_i32 s0, s0, s6
 ; GFX6-NEXT:    s_lshl_b32 s6, s7, 16
 ; GFX6-NEXT:    s_max_i32 s7, s1, -1
-; GFX6-NEXT:    s_sub_i32 s7, s7, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s7, s7, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s12, s1, -1
-; GFX6-NEXT:    s_sub_i32 s12, s12, 0x80000000
+; GFX6-NEXT:    s_add_i32 s12, s12, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s6, s7, s6
 ; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
 ; GFX6-NEXT:    s_min_i32 s6, s6, s12
 ; GFX6-NEXT:    s_max_i32 s7, s2, -1
 ; GFX6-NEXT:    s_sub_i32 s1, s1, s6
 ; GFX6-NEXT:    s_lshl_b32 s6, s8, 16
-; GFX6-NEXT:    s_sub_i32 s7, s7, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s7, s7, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s8, s2, -1
-; GFX6-NEXT:    s_sub_i32 s8, s8, 0x80000000
+; GFX6-NEXT:    s_add_i32 s8, s8, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s6, s7, s6
 ; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
 ; GFX6-NEXT:    s_min_i32 s6, s6, s8
 ; GFX6-NEXT:    s_max_i32 s7, s3, -1
 ; GFX6-NEXT:    s_sub_i32 s2, s2, s6
 ; GFX6-NEXT:    s_lshl_b32 s6, s9, 16
-; GFX6-NEXT:    s_sub_i32 s7, s7, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s7, s7, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s8, s3, -1
-; GFX6-NEXT:    s_sub_i32 s8, s8, 0x80000000
+; GFX6-NEXT:    s_add_i32 s8, s8, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s6, s7, s6
 ; GFX6-NEXT:    s_lshl_b32 s4, s4, 16
 ; GFX6-NEXT:    s_min_i32 s6, s6, s8
 ; GFX6-NEXT:    s_max_i32 s7, s4, -1
 ; GFX6-NEXT:    s_sub_i32 s3, s3, s6
 ; GFX6-NEXT:    s_lshl_b32 s6, s10, 16
-; GFX6-NEXT:    s_sub_i32 s7, s7, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s7, s7, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s8, s4, -1
-; GFX6-NEXT:    s_sub_i32 s8, s8, 0x80000000
+; GFX6-NEXT:    s_add_i32 s8, s8, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s6, s7, s6
 ; GFX6-NEXT:    s_lshl_b32 s5, s5, 16
 ; GFX6-NEXT:    s_min_i32 s6, s6, s8
 ; GFX6-NEXT:    s_max_i32 s7, s5, -1
 ; GFX6-NEXT:    s_sub_i32 s4, s4, s6
 ; GFX6-NEXT:    s_lshl_b32 s6, s11, 16
-; GFX6-NEXT:    s_sub_i32 s7, s7, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s7, s7, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s8, s5, -1
 ; GFX6-NEXT:    s_ashr_i32 s1, s1, 16
-; GFX6-NEXT:    s_sub_i32 s8, s8, 0x80000000
+; GFX6-NEXT:    s_add_i32 s8, s8, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s6, s7, s6
 ; GFX6-NEXT:    s_ashr_i32 s0, s0, 16
 ; GFX6-NEXT:    s_min_i32 s6, s6, s8
@@ -3525,12 +3531,12 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 ; GFX8-NEXT:    s_sext_i32_i16 s12, s0
 ; GFX8-NEXT:    s_sext_i32_i16 s13, -1
 ; GFX8-NEXT:    s_max_i32 s14, s12, s13
-; GFX8-NEXT:    s_sub_i32 s14, s14, 0x7fff
+; GFX8-NEXT:    s_addk_i32 s14, 0x8001
 ; GFX8-NEXT:    s_lshr_b32 s9, s3, 16
 ; GFX8-NEXT:    s_min_i32 s12, s12, s13
 ; GFX8-NEXT:    s_sext_i32_i16 s14, s14
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
-; GFX8-NEXT:    s_sub_i32 s12, s12, 0xffff8000
+; GFX8-NEXT:    s_addk_i32 s12, 0x8000
 ; GFX8-NEXT:    s_max_i32 s3, s14, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s12, s12
@@ -3539,11 +3545,11 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 ; GFX8-NEXT:    s_sub_i32 s0, s0, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s6
 ; GFX8-NEXT:    s_max_i32 s12, s3, s13
-; GFX8-NEXT:    s_sub_i32 s12, s12, 0x7fff
+; GFX8-NEXT:    s_addk_i32 s12, 0x8001
 ; GFX8-NEXT:    s_min_i32 s3, s3, s13
 ; GFX8-NEXT:    s_sext_i32_i16 s12, s12
 ; GFX8-NEXT:    s_sext_i32_i16 s9, s9
-; GFX8-NEXT:    s_sub_i32 s3, s3, 0xffff8000
+; GFX8-NEXT:    s_addk_i32 s3, 0x8000
 ; GFX8-NEXT:    s_max_i32 s9, s12, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s9, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
@@ -3551,12 +3557,12 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 ; GFX8-NEXT:    s_sub_i32 s3, s6, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s1
 ; GFX8-NEXT:    s_max_i32 s9, s6, s13
-; GFX8-NEXT:    s_sub_i32 s9, s9, 0x7fff
+; GFX8-NEXT:    s_addk_i32 s9, 0x8001
 ; GFX8-NEXT:    s_lshr_b32 s10, s4, 16
 ; GFX8-NEXT:    s_min_i32 s6, s6, s13
 ; GFX8-NEXT:    s_sext_i32_i16 s9, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
-; GFX8-NEXT:    s_sub_i32 s6, s6, 0xffff8000
+; GFX8-NEXT:    s_addk_i32 s6, 0x8000
 ; GFX8-NEXT:    s_max_i32 s4, s9, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s6
@@ -3565,11 +3571,11 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 ; GFX8-NEXT:    s_sub_i32 s1, s1, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s7
 ; GFX8-NEXT:    s_max_i32 s6, s4, s13
-; GFX8-NEXT:    s_sub_i32 s6, s6, 0x7fff
+; GFX8-NEXT:    s_addk_i32 s6, 0x8001
 ; GFX8-NEXT:    s_min_i32 s4, s4, s13
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s9, s10
-; GFX8-NEXT:    s_sub_i32 s4, s4, 0xffff8000
+; GFX8-NEXT:    s_addk_i32 s4, 0x8000
 ; GFX8-NEXT:    s_max_i32 s6, s6, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
@@ -3577,12 +3583,12 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s2
 ; GFX8-NEXT:    s_sub_i32 s4, s7, s4
 ; GFX8-NEXT:    s_max_i32 s7, s6, s13
-; GFX8-NEXT:    s_sub_i32 s7, s7, 0x7fff
+; GFX8-NEXT:    s_addk_i32 s7, 0x8001
 ; GFX8-NEXT:    s_lshr_b32 s11, s5, 16
 ; GFX8-NEXT:    s_min_i32 s6, s6, s13
 ; GFX8-NEXT:    s_sext_i32_i16 s7, s7
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s5
-; GFX8-NEXT:    s_sub_i32 s6, s6, 0xffff8000
+; GFX8-NEXT:    s_addk_i32 s6, 0x8000
 ; GFX8-NEXT:    s_max_i32 s5, s7, s5
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s5
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s6
@@ -3591,11 +3597,11 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 ; GFX8-NEXT:    s_sub_i32 s2, s2, s5
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s8
 ; GFX8-NEXT:    s_max_i32 s6, s5, s13
-; GFX8-NEXT:    s_sub_i32 s6, s6, 0x7fff
+; GFX8-NEXT:    s_addk_i32 s6, 0x8001
 ; GFX8-NEXT:    s_min_i32 s5, s5, s13
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s7, s11
-; GFX8-NEXT:    s_sub_i32 s5, s5, 0xffff8000
+; GFX8-NEXT:    s_addk_i32 s5, 0x8000
 ; GFX8-NEXT:    s_max_i32 s6, s6, s7
 ; GFX8-NEXT:    s_and_b32 s3, 0xffff, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s6
@@ -3648,66 +3654,66 @@ define <4 x float> @v_ssubsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX6-NEXT:    v_max_i32_e32 v16, -1, v0
-; GFX6-NEXT:    v_bfrev_b32_e32 v17, -2
+; GFX6-NEXT:    v_mov_b32_e32 v17, 0x80000001
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v8, 16, v8
-; GFX6-NEXT:    v_sub_i32_e32 v16, vcc, v16, v17
+; GFX6-NEXT:    v_add_i32_e32 v16, vcc, v16, v17
 ; GFX6-NEXT:    v_min_i32_e32 v18, -1, v0
 ; GFX6-NEXT:    v_bfrev_b32_e32 v19, 1
-; GFX6-NEXT:    v_sub_i32_e32 v18, vcc, v18, v19
+; GFX6-NEXT:    v_add_i32_e32 v18, vcc, v18, v19
 ; GFX6-NEXT:    v_max_i32_e32 v8, v16, v8
 ; GFX6-NEXT:    v_min_i32_e32 v8, v8, v18
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v8, 16, v9
 ; GFX6-NEXT:    v_max_i32_e32 v9, -1, v1
-; GFX6-NEXT:    v_sub_i32_e32 v9, vcc, v9, v17
+; GFX6-NEXT:    v_add_i32_e32 v9, vcc, v9, v17
 ; GFX6-NEXT:    v_min_i32_e32 v16, -1, v1
-; GFX6-NEXT:    v_sub_i32_e32 v16, vcc, v16, v19
+; GFX6-NEXT:    v_add_i32_e32 v16, vcc, v16, v19
 ; GFX6-NEXT:    v_max_i32_e32 v8, v9, v8
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
 ; GFX6-NEXT:    v_min_i32_e32 v8, v8, v16
 ; GFX6-NEXT:    v_max_i32_e32 v9, -1, v2
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v8
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v8, 16, v10
-; GFX6-NEXT:    v_sub_i32_e32 v9, vcc, v9, v17
+; GFX6-NEXT:    v_add_i32_e32 v9, vcc, v9, v17
 ; GFX6-NEXT:    v_min_i32_e32 v10, -1, v2
-; GFX6-NEXT:    v_sub_i32_e32 v10, vcc, v10, v19
+; GFX6-NEXT:    v_add_i32_e32 v10, vcc, v10, v19
 ; GFX6-NEXT:    v_max_i32_e32 v8, v9, v8
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
 ; GFX6-NEXT:    v_min_i32_e32 v8, v8, v10
 ; GFX6-NEXT:    v_max_i32_e32 v9, -1, v3
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v8
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v8, 16, v11
-; GFX6-NEXT:    v_sub_i32_e32 v9, vcc, v9, v17
+; GFX6-NEXT:    v_add_i32_e32 v9, vcc, v9, v17
 ; GFX6-NEXT:    v_min_i32_e32 v10, -1, v3
-; GFX6-NEXT:    v_sub_i32_e32 v10, vcc, v10, v19
+; GFX6-NEXT:    v_add_i32_e32 v10, vcc, v10, v19
 ; GFX6-NEXT:    v_max_i32_e32 v8, v9, v8
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
 ; GFX6-NEXT:    v_min_i32_e32 v8, v8, v10
 ; GFX6-NEXT:    v_max_i32_e32 v9, -1, v4
 ; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, v3, v8
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v8, 16, v12
-; GFX6-NEXT:    v_sub_i32_e32 v9, vcc, v9, v17
+; GFX6-NEXT:    v_add_i32_e32 v9, vcc, v9, v17
 ; GFX6-NEXT:    v_min_i32_e32 v10, -1, v4
-; GFX6-NEXT:    v_sub_i32_e32 v10, vcc, v10, v19
+; GFX6-NEXT:    v_add_i32_e32 v10, vcc, v10, v19
 ; GFX6-NEXT:    v_max_i32_e32 v8, v9, v8
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
 ; GFX6-NEXT:    v_min_i32_e32 v8, v8, v10
 ; GFX6-NEXT:    v_max_i32_e32 v9, -1, v5
 ; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, v4, v8
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v8, 16, v13
-; GFX6-NEXT:    v_sub_i32_e32 v9, vcc, v9, v17
+; GFX6-NEXT:    v_add_i32_e32 v9, vcc, v9, v17
 ; GFX6-NEXT:    v_min_i32_e32 v10, -1, v5
-; GFX6-NEXT:    v_sub_i32_e32 v10, vcc, v10, v19
+; GFX6-NEXT:    v_add_i32_e32 v10, vcc, v10, v19
 ; GFX6-NEXT:    v_max_i32_e32 v8, v9, v8
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v6, 16, v6
 ; GFX6-NEXT:    v_min_i32_e32 v8, v8, v10
 ; GFX6-NEXT:    v_max_i32_e32 v9, -1, v6
 ; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, v5, v8
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v8, 16, v14
-; GFX6-NEXT:    v_sub_i32_e32 v9, vcc, v9, v17
+; GFX6-NEXT:    v_add_i32_e32 v9, vcc, v9, v17
 ; GFX6-NEXT:    v_min_i32_e32 v10, -1, v6
-; GFX6-NEXT:    v_sub_i32_e32 v10, vcc, v10, v19
+; GFX6-NEXT:    v_add_i32_e32 v10, vcc, v10, v19
 ; GFX6-NEXT:    v_max_i32_e32 v8, v9, v8
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
 ; GFX6-NEXT:    v_min_i32_e32 v8, v8, v10
@@ -3715,10 +3721,10 @@ define <4 x float> @v_ssubsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 16, v1
 ; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, v6, v8
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v8, 16, v15
-; GFX6-NEXT:    v_sub_i32_e32 v9, vcc, v9, v17
+; GFX6-NEXT:    v_add_i32_e32 v9, vcc, v9, v17
 ; GFX6-NEXT:    v_min_i32_e32 v10, -1, v7
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 16, v0
-; GFX6-NEXT:    v_sub_i32_e32 v10, vcc, v10, v19
+; GFX6-NEXT:    v_add_i32_e32 v10, vcc, v10, v19
 ; GFX6-NEXT:    v_max_i32_e32 v8, v9, v8
 ; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v2, 16, v2
@@ -3750,52 +3756,52 @@ define <4 x float> @v_ssubsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_max_i16_e32 v8, -1, v0
-; GFX8-NEXT:    v_subrev_u16_e32 v8, 0x7fff, v8
+; GFX8-NEXT:    v_add_u16_e32 v8, 0x8001, v8
 ; GFX8-NEXT:    v_min_i16_e32 v9, -1, v0
-; GFX8-NEXT:    v_subrev_u16_e32 v9, 0x8000, v9
+; GFX8-NEXT:    v_add_u16_e32 v9, 0x8000, v9
 ; GFX8-NEXT:    v_max_i16_e32 v8, v8, v4
 ; GFX8-NEXT:    v_min_i16_e32 v8, v8, v9
 ; GFX8-NEXT:    v_mov_b32_e32 v9, -1
 ; GFX8-NEXT:    v_max_i16_sdwa v10, v0, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT:    v_subrev_u16_e32 v10, 0x7fff, v10
+; GFX8-NEXT:    v_add_u16_e32 v10, 0x8001, v10
 ; GFX8-NEXT:    v_min_i16_sdwa v11, v0, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT:    v_subrev_u16_e32 v11, 0x8000, v11
+; GFX8-NEXT:    v_add_u16_e32 v11, 0x8000, v11
 ; GFX8-NEXT:    v_max_i16_sdwa v4, v10, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX8-NEXT:    v_max_i16_e32 v10, -1, v1
 ; GFX8-NEXT:    v_min_i16_e32 v4, v4, v11
-; GFX8-NEXT:    v_subrev_u16_e32 v10, 0x7fff, v10
+; GFX8-NEXT:    v_add_u16_e32 v10, 0x8001, v10
 ; GFX8-NEXT:    v_min_i16_e32 v11, -1, v1
-; GFX8-NEXT:    v_subrev_u16_e32 v11, 0x8000, v11
+; GFX8-NEXT:    v_add_u16_e32 v11, 0x8000, v11
 ; GFX8-NEXT:    v_max_i16_e32 v10, v10, v5
 ; GFX8-NEXT:    v_min_i16_e32 v10, v10, v11
 ; GFX8-NEXT:    v_max_i16_sdwa v11, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT:    v_subrev_u16_e32 v11, 0x7fff, v11
+; GFX8-NEXT:    v_add_u16_e32 v11, 0x8001, v11
 ; GFX8-NEXT:    v_min_i16_sdwa v12, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT:    v_subrev_u16_e32 v12, 0x8000, v12
+; GFX8-NEXT:    v_add_u16_e32 v12, 0x8000, v12
 ; GFX8-NEXT:    v_max_i16_sdwa v5, v11, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX8-NEXT:    v_max_i16_e32 v11, -1, v2
 ; GFX8-NEXT:    v_min_i16_e32 v5, v5, v12
-; GFX8-NEXT:    v_subrev_u16_e32 v11, 0x7fff, v11
+; GFX8-NEXT:    v_add_u16_e32 v11, 0x8001, v11
 ; GFX8-NEXT:    v_min_i16_e32 v12, -1, v2
-; GFX8-NEXT:    v_subrev_u16_e32 v12, 0x8000, v12
+; GFX8-NEXT:    v_add_u16_e32 v12, 0x8000, v12
 ; GFX8-NEXT:    v_max_i16_e32 v11, v11, v6
 ; GFX8-NEXT:    v_min_i16_e32 v11, v11, v12
 ; GFX8-NEXT:    v_max_i16_sdwa v12, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT:    v_subrev_u16_e32 v12, 0x7fff, v12
+; GFX8-NEXT:    v_add_u16_e32 v12, 0x8001, v12
 ; GFX8-NEXT:    v_min_i16_sdwa v13, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT:    v_subrev_u16_e32 v13, 0x8000, v13
+; GFX8-NEXT:    v_add_u16_e32 v13, 0x8000, v13
 ; GFX8-NEXT:    v_max_i16_sdwa v6, v12, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX8-NEXT:    v_max_i16_e32 v12, -1, v3
 ; GFX8-NEXT:    v_min_i16_e32 v6, v6, v13
-; GFX8-NEXT:    v_subrev_u16_e32 v12, 0x7fff, v12
+; GFX8-NEXT:    v_add_u16_e32 v12, 0x8001, v12
 ; GFX8-NEXT:    v_min_i16_e32 v13, -1, v3
-; GFX8-NEXT:    v_subrev_u16_e32 v13, 0x8000, v13
+; GFX8-NEXT:    v_add_u16_e32 v13, 0x8000, v13
 ; GFX8-NEXT:    v_max_i16_e32 v12, v12, v7
 ; GFX8-NEXT:    v_min_i16_e32 v12, v12, v13
 ; GFX8-NEXT:    v_max_i16_sdwa v13, v3, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT:    v_subrev_u16_e32 v13, 0x7fff, v13
+; GFX8-NEXT:    v_add_u16_e32 v13, 0x8001, v13
 ; GFX8-NEXT:    v_min_i16_sdwa v9, v3, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT:    v_subrev_u16_e32 v9, 0x8000, v9
+; GFX8-NEXT:    v_add_u16_e32 v9, 0x8000, v9
 ; GFX8-NEXT:    v_max_i16_sdwa v7, v13, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX8-NEXT:    v_sub_u16_e32 v8, v0, v8
 ; GFX8-NEXT:    v_sub_u16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
@@ -3840,63 +3846,63 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX6-NEXT:    s_lshl_b32 s0, s0, 16
 ; GFX6-NEXT:    s_max_i32 s16, s0, -1
 ; GFX6-NEXT:    s_lshl_b32 s8, s8, 16
-; GFX6-NEXT:    s_sub_i32 s16, s16, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s16, s16, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s17, s0, -1
-; GFX6-NEXT:    s_sub_i32 s17, s17, 0x80000000
+; GFX6-NEXT:    s_add_i32 s17, s17, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s8, s16, s8
 ; GFX6-NEXT:    s_min_i32 s8, s8, s17
 ; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
 ; GFX6-NEXT:    s_sub_i32 s0, s0, s8
 ; GFX6-NEXT:    s_lshl_b32 s8, s9, 16
 ; GFX6-NEXT:    s_max_i32 s9, s1, -1
-; GFX6-NEXT:    s_sub_i32 s9, s9, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s9, s9, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s16, s1, -1
-; GFX6-NEXT:    s_sub_i32 s16, s16, 0x80000000
+; GFX6-NEXT:    s_add_i32 s16, s16, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s8, s9, s8
 ; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
 ; GFX6-NEXT:    s_min_i32 s8, s8, s16
 ; GFX6-NEXT:    s_max_i32 s9, s2, -1
 ; GFX6-NEXT:    s_sub_i32 s1, s1, s8
 ; GFX6-NEXT:    s_lshl_b32 s8, s10, 16
-; GFX6-NEXT:    s_sub_i32 s9, s9, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s9, s9, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s10, s2, -1
-; GFX6-NEXT:    s_sub_i32 s10, s10, 0x80000000
+; GFX6-NEXT:    s_add_i32 s10, s10, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s8, s9, s8
 ; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
 ; GFX6-NEXT:    s_min_i32 s8, s8, s10
 ; GFX6-NEXT:    s_max_i32 s9, s3, -1
 ; GFX6-NEXT:    s_sub_i32 s2, s2, s8
 ; GFX6-NEXT:    s_lshl_b32 s8, s11, 16
-; GFX6-NEXT:    s_sub_i32 s9, s9, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s9, s9, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s10, s3, -1
-; GFX6-NEXT:    s_sub_i32 s10, s10, 0x80000000
+; GFX6-NEXT:    s_add_i32 s10, s10, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s8, s9, s8
 ; GFX6-NEXT:    s_lshl_b32 s4, s4, 16
 ; GFX6-NEXT:    s_min_i32 s8, s8, s10
 ; GFX6-NEXT:    s_max_i32 s9, s4, -1
 ; GFX6-NEXT:    s_sub_i32 s3, s3, s8
 ; GFX6-NEXT:    s_lshl_b32 s8, s12, 16
-; GFX6-NEXT:    s_sub_i32 s9, s9, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s9, s9, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s10, s4, -1
-; GFX6-NEXT:    s_sub_i32 s10, s10, 0x80000000
+; GFX6-NEXT:    s_add_i32 s10, s10, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s8, s9, s8
 ; GFX6-NEXT:    s_lshl_b32 s5, s5, 16
 ; GFX6-NEXT:    s_min_i32 s8, s8, s10
 ; GFX6-NEXT:    s_max_i32 s9, s5, -1
 ; GFX6-NEXT:    s_sub_i32 s4, s4, s8
 ; GFX6-NEXT:    s_lshl_b32 s8, s13, 16
-; GFX6-NEXT:    s_sub_i32 s9, s9, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s9, s9, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s10, s5, -1
-; GFX6-NEXT:    s_sub_i32 s10, s10, 0x80000000
+; GFX6-NEXT:    s_add_i32 s10, s10, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s8, s9, s8
 ; GFX6-NEXT:    s_lshl_b32 s6, s6, 16
 ; GFX6-NEXT:    s_min_i32 s8, s8, s10
 ; GFX6-NEXT:    s_max_i32 s9, s6, -1
 ; GFX6-NEXT:    s_sub_i32 s5, s5, s8
 ; GFX6-NEXT:    s_lshl_b32 s8, s14, 16
-; GFX6-NEXT:    s_sub_i32 s9, s9, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s9, s9, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s10, s6, -1
-; GFX6-NEXT:    s_sub_i32 s10, s10, 0x80000000
+; GFX6-NEXT:    s_add_i32 s10, s10, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s8, s9, s8
 ; GFX6-NEXT:    s_lshl_b32 s7, s7, 16
 ; GFX6-NEXT:    s_min_i32 s8, s8, s10
@@ -3904,10 +3910,10 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX6-NEXT:    s_ashr_i32 s1, s1, 16
 ; GFX6-NEXT:    s_sub_i32 s6, s6, s8
 ; GFX6-NEXT:    s_lshl_b32 s8, s15, 16
-; GFX6-NEXT:    s_sub_i32 s9, s9, 0x7fffffff
+; GFX6-NEXT:    s_add_i32 s9, s9, 0x80000001
 ; GFX6-NEXT:    s_min_i32 s10, s7, -1
 ; GFX6-NEXT:    s_ashr_i32 s0, s0, 16
-; GFX6-NEXT:    s_sub_i32 s10, s10, 0x80000000
+; GFX6-NEXT:    s_add_i32 s10, s10, 0x80000000
 ; GFX6-NEXT:    s_max_i32 s8, s9, s8
 ; GFX6-NEXT:    s_and_b32 s1, s1, 0xffff
 ; GFX6-NEXT:    s_ashr_i32 s2, s2, 16
@@ -3940,12 +3946,12 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX8-NEXT:    s_sext_i32_i16 s16, s0
 ; GFX8-NEXT:    s_sext_i32_i16 s17, -1
 ; GFX8-NEXT:    s_max_i32 s18, s16, s17
-; GFX8-NEXT:    s_sub_i32 s18, s18, 0x7fff
+; GFX8-NEXT:    s_addk_i32 s18, 0x8001
 ; GFX8-NEXT:    s_lshr_b32 s12, s4, 16
 ; GFX8-NEXT:    s_min_i32 s16, s16, s17
 ; GFX8-NEXT:    s_sext_i32_i16 s18, s18
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
-; GFX8-NEXT:    s_sub_i32 s16, s16, 0xffff8000
+; GFX8-NEXT:    s_addk_i32 s16, 0x8000
 ; GFX8-NEXT:    s_max_i32 s4, s18, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s16, s16
@@ -3954,11 +3960,11 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX8-NEXT:    s_sub_i32 s0, s0, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s8
 ; GFX8-NEXT:    s_max_i32 s16, s4, s17
-; GFX8-NEXT:    s_sub_i32 s16, s16, 0x7fff
+; GFX8-NEXT:    s_addk_i32 s16, 0x8001
 ; GFX8-NEXT:    s_min_i32 s4, s4, s17
 ; GFX8-NEXT:    s_sext_i32_i16 s16, s16
 ; GFX8-NEXT:    s_sext_i32_i16 s12, s12
-; GFX8-NEXT:    s_sub_i32 s4, s4, 0xffff8000
+; GFX8-NEXT:    s_addk_i32 s4, 0x8000
 ; GFX8-NEXT:    s_max_i32 s12, s16, s12
 ; GFX8-NEXT:    s_sext_i32_i16 s12, s12
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
@@ -3966,12 +3972,12 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX8-NEXT:    s_sub_i32 s4, s8, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s1
 ; GFX8-NEXT:    s_max_i32 s12, s8, s17
-; GFX8-NEXT:    s_sub_i32 s12, s12, 0x7fff
+; GFX8-NEXT:    s_addk_i32 s12, 0x8001
 ; GFX8-NEXT:    s_lshr_b32 s13, s5, 16
 ; GFX8-NEXT:    s_min_i32 s8, s8, s17
 ; GFX8-NEXT:    s_sext_i32_i16 s12, s12
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s5
-; GFX8-NEXT:    s_sub_i32 s8, s8, 0xffff8000
+; GFX8-NEXT:    s_addk_i32 s8, 0x8000
 ; GFX8-NEXT:    s_max_i32 s5, s12, s5
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s5
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s8
@@ -3980,11 +3986,11 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX8-NEXT:    s_sub_i32 s1, s1, s5
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s9
 ; GFX8-NEXT:    s_max_i32 s8, s5, s17
-; GFX8-NEXT:    s_sub_i32 s8, s8, 0x7fff
+; GFX8-NEXT:    s_addk_i32 s8, 0x8001
 ; GFX8-NEXT:    s_min_i32 s5, s5, s17
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s8
 ; GFX8-NEXT:    s_sext_i32_i16 s12, s13
-; GFX8-NEXT:    s_sub_i32 s5, s5, 0xffff8000
+; GFX8-NEXT:    s_addk_i32 s5, 0x8000
 ; GFX8-NEXT:    s_max_i32 s8, s8, s12
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s8
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s5
@@ -3992,12 +3998,12 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s2
 ; GFX8-NEXT:    s_sub_i32 s5, s9, s5
 ; GFX8-NEXT:    s_max_i32 s9, s8, s17
-; GFX8-NEXT:    s_sub_i32 s9, s9, 0x7fff
+; GFX8-NEXT:    s_addk_i32 s9, 0x8001
 ; GFX8-NEXT:    s_lshr_b32 s14, s6, 16
 ; GFX8-NEXT:    s_min_i32 s8, s8, s17
 ; GFX8-NEXT:    s_sext_i32_i16 s9, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s6
-; GFX8-NEXT:    s_sub_i32 s8, s8, 0xffff8000
+; GFX8-NEXT:    s_addk_i32 s8, 0x8000
 ; GFX8-NEXT:    s_max_i32 s6, s9, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s8
@@ -4006,23 +4012,23 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX8-NEXT:    s_sub_i32 s2, s2, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s10
 ; GFX8-NEXT:    s_max_i32 s8, s6, s17
-; GFX8-NEXT:    s_sub_i32 s8, s8, 0x7fff
+; GFX8-NEXT:    s_addk_i32 s8, 0x8001
 ; GFX8-NEXT:    s_min_i32 s6, s6, s17
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s8
 ; GFX8-NEXT:    s_sext_i32_i16 s9, s14
-; GFX8-NEXT:    s_sub_i32 s6, s6, 0xffff8000
+; GFX8-NEXT:    s_addk_i32 s6, 0x8000
 ; GFX8-NEXT:    s_max_i32 s8, s8, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s8
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s6
 ; GFX8-NEXT:    s_min_i32 s6, s8, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s3
 ; GFX8-NEXT:    s_max_i32 s9, s8, s17
-; GFX8-NEXT:    s_sub_i32 s9, s9, 0x7fff
+; GFX8-NEXT:    s_addk_i32 s9, 0x8001
 ; GFX8-NEXT:    s_lshr_b32 s15, s7, 16
 ; GFX8-NEXT:    s_min_i32 s8, s8, s17
 ; GFX8-NEXT:    s_sext_i32_i16 s9, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s7, s7
-; GFX8-NEXT:    s_sub_i32 s8, s8, 0xffff8000
+; GFX8-NEXT:    s_addk_i32 s8, 0x8000
 ; GFX8-NEXT:    s_max_i32 s7, s9, s7
 ; GFX8-NEXT:    s_sext_i32_i16 s7, s7
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s8
@@ -4031,14 +4037,14 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX8-NEXT:    s_sub_i32 s3, s3, s7
 ; GFX8-NEXT:    s_sext_i32_i16 s7, s11
 ; GFX8-NEXT:    s_max_i32 s8, s7, s17
-; GFX8-NEXT:    s_sub_i32 s8, s8, 0x7fff
+; GFX8-NEXT:    s_addk_i32 s8, 0x8001
 ; GFX8-NEXT:    s_and_b32 s4, 0xffff, s4
 ; GFX8-NEXT:    s_min_i32 s7, s7, s17
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s8
 ; GFX8-NEXT:    s_sext_i32_i16 s9, s15
 ; GFX8-NEXT:    s_and_b32 s0, 0xffff, s0
 ; GFX8-NEXT:    s_lshl_b32 s4, s4, 16
-; GFX8-NEXT:    s_sub_i32 s7, s7, 0xffff8000
+; GFX8-NEXT:    s_addk_i32 s7, 0x8000
 ; GFX8-NEXT:    s_max_i32 s8, s8, s9
 ; GFX8-NEXT:    s_or_b32 s0, s0, s4
 ; GFX8-NEXT:    s_and_b32 s4, 0xffff, s5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.v2i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.v2i16.ll
index 855687281ce9ab..6c104709f5ee3a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.v2i16.ll
@@ -147,10 +147,10 @@ define <2 x i16> @v_sub_v2i16_neg_inline_imm_splat(<2 x i16> %a) {
 ; GFX8-LABEL: v_sub_v2i16_neg_inline_imm_splat:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_not_b32_e32 v1, 63
-; GFX8-NEXT:    v_subrev_u16_e32 v2, 0xffc0, v0
-; GFX8-NEXT:    v_sub_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
+; GFX8-NEXT:    v_mov_b32_e32 v2, 64
+; GFX8-NEXT:    v_add_u16_e32 v1, 64, v0
+; GFX8-NEXT:    v_add_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_sub_v2i16_neg_inline_imm_splat:
@@ -179,9 +179,9 @@ define <2 x i16> @v_sub_v2i16_neg_inline_imm_lo(<2 x i16> %a) {
 ; GFX8-LABEL: v_sub_v2i16_neg_inline_imm_lo:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v2, 4
-; GFX8-NEXT:    v_subrev_u16_e32 v1, 0xffc0, v0
-; GFX8-NEXT:    v_sub_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT:    v_mov_b32_e32 v2, -4
+; GFX8-NEXT:    v_add_u16_e32 v1, 64, v0
+; GFX8-NEXT:    v_add_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -211,10 +211,10 @@ define <2 x i16> @v_sub_v2i16_neg_inline_imm_hi(<2 x i16> %a) {
 ; GFX8-LABEL: v_sub_v2i16_neg_inline_imm_hi:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_not_b32_e32 v1, 63
-; GFX8-NEXT:    v_subrev_u16_e32 v2, 4, v0
-; GFX8-NEXT:    v_sub_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
+; GFX8-NEXT:    v_mov_b32_e32 v2, 64
+; GFX8-NEXT:    v_add_u16_e32 v1, -4, v0
+; GFX8-NEXT:    v_add_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_sub_v2i16_neg_inline_imm_hi:
@@ -245,8 +245,8 @@ define amdgpu_ps i32 @s_sub_v2i16_neg_inline_imm_splat(<2 x i16> inreg %a) {
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
 ; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
-; GFX8-NEXT:    s_sub_i32 s0, s0, 0xffc0
-; GFX8-NEXT:    s_sub_i32 s1, s1, 0xffc0
+; GFX8-NEXT:    s_add_i32 s0, s0, 0xffff0040
+; GFX8-NEXT:    s_add_i32 s1, s1, 0xffff0040
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
 ; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
 ; GFX8-NEXT:    s_or_b32 s0, s1, s0
@@ -285,8 +285,8 @@ define amdgpu_ps i32 @s_sub_v2i16_neg_inline_imm_lo(<2 x i16> inreg %a) {
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
 ; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
-; GFX8-NEXT:    s_sub_i32 s0, s0, 0xffc0
-; GFX8-NEXT:    s_sub_i32 s1, s1, 4
+; GFX8-NEXT:    s_add_i32 s0, s0, 0xffff0040
+; GFX8-NEXT:    s_add_i32 s1, s1, -4
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
 ; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
 ; GFX8-NEXT:    s_or_b32 s0, s1, s0
@@ -325,8 +325,8 @@ define amdgpu_ps i32 @s_sub_v2i16_neg_inline_imm_hi(<2 x i16> inreg %a) {
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
 ; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
-; GFX8-NEXT:    s_sub_i32 s0, s0, 4
-; GFX8-NEXT:    s_sub_i32 s1, s1, 0xffc0
+; GFX8-NEXT:    s_add_i32 s0, s0, -4
+; GFX8-NEXT:    s_add_i32 s1, s1, 0xffff0040
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
 ; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
 ; GFX8-NEXT:    s_or_b32 s0, s1, s0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
index 31f61b9968b8bf..24ec4fa48f7789 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
@@ -222,10 +222,10 @@ define i32 @v_urem_i32_oddk_denom(i32 %num) {
 ; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
 ; CHECK-NEXT:    v_mul_lo_u32 v1, v1, v3
 ; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
-; CHECK-NEXT:    v_subrev_i32_e32 v1, vcc, 0x12d8fb, v0
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, 0xffed2705, v0
 ; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; CHECK-NEXT:    v_subrev_i32_e32 v1, vcc, 0x12d8fb, v0
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, 0xffed2705, v0
 ; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
@@ -243,23 +243,23 @@ define <2 x i32> @v_urem_v2i32_oddk_denom(<2 x i32> %num) {
 ; GISEL-NEXT:    v_rcp_iflag_f32_e32 v3, v3
 ; GISEL-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; GISEL-NEXT:    v_mul_lo_u32 v4, v3, v4
-; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
-; GISEL-NEXT:    v_mul_hi_u32 v4, v0, v3
+; GISEL-NEXT:    v_mul_lo_u32 v5, v3, v4
+; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v5
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
+; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v3
 ; GISEL-NEXT:    v_mul_hi_u32 v3, v1, v3
-; GISEL-NEXT:    v_mul_lo_u32 v4, v4, v2
+; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v2
 ; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v2
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
 ; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
-; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v0, v2
-; GISEL-NEXT:    v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v0, v4
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, 0xffed2705, v1
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v0, v2
-; GISEL-NEXT:    v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v0, v4
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, 0xffed2705, v1
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
@@ -274,23 +274,23 @@ define <2 x i32> @v_urem_v2i32_oddk_denom(<2 x i32> %num) {
 ; CGP-NEXT:    v_mov_b32_e32 v4, 0x12d8fb
 ; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
-; CGP-NEXT:    v_mul_lo_u32 v3, v2, v3
-; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; CGP-NEXT:    v_mul_hi_u32 v3, v0, v2
+; CGP-NEXT:    v_mul_lo_u32 v5, v2, v3
+; CGP-NEXT:    v_mul_hi_u32 v5, v2, v5
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
+; CGP-NEXT:    v_mul_hi_u32 v5, v0, v2
 ; CGP-NEXT:    v_mul_hi_u32 v2, v1, v2
-; CGP-NEXT:    v_mul_lo_u32 v3, v3, v4
+; CGP-NEXT:    v_mul_lo_u32 v5, v5, v4
 ; CGP-NEXT:    v_mul_lo_u32 v2, v2, v4
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
 ; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v2
-; CGP-NEXT:    v_subrev_i32_e32 v2, vcc, 0x12d8fb, v0
-; CGP-NEXT:    v_subrev_i32_e32 v3, vcc, 0x12d8fb, v1
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v0, v3
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, 0xffed2705, v1
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v4
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
-; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v0, v4
-; CGP-NEXT:    v_subrev_i32_e32 v3, vcc, 0x12d8fb, v1
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v0, v3
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, 0xffed2705, v1
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v4
diff --git a/llvm/test/CodeGen/AMDGPU/ctlz.ll b/llvm/test/CodeGen/AMDGPU/ctlz.ll
index a7522ef761b8ab..c63e9d471b6bf7 100644
--- a/llvm/test/CodeGen/AMDGPU/ctlz.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctlz.ll
@@ -567,7 +567,7 @@ define amdgpu_kernel void @v_ctlz_i8(ptr addrspace(1) noalias %out, ptr addrspac
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-GISEL-NEXT:    v_ffbh_u32_e32 v1, v1
 ; GFX10-GISEL-NEXT:    v_min_u32_e32 v1, 32, v1
-; GFX10-GISEL-NEXT:    v_subrev_nc_u32_e32 v1, 24, v1
+; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v1, 0xffffffe8, v1
 ; GFX10-GISEL-NEXT:    global_store_byte v0, v1, s[4:5]
 ; GFX10-GISEL-NEXT:    s_endpgm
 ;
@@ -1566,7 +1566,7 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
 ; GFX10-GISEL-NEXT:    v_ffbh_u32_e32 v1, v0
 ; GFX10-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
 ; GFX10-GISEL-NEXT:    v_min_u32_e32 v1, 32, v1
-; GFX10-GISEL-NEXT:    v_sub_nc_u16 v1, v1, 24
+; GFX10-GISEL-NEXT:    v_add_nc_u16 v1, v1, 0xffe8
 ; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0xffff, vcc_lo
 ; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10-GISEL-NEXT:    global_store_byte v1, v0, s[4:5]
@@ -1686,7 +1686,7 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
 ; GFX10-GISEL-NEXT:    v_ffbh_u32_e32 v2, v1
 ; GFX10-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
 ; GFX10-GISEL-NEXT:    v_min_u32_e32 v2, 32, v2
-; GFX10-GISEL-NEXT:    v_subrev_nc_u32_e32 v2, 16, v2
+; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v2, -16, v2
 ; GFX10-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
 ; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v1, v2, 0xffff, vcc_lo
 ; GFX10-GISEL-NEXT:    global_store_short v0, v1, s[4:5]
@@ -1807,7 +1807,7 @@ define amdgpu_kernel void @v_ctlz_i7_sel_eq_neg1(ptr addrspace(1) noalias %out,
 ; GFX10-GISEL-NEXT:    v_ffbh_u32_e32 v1, v0
 ; GFX10-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
 ; GFX10-GISEL-NEXT:    v_min_u32_e32 v1, 32, v1
-; GFX10-GISEL-NEXT:    v_sub_nc_u16 v1, v1, 25
+; GFX10-GISEL-NEXT:    v_add_nc_u16 v1, v1, 0xffe7
 ; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0x7f, vcc_lo
 ; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, 0x7f, v0
diff --git a/llvm/test/CodeGen/AMDGPU/div_i128.ll b/llvm/test/CodeGen/AMDGPU/div_i128.ll
index d94ec56842ab87..147ddc4d4b75b2 100644
--- a/llvm/test/CodeGen/AMDGPU/div_i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_i128.ll
@@ -1313,7 +1313,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-NEXT:    v_sub_u32_e32 v0, 64, v8
 ; GFX9-G-NEXT:    v_lshrrev_b64 v[0:1], v0, v[10:11]
 ; GFX9-G-NEXT:    v_lshlrev_b64 v[2:3], v8, v[12:13]
-; GFX9-G-NEXT:    v_subrev_u32_e32 v9, 64, v8
+; GFX9-G-NEXT:    v_add_u32_e32 v9, 0xffffffc0, v8
 ; GFX9-G-NEXT:    v_lshlrev_b64 v[6:7], v8, v[10:11]
 ; GFX9-G-NEXT:    v_or_b32_e32 v2, v0, v2
 ; GFX9-G-NEXT:    v_or_b32_e32 v3, v1, v3
@@ -1338,7 +1338,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-NEXT:    v_sub_u32_e32 v2, 64, v20
 ; GFX9-G-NEXT:    v_lshrrev_b64 v[0:1], v20, v[10:11]
 ; GFX9-G-NEXT:    v_lshlrev_b64 v[2:3], v2, v[12:13]
-; GFX9-G-NEXT:    v_subrev_u32_e32 v24, 64, v20
+; GFX9-G-NEXT:    v_add_u32_e32 v24, 0xffffffc0, v20
 ; GFX9-G-NEXT:    v_lshrrev_b64 v[14:15], v20, v[12:13]
 ; GFX9-G-NEXT:    v_or_b32_e32 v2, v0, v2
 ; GFX9-G-NEXT:    v_or_b32_e32 v3, v1, v3
@@ -2070,8 +2070,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(1)
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v21, v4
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, s4
-; GFX9-G-O0-NEXT:    v_sub_u32_e64 v4, v18, v4
+; GFX9-G-O0-NEXT:    s_mov_b32 s5, 0xffffffc0
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, s5
+; GFX9-G-O0-NEXT:    v_add_u32_e64 v4, v18, v4
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, s4
 ; GFX9-G-O0-NEXT:    v_sub_u32_e64 v5, v5, v18
 ; GFX9-G-O0-NEXT:    s_mov_b32 s6, 0
@@ -2203,8 +2204,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v0
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v9
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v3
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s7
-; GFX9-G-O0-NEXT:    v_sub_u32_e64 v2, v8, v0
+; GFX9-G-O0-NEXT:    s_mov_b32 s6, 0xffffffc0
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s6
+; GFX9-G-O0-NEXT:    v_add_u32_e64 v2, v8, v0
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s7
 ; GFX9-G-O0-NEXT:    v_sub_u32_e64 v14, v0, v8
 ; GFX9-G-O0-NEXT:    s_mov_b32 s6, 0
@@ -3453,7 +3455,7 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-NEXT:    v_sub_u32_e32 v8, 64, v16
 ; GFX9-G-NEXT:    v_lshrrev_b64 v[8:9], v8, v[0:1]
 ; GFX9-G-NEXT:    v_lshlrev_b64 v[10:11], v16, v[2:3]
-; GFX9-G-NEXT:    v_subrev_u32_e32 v14, 64, v16
+; GFX9-G-NEXT:    v_add_u32_e32 v14, 0xffffffc0, v16
 ; GFX9-G-NEXT:    v_lshlrev_b64 v[12:13], v16, v[0:1]
 ; GFX9-G-NEXT:    v_or_b32_e32 v10, v8, v10
 ; GFX9-G-NEXT:    v_or_b32_e32 v11, v9, v11
@@ -3476,7 +3478,7 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-NEXT:    s_cbranch_execz .LBB1_5
 ; GFX9-G-NEXT:  ; %bb.2: ; %udiv-preheader
 ; GFX9-G-NEXT:    v_sub_u32_e32 v12, 64, v18
-; GFX9-G-NEXT:    v_subrev_u32_e32 v22, 64, v18
+; GFX9-G-NEXT:    v_add_u32_e32 v22, 0xffffffc0, v18
 ; GFX9-G-NEXT:    v_lshrrev_b64 v[10:11], v18, v[0:1]
 ; GFX9-G-NEXT:    v_lshlrev_b64 v[12:13], v12, v[2:3]
 ; GFX9-G-NEXT:    v_lshrrev_b64 v[16:17], v18, v[2:3]
@@ -4175,8 +4177,9 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v21, v7
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v20, v6
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, s4
-; GFX9-G-O0-NEXT:    v_sub_u32_e64 v4, v12, v4
+; GFX9-G-O0-NEXT:    s_mov_b32 s5, 0xffffffc0
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, s5
+; GFX9-G-O0-NEXT:    v_add_u32_e64 v4, v12, v4
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, s4
 ; GFX9-G-O0-NEXT:    v_sub_u32_e64 v5, v5, v12
 ; GFX9-G-O0-NEXT:    s_mov_b32 s6, 0
@@ -4311,8 +4314,9 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    s_mov_b32 s7, 64
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v9
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v8
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s7
-; GFX9-G-O0-NEXT:    v_sub_u32_e64 v2, v3, v0
+; GFX9-G-O0-NEXT:    s_mov_b32 s6, 0xffffffc0
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s6
+; GFX9-G-O0-NEXT:    v_add_u32_e64 v2, v3, v0
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s7
 ; GFX9-G-O0-NEXT:    v_sub_u32_e64 v8, v0, v3
 ; GFX9-G-O0-NEXT:    s_mov_b32 s6, 0
diff --git a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
index e04cd711256081..691f3d36bc7360 100644
--- a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
@@ -476,18 +476,18 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[18:19]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v2, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
-; GISEL-NEXT:    v_subb_u32_e64 v1, s[4:5], 0, 0, vcc
-; GISEL-NEXT:    v_subb_u32_e64 v2, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT:    v_subb_u32_e64 v3, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT:    v_cmp_gt_u64_e32 vcc, v[0:1], v[20:21]
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
+; GISEL-NEXT:    v_subb_u32_e64 v3, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_cmp_gt_u64_e32 vcc, v[2:3], v[20:21]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v8, 0x7f, v0
-; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[2:3]
+; GISEL-NEXT:    v_xor_b32_e32 v8, 0x7f, v2
+; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[0:1]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v21, 0, 1, vcc
-; GISEL-NEXT:    v_or_b32_e32 v8, v8, v2
-; GISEL-NEXT:    v_or_b32_e32 v9, v1, v3
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
+; GISEL-NEXT:    v_or_b32_e32 v8, v8, v0
+; GISEL-NEXT:    v_or_b32_e32 v9, v3, v1
+; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v20, v21, v20, vcc
 ; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[8:9]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
@@ -505,12 +505,13 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], s[4:5]
 ; GISEL-NEXT:    s_cbranch_execz .LBB0_6
 ; GISEL-NEXT:  ; %bb.1: ; %udiv-bb15
-; GISEL-NEXT:    v_add_i32_e32 v28, vcc, 1, v0
-; GISEL-NEXT:    v_addc_u32_e64 v29, s[4:5], 0, v1, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v32, vcc, 0x7f, v0
-; GISEL-NEXT:    v_addc_u32_e64 v30, vcc, 0, v2, s[4:5]
-; GISEL-NEXT:    v_addc_u32_e32 v31, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_subrev_i32_e64 v20, s[4:5], 64, v32
+; GISEL-NEXT:    v_add_i32_e32 v28, vcc, 1, v2
+; GISEL-NEXT:    v_addc_u32_e64 v29, s[4:5], 0, v3, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v32, vcc, 0x7f, v2
+; GISEL-NEXT:    v_not_b32_e32 v2, 63
+; GISEL-NEXT:    v_addc_u32_e64 v30, vcc, 0, v0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v31, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_add_i32_e64 v20, s[4:5], v32, v2
 ; GISEL-NEXT:    v_sub_i32_e64 v8, s[4:5], 64, v32
 ; GISEL-NEXT:    v_lshl_b64 v[0:1], v[16:17], v32
 ; GISEL-NEXT:    v_lshl_b64 v[2:3], v[18:19], v32
@@ -536,7 +537,7 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    s_xor_b64 s[14:15], exec, s[6:7]
 ; GISEL-NEXT:    s_cbranch_execz .LBB0_5
 ; GISEL-NEXT:  ; %bb.2: ; %udiv-preheader4
-; GISEL-NEXT:    v_subrev_i32_e32 v34, vcc, 64, v28
+; GISEL-NEXT:    v_add_i32_e32 v34, vcc, 0xffffffc0, v28
 ; GISEL-NEXT:    v_sub_i32_e32 v22, vcc, 64, v28
 ; GISEL-NEXT:    v_lshr_b64 v[0:1], v[18:19], v28
 ; GISEL-NEXT:    v_lshr_b64 v[2:3], v[16:17], v28
@@ -665,18 +666,18 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[12:13]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v2, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
-; GISEL-NEXT:    v_subb_u32_e64 v1, s[4:5], 0, 0, vcc
-; GISEL-NEXT:    v_subb_u32_e64 v2, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT:    v_subb_u32_e64 v3, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT:    v_cmp_gt_u64_e32 vcc, v[0:1], v[10:11]
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
+; GISEL-NEXT:    v_subb_u32_e64 v3, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_cmp_gt_u64_e32 vcc, v[2:3], v[10:11]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v10, 0x7f, v0
-; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[2:3]
+; GISEL-NEXT:    v_xor_b32_e32 v10, 0x7f, v2
+; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[0:1]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT:    v_or_b32_e32 v10, v10, v2
-; GISEL-NEXT:    v_or_b32_e32 v11, v1, v3
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
+; GISEL-NEXT:    v_or_b32_e32 v10, v10, v0
+; GISEL-NEXT:    v_or_b32_e32 v11, v3, v1
+; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v15, v16, v15, vcc
 ; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[10:11]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
@@ -694,12 +695,13 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], s[4:5]
 ; GISEL-NEXT:    s_cbranch_execz .LBB0_12
 ; GISEL-NEXT:  ; %bb.7: ; %udiv-bb1
-; GISEL-NEXT:    v_add_i32_e32 v26, vcc, 1, v0
-; GISEL-NEXT:    v_addc_u32_e64 v27, s[4:5], 0, v1, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v30, vcc, 0x7f, v0
-; GISEL-NEXT:    v_addc_u32_e64 v28, vcc, 0, v2, s[4:5]
-; GISEL-NEXT:    v_addc_u32_e32 v29, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_subrev_i32_e64 v14, s[4:5], 64, v30
+; GISEL-NEXT:    v_add_i32_e32 v26, vcc, 1, v2
+; GISEL-NEXT:    v_addc_u32_e64 v27, s[4:5], 0, v3, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v30, vcc, 0x7f, v2
+; GISEL-NEXT:    v_not_b32_e32 v2, 63
+; GISEL-NEXT:    v_addc_u32_e64 v28, vcc, 0, v0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v29, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v30, v2
 ; GISEL-NEXT:    v_sub_i32_e64 v10, s[4:5], 64, v30
 ; GISEL-NEXT:    v_lshl_b64 v[0:1], v[6:7], v30
 ; GISEL-NEXT:    v_lshl_b64 v[2:3], v[12:13], v30
@@ -725,7 +727,7 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[6:7]
 ; GISEL-NEXT:    s_cbranch_execz .LBB0_11
 ; GISEL-NEXT:  ; %bb.8: ; %udiv-preheader
-; GISEL-NEXT:    v_subrev_i32_e32 v32, vcc, 64, v26
+; GISEL-NEXT:    v_add_i32_e32 v32, vcc, 0xffffffc0, v26
 ; GISEL-NEXT:    v_sub_i32_e32 v16, vcc, 64, v26
 ; GISEL-NEXT:    v_lshr_b64 v[0:1], v[12:13], v26
 ; GISEL-NEXT:    v_lshr_b64 v[2:3], v[6:7], v26
@@ -1229,18 +1231,18 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
 ; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[16:17]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v3, v19, v18, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v20, vcc, v2, v3
-; GISEL-NEXT:    v_subb_u32_e64 v21, s[4:5], 0, 0, vcc
-; GISEL-NEXT:    v_subb_u32_e64 v22, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT:    v_subb_u32_e64 v23, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT:    v_cmp_gt_u64_e32 vcc, v[20:21], v[24:25]
+; GISEL-NEXT:    v_sub_i32_e32 v22, vcc, v2, v3
+; GISEL-NEXT:    v_subb_u32_e64 v23, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_subb_u32_e64 v20, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_subb_u32_e64 v21, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_cmp_gt_u64_e32 vcc, v[22:23], v[24:25]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v2, 0x7f, v20
-; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[22:23]
+; GISEL-NEXT:    v_xor_b32_e32 v2, 0x7f, v22
+; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[20:21]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
-; GISEL-NEXT:    v_or_b32_e32 v2, v2, v22
-; GISEL-NEXT:    v_or_b32_e32 v3, v21, v23
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[22:23]
+; GISEL-NEXT:    v_or_b32_e32 v2, v2, v20
+; GISEL-NEXT:    v_or_b32_e32 v3, v23, v21
+; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[20:21]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v18, v19, v18, vcc
 ; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
@@ -1258,12 +1260,13 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], s[4:5]
 ; GISEL-NEXT:    s_cbranch_execz .LBB1_6
 ; GISEL-NEXT:  ; %bb.1: ; %udiv-bb15
-; GISEL-NEXT:    v_add_i32_e32 v26, vcc, 1, v20
-; GISEL-NEXT:    v_addc_u32_e64 v27, s[4:5], 0, v21, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v30, vcc, 0x7f, v20
-; GISEL-NEXT:    v_addc_u32_e64 v28, vcc, 0, v22, s[4:5]
-; GISEL-NEXT:    v_addc_u32_e32 v29, vcc, 0, v23, vcc
-; GISEL-NEXT:    v_subrev_i32_e64 v22, s[4:5], 64, v30
+; GISEL-NEXT:    v_add_i32_e32 v26, vcc, 1, v22
+; GISEL-NEXT:    v_addc_u32_e64 v27, s[4:5], 0, v23, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v30, vcc, 0x7f, v22
+; GISEL-NEXT:    v_not_b32_e32 v2, 63
+; GISEL-NEXT:    v_addc_u32_e64 v28, vcc, 0, v20, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v29, vcc, 0, v21, vcc
+; GISEL-NEXT:    v_add_i32_e64 v22, s[4:5], v30, v2
 ; GISEL-NEXT:    v_sub_i32_e64 v20, s[4:5], 64, v30
 ; GISEL-NEXT:    v_lshl_b64 v[2:3], v[0:1], v30
 ; GISEL-NEXT:    v_lshl_b64 v[18:19], v[16:17], v30
@@ -1289,7 +1292,7 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[6:7]
 ; GISEL-NEXT:    s_cbranch_execz .LBB1_5
 ; GISEL-NEXT:  ; %bb.2: ; %udiv-preheader4
-; GISEL-NEXT:    v_subrev_i32_e32 v32, vcc, 64, v26
+; GISEL-NEXT:    v_add_i32_e32 v32, vcc, 0xffffffc0, v26
 ; GISEL-NEXT:    v_sub_i32_e32 v24, vcc, 64, v26
 ; GISEL-NEXT:    v_lshr_b64 v[18:19], v[16:17], v26
 ; GISEL-NEXT:    v_lshr_b64 v[20:21], v[0:1], v26
@@ -1401,18 +1404,18 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[6:7]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v1, v9, v8, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
-; GISEL-NEXT:    v_subb_u32_e64 v1, s[4:5], 0, 0, vcc
-; GISEL-NEXT:    v_subb_u32_e64 v16, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT:    v_subb_u32_e64 v17, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT:    v_cmp_gt_u64_e32 vcc, v[0:1], v[10:11]
+; GISEL-NEXT:    v_sub_i32_e32 v16, vcc, v0, v1
+; GISEL-NEXT:    v_subb_u32_e64 v17, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_cmp_gt_u64_e32 vcc, v[16:17], v[10:11]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v8, 0x7f, v0
-; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[16:17]
+; GISEL-NEXT:    v_xor_b32_e32 v8, 0x7f, v16
+; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[0:1]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_or_b32_e32 v8, v8, v16
-; GISEL-NEXT:    v_or_b32_e32 v9, v1, v17
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[16:17]
+; GISEL-NEXT:    v_or_b32_e32 v8, v8, v0
+; GISEL-NEXT:    v_or_b32_e32 v9, v17, v1
+; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v10, v11, v10, vcc
 ; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[8:9]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
@@ -1430,12 +1433,13 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], s[4:5]
 ; GISEL-NEXT:    s_cbranch_execz .LBB1_12
 ; GISEL-NEXT:  ; %bb.7: ; %udiv-bb1
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, 1, v0
-; GISEL-NEXT:    v_addc_u32_e64 v11, s[4:5], 0, v1, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v26, vcc, 0x7f, v0
-; GISEL-NEXT:    v_addc_u32_e64 v24, vcc, 0, v16, s[4:5]
-; GISEL-NEXT:    v_addc_u32_e32 v25, vcc, 0, v17, vcc
-; GISEL-NEXT:    v_subrev_i32_e64 v9, s[4:5], 64, v26
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, 1, v16
+; GISEL-NEXT:    v_addc_u32_e64 v11, s[4:5], 0, v17, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v26, vcc, 0x7f, v16
+; GISEL-NEXT:    v_not_b32_e32 v9, 63
+; GISEL-NEXT:    v_addc_u32_e64 v24, vcc, 0, v0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v25, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v26, v9
 ; GISEL-NEXT:    v_sub_i32_e64 v10, s[4:5], 64, v26
 ; GISEL-NEXT:    v_lshl_b64 v[0:1], v[4:5], v26
 ; GISEL-NEXT:    v_lshl_b64 v[16:17], v[6:7], v26
@@ -1461,7 +1465,7 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[6:7]
 ; GISEL-NEXT:    s_cbranch_execz .LBB1_11
 ; GISEL-NEXT:  ; %bb.8: ; %udiv-preheader
-; GISEL-NEXT:    v_subrev_i32_e32 v28, vcc, 64, v8
+; GISEL-NEXT:    v_add_i32_e32 v28, vcc, 0xffffffc0, v8
 ; GISEL-NEXT:    v_sub_i32_e32 v22, vcc, 64, v8
 ; GISEL-NEXT:    v_lshr_b64 v[16:17], v[6:7], v8
 ; GISEL-NEXT:    v_lshr_b64 v[20:21], v[4:5], v8
@@ -2072,18 +2076,18 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[8:9]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v2, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
-; GISEL-NEXT:    v_subb_u32_e64 v1, s[4:5], 0, 0, vcc
-; GISEL-NEXT:    v_subb_u32_e64 v2, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT:    v_subb_u32_e64 v3, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT:    v_cmp_gt_u64_e32 vcc, v[0:1], v[18:19]
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
+; GISEL-NEXT:    v_subb_u32_e64 v3, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_cmp_gt_u64_e32 vcc, v[2:3], v[18:19]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v21, 0, 1, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v18, 0x7f, v0
-; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[2:3]
+; GISEL-NEXT:    v_xor_b32_e32 v18, 0x7f, v2
+; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[0:1]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v22, 0, 1, vcc
-; GISEL-NEXT:    v_or_b32_e32 v18, v18, v2
-; GISEL-NEXT:    v_or_b32_e32 v19, v1, v3
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
+; GISEL-NEXT:    v_or_b32_e32 v18, v18, v0
+; GISEL-NEXT:    v_or_b32_e32 v19, v3, v1
+; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v21, v22, v21, vcc
 ; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[18:19]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
@@ -2101,12 +2105,13 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], s[4:5]
 ; GISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GISEL-NEXT:  ; %bb.1: ; %udiv-bb15
-; GISEL-NEXT:    v_add_i32_e32 v31, vcc, 1, v0
-; GISEL-NEXT:    v_addc_u32_e64 v32, s[4:5], 0, v1, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v24, vcc, 0x7f, v0
-; GISEL-NEXT:    v_addc_u32_e64 v33, vcc, 0, v2, s[4:5]
-; GISEL-NEXT:    v_addc_u32_e32 v34, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_subrev_i32_e64 v20, s[4:5], 64, v24
+; GISEL-NEXT:    v_add_i32_e32 v31, vcc, 1, v2
+; GISEL-NEXT:    v_addc_u32_e64 v32, s[4:5], 0, v3, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v24, vcc, 0x7f, v2
+; GISEL-NEXT:    v_not_b32_e32 v2, 63
+; GISEL-NEXT:    v_addc_u32_e64 v33, vcc, 0, v0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v34, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_add_i32_e64 v20, s[4:5], v24, v2
 ; GISEL-NEXT:    v_sub_i32_e64 v18, s[4:5], 64, v24
 ; GISEL-NEXT:    v_lshl_b64 v[0:1], v[16:17], v24
 ; GISEL-NEXT:    v_lshl_b64 v[2:3], v[8:9], v24
@@ -2132,7 +2137,7 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[6:7]
 ; GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GISEL-NEXT:  ; %bb.2: ; %udiv-preheader4
-; GISEL-NEXT:    v_subrev_i32_e32 v24, vcc, 64, v31
+; GISEL-NEXT:    v_add_i32_e32 v24, vcc, 0xffffffc0, v31
 ; GISEL-NEXT:    v_sub_i32_e32 v22, vcc, 64, v31
 ; GISEL-NEXT:    v_lshr_b64 v[0:1], v[8:9], v31
 ; GISEL-NEXT:    v_lshr_b64 v[2:3], v[16:17], v31
@@ -2262,18 +2267,18 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[6:7]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v1, v15, v14, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
-; GISEL-NEXT:    v_subb_u32_e64 v1, s[4:5], 0, 0, vcc
-; GISEL-NEXT:    v_subb_u32_e64 v14, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT:    v_subb_u32_e64 v15, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT:    v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
+; GISEL-NEXT:    v_sub_i32_e32 v14, vcc, v0, v1
+; GISEL-NEXT:    v_subb_u32_e64 v15, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_cmp_gt_u64_e32 vcc, v[14:15], v[2:3]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v21, 0, 1, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v2, 0x7f, v0
-; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[14:15]
+; GISEL-NEXT:    v_xor_b32_e32 v2, 0x7f, v14
+; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[0:1]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v22, 0, 1, vcc
-; GISEL-NEXT:    v_or_b32_e32 v2, v2, v14
-; GISEL-NEXT:    v_or_b32_e32 v3, v1, v15
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[14:15]
+; GISEL-NEXT:    v_or_b32_e32 v2, v2, v0
+; GISEL-NEXT:    v_or_b32_e32 v3, v15, v1
+; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v21, v22, v21, vcc
 ; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
@@ -2291,12 +2296,13 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], s[4:5]
 ; GISEL-NEXT:    s_cbranch_execz .LBB2_12
 ; GISEL-NEXT:  ; %bb.7: ; %udiv-bb1
-; GISEL-NEXT:    v_add_i32_e32 v36, vcc, 1, v0
-; GISEL-NEXT:    v_addc_u32_e64 v37, s[4:5], 0, v1, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v24, vcc, 0x7f, v0
-; GISEL-NEXT:    v_addc_u32_e64 v38, vcc, 0, v14, s[4:5]
-; GISEL-NEXT:    v_addc_u32_e32 v39, vcc, 0, v15, vcc
-; GISEL-NEXT:    v_subrev_i32_e64 v20, s[4:5], 64, v24
+; GISEL-NEXT:    v_add_i32_e32 v36, vcc, 1, v14
+; GISEL-NEXT:    v_addc_u32_e64 v37, s[4:5], 0, v15, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v24, vcc, 0x7f, v14
+; GISEL-NEXT:    v_not_b32_e32 v2, 63
+; GISEL-NEXT:    v_addc_u32_e64 v38, vcc, 0, v0, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v39, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_add_i32_e64 v20, s[4:5], v24, v2
 ; GISEL-NEXT:    v_sub_i32_e64 v14, s[4:5], 64, v24
 ; GISEL-NEXT:    v_lshl_b64 v[0:1], v[12:13], v24
 ; GISEL-NEXT:    v_lshl_b64 v[2:3], v[6:7], v24
@@ -2322,7 +2328,7 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[6:7]
 ; GISEL-NEXT:    s_cbranch_execz .LBB2_11
 ; GISEL-NEXT:  ; %bb.8: ; %udiv-preheader
-; GISEL-NEXT:    v_subrev_i32_e32 v24, vcc, 64, v36
+; GISEL-NEXT:    v_add_i32_e32 v24, vcc, 0xffffffc0, v36
 ; GISEL-NEXT:    v_sub_i32_e32 v22, vcc, 64, v36
 ; GISEL-NEXT:    v_lshr_b64 v[0:1], v[6:7], v36
 ; GISEL-NEXT:    v_lshr_b64 v[2:3], v[12:13], v36
@@ -2903,18 +2909,18 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    v_cndmask_b32_e32 v16, v17, v16, vcc
 ; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v17, v19, v18, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v16, vcc, v16, v17
-; GISEL-NEXT:    v_subb_u32_e64 v17, s[4:5], 0, 0, vcc
-; GISEL-NEXT:    v_subb_u32_e64 v18, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT:    v_subb_u32_e64 v19, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT:    v_cmp_gt_u64_e32 vcc, v[16:17], v[20:21]
+; GISEL-NEXT:    v_sub_i32_e32 v18, vcc, v16, v17
+; GISEL-NEXT:    v_subb_u32_e64 v19, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_subb_u32_e64 v16, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_subb_u32_e64 v17, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_cmp_gt_u64_e32 vcc, v[18:19], v[20:21]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v23, 0, 1, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v20, 0x7f, v16
-; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[18:19]
+; GISEL-NEXT:    v_xor_b32_e32 v20, 0x7f, v18
+; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[16:17]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v24, 0, 1, vcc
-; GISEL-NEXT:    v_or_b32_e32 v20, v20, v18
-; GISEL-NEXT:    v_or_b32_e32 v21, v17, v19
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[18:19]
+; GISEL-NEXT:    v_or_b32_e32 v20, v20, v16
+; GISEL-NEXT:    v_or_b32_e32 v21, v19, v17
+; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[16:17]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v23, v24, v23, vcc
 ; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[20:21]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc
@@ -2932,12 +2938,13 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], s[4:5]
 ; GISEL-NEXT:    s_cbranch_execz .LBB3_6
 ; GISEL-NEXT:  ; %bb.1: ; %udiv-bb15
-; GISEL-NEXT:    v_add_i32_e32 v30, vcc, 1, v16
-; GISEL-NEXT:    v_addc_u32_e64 v31, s[4:5], 0, v17, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v26, vcc, 0x7f, v16
-; GISEL-NEXT:    v_addc_u32_e64 v32, vcc, 0, v18, s[4:5]
-; GISEL-NEXT:    v_addc_u32_e32 v33, vcc, 0, v19, vcc
-; GISEL-NEXT:    v_subrev_i32_e64 v22, s[4:5], 64, v26
+; GISEL-NEXT:    v_add_i32_e32 v30, vcc, 1, v18
+; GISEL-NEXT:    v_addc_u32_e64 v31, s[4:5], 0, v19, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v26, vcc, 0x7f, v18
+; GISEL-NEXT:    v_not_b32_e32 v18, 63
+; GISEL-NEXT:    v_addc_u32_e64 v32, vcc, 0, v16, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v33, vcc, 0, v17, vcc
+; GISEL-NEXT:    v_add_i32_e64 v22, s[4:5], v26, v18
 ; GISEL-NEXT:    v_sub_i32_e64 v20, s[4:5], 64, v26
 ; GISEL-NEXT:    v_lshl_b64 v[16:17], v[0:1], v26
 ; GISEL-NEXT:    v_lshl_b64 v[18:19], v[2:3], v26
@@ -2963,7 +2970,7 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[6:7]
 ; GISEL-NEXT:    s_cbranch_execz .LBB3_5
 ; GISEL-NEXT:  ; %bb.2: ; %udiv-preheader4
-; GISEL-NEXT:    v_subrev_i32_e32 v26, vcc, 64, v30
+; GISEL-NEXT:    v_add_i32_e32 v26, vcc, 0xffffffc0, v30
 ; GISEL-NEXT:    v_sub_i32_e32 v24, vcc, 64, v30
 ; GISEL-NEXT:    v_lshr_b64 v[16:17], v[2:3], v30
 ; GISEL-NEXT:    v_lshr_b64 v[18:19], v[0:1], v30
@@ -3075,18 +3082,18 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    v_cndmask_b32_e32 v16, v17, v16, vcc
 ; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[6:7]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v17, v19, v18, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v16, vcc, v16, v17
-; GISEL-NEXT:    v_subb_u32_e64 v17, s[4:5], 0, 0, vcc
-; GISEL-NEXT:    v_subb_u32_e64 v22, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT:    v_subb_u32_e64 v23, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT:    v_cmp_gt_u64_e32 vcc, v[16:17], v[24:25]
+; GISEL-NEXT:    v_sub_i32_e32 v22, vcc, v16, v17
+; GISEL-NEXT:    v_subb_u32_e64 v23, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_subb_u32_e64 v16, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_subb_u32_e64 v17, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT:    v_cmp_gt_u64_e32 vcc, v[22:23], v[24:25]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v24, 0, 1, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v18, 0x7f, v16
-; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[22:23]
+; GISEL-NEXT:    v_xor_b32_e32 v18, 0x7f, v22
+; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[16:17]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v25, 0, 1, vcc
-; GISEL-NEXT:    v_or_b32_e32 v18, v18, v22
-; GISEL-NEXT:    v_or_b32_e32 v19, v17, v23
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[22:23]
+; GISEL-NEXT:    v_or_b32_e32 v18, v18, v16
+; GISEL-NEXT:    v_or_b32_e32 v19, v23, v17
+; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[16:17]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v24, v25, v24, vcc
 ; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[18:19]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
@@ -3104,12 +3111,13 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], s[4:5]
 ; GISEL-NEXT:    s_cbranch_execz .LBB3_12
 ; GISEL-NEXT:  ; %bb.7: ; %udiv-bb1
-; GISEL-NEXT:    v_add_i32_e32 v34, vcc, 1, v16
-; GISEL-NEXT:    v_addc_u32_e64 v35, s[4:5], 0, v17, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v28, vcc, 0x7f, v16
-; GISEL-NEXT:    v_addc_u32_e64 v36, vcc, 0, v22, s[4:5]
-; GISEL-NEXT:    v_addc_u32_e32 v37, vcc, 0, v23, vcc
-; GISEL-NEXT:    v_subrev_i32_e64 v24, s[4:5], 64, v28
+; GISEL-NEXT:    v_add_i32_e32 v34, vcc, 1, v22
+; GISEL-NEXT:    v_addc_u32_e64 v35, s[4:5], 0, v23, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v28, vcc, 0x7f, v22
+; GISEL-NEXT:    v_not_b32_e32 v18, 63
+; GISEL-NEXT:    v_addc_u32_e64 v36, vcc, 0, v16, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v37, vcc, 0, v17, vcc
+; GISEL-NEXT:    v_add_i32_e64 v24, s[4:5], v28, v18
 ; GISEL-NEXT:    v_sub_i32_e64 v22, s[4:5], 64, v28
 ; GISEL-NEXT:    v_lshl_b64 v[16:17], v[4:5], v28
 ; GISEL-NEXT:    v_lshl_b64 v[18:19], v[6:7], v28
@@ -3135,7 +3143,7 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[6:7]
 ; GISEL-NEXT:    s_cbranch_execz .LBB3_11
 ; GISEL-NEXT:  ; %bb.8: ; %udiv-preheader
-; GISEL-NEXT:    v_subrev_i32_e32 v28, vcc, 64, v34
+; GISEL-NEXT:    v_add_i32_e32 v28, vcc, 0xffffffc0, v34
 ; GISEL-NEXT:    v_sub_i32_e32 v26, vcc, 64, v34
 ; GISEL-NEXT:    v_lshr_b64 v[16:17], v[6:7], v34
 ; GISEL-NEXT:    v_lshr_b64 v[18:19], v[4:5], v34
diff --git a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
index 786fe03164690e..6fa607f83f8af6 100644
--- a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
@@ -236,17 +236,17 @@ define i128 @fptosi_f64_to_i128(double %x) {
 ; GISEL-NEXT:    s_xor_b64 s[16:17], exec, s[6:7]
 ; GISEL-NEXT:    s_cbranch_execz .LBB0_4
 ; GISEL-NEXT:  ; %bb.3: ; %fp-to-i-if-else
-; GISEL-NEXT:    v_add_u32_e32 v6, 0xfffffbcd, v6
-; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v6, v[4:5]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v6
+; GISEL-NEXT:    v_add_u32_e32 v7, 0xfffffbcd, v6
+; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v7, v[4:5]
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
 ; GISEL-NEXT:    v_cndmask_b32_e32 v11, 0, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v12, 0, v1, vcc
 ; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v11, v10, 0
-; GISEL-NEXT:    v_subrev_u32_e32 v7, 64, v6
-; GISEL-NEXT:    v_sub_u32_e32 v2, 64, v6
+; GISEL-NEXT:    v_add_u32_e32 v6, 0xfffffb8d, v6
+; GISEL-NEXT:    v_sub_u32_e32 v2, 64, v7
 ; GISEL-NEXT:    v_lshrrev_b64 v[2:3], v2, v[4:5]
-; GISEL-NEXT:    v_lshlrev_b64 v[4:5], v7, v[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v6
+; GISEL-NEXT:    v_lshlrev_b64 v[4:5], v6, v[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v7
 ; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v12, v9, v[0:1]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v13, v2, 0, s[6:7]
@@ -273,7 +273,7 @@ define i128 @fptosi_f64_to_i128(double %x) {
 ; GISEL-NEXT:    s_cbranch_execz .LBB0_6
 ; GISEL-NEXT:  ; %bb.5: ; %fp-to-i-if-then12
 ; GISEL-NEXT:    v_sub_co_u32_e32 v6, vcc, 0x433, v6
-; GISEL-NEXT:    v_subrev_u32_e32 v2, 64, v6
+; GISEL-NEXT:    v_add_u32_e32 v2, 0xffffffc0, v6
 ; GISEL-NEXT:    v_lshrrev_b64 v[0:1], v6, v[4:5]
 ; GISEL-NEXT:    v_lshrrev_b64 v[2:3], v2, 0
 ; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v6
@@ -608,17 +608,17 @@ define i128 @fptoui_f64_to_i128(double %x) {
 ; GISEL-NEXT:    s_xor_b64 s[16:17], exec, s[6:7]
 ; GISEL-NEXT:    s_cbranch_execz .LBB1_4
 ; GISEL-NEXT:  ; %bb.3: ; %fp-to-i-if-else
-; GISEL-NEXT:    v_add_u32_e32 v6, 0xfffffbcd, v6
-; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v6, v[4:5]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v6
+; GISEL-NEXT:    v_add_u32_e32 v7, 0xfffffbcd, v6
+; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v7, v[4:5]
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
 ; GISEL-NEXT:    v_cndmask_b32_e32 v11, 0, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v12, 0, v1, vcc
 ; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v11, v10, 0
-; GISEL-NEXT:    v_subrev_u32_e32 v7, 64, v6
-; GISEL-NEXT:    v_sub_u32_e32 v2, 64, v6
+; GISEL-NEXT:    v_add_u32_e32 v6, 0xfffffb8d, v6
+; GISEL-NEXT:    v_sub_u32_e32 v2, 64, v7
 ; GISEL-NEXT:    v_lshrrev_b64 v[2:3], v2, v[4:5]
-; GISEL-NEXT:    v_lshlrev_b64 v[4:5], v7, v[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v6
+; GISEL-NEXT:    v_lshlrev_b64 v[4:5], v6, v[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v7
 ; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v12, v9, v[0:1]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v13, v2, 0, s[6:7]
@@ -645,7 +645,7 @@ define i128 @fptoui_f64_to_i128(double %x) {
 ; GISEL-NEXT:    s_cbranch_execz .LBB1_6
 ; GISEL-NEXT:  ; %bb.5: ; %fp-to-i-if-then12
 ; GISEL-NEXT:    v_sub_co_u32_e32 v6, vcc, 0x433, v6
-; GISEL-NEXT:    v_subrev_u32_e32 v2, 64, v6
+; GISEL-NEXT:    v_add_u32_e32 v2, 0xffffffc0, v6
 ; GISEL-NEXT:    v_lshrrev_b64 v[0:1], v6, v[4:5]
 ; GISEL-NEXT:    v_lshrrev_b64 v[2:3], v2, 0
 ; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v6
@@ -972,17 +972,17 @@ define i128 @fptosi_f32_to_i128(float %x) {
 ; GISEL-NEXT:    s_xor_b64 s[16:17], exec, s[6:7]
 ; GISEL-NEXT:    s_cbranch_execz .LBB2_4
 ; GISEL-NEXT:  ; %bb.3: ; %fp-to-i-if-else
-; GISEL-NEXT:    v_add_u32_e32 v6, 0xffffff6a, v6
-; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v6, v[4:5]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v6
+; GISEL-NEXT:    v_add_u32_e32 v7, 0xffffff6a, v6
+; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v7, v[4:5]
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
 ; GISEL-NEXT:    v_cndmask_b32_e32 v11, 0, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v12, 0, v1, vcc
 ; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v11, v10, 0
-; GISEL-NEXT:    v_subrev_u32_e32 v7, 64, v6
-; GISEL-NEXT:    v_sub_u32_e32 v2, 64, v6
+; GISEL-NEXT:    v_add_u32_e32 v6, 0xffffff2a, v6
+; GISEL-NEXT:    v_sub_u32_e32 v2, 64, v7
 ; GISEL-NEXT:    v_lshrrev_b64 v[2:3], v2, v[4:5]
-; GISEL-NEXT:    v_lshlrev_b64 v[4:5], v7, v[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v6
+; GISEL-NEXT:    v_lshlrev_b64 v[4:5], v6, v[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v7
 ; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v12, v8, v[0:1]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v13, v2, 0, s[6:7]
@@ -1009,7 +1009,7 @@ define i128 @fptosi_f32_to_i128(float %x) {
 ; GISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GISEL-NEXT:  ; %bb.5: ; %fp-to-i-if-then12
 ; GISEL-NEXT:    v_sub_co_u32_e32 v3, vcc, 0x96, v6
-; GISEL-NEXT:    v_subrev_u32_e32 v2, 64, v3
+; GISEL-NEXT:    v_add_u32_e32 v2, 0xffffffc0, v3
 ; GISEL-NEXT:    v_lshrrev_b64 v[0:1], v3, v[4:5]
 ; GISEL-NEXT:    v_lshrrev_b64 v[1:2], v2, 0
 ; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v3
@@ -1330,17 +1330,17 @@ define i128 @fptoui_f32_to_i128(float %x) {
 ; GISEL-NEXT:    s_xor_b64 s[16:17], exec, s[6:7]
 ; GISEL-NEXT:    s_cbranch_execz .LBB3_4
 ; GISEL-NEXT:  ; %bb.3: ; %fp-to-i-if-else
-; GISEL-NEXT:    v_add_u32_e32 v6, 0xffffff6a, v6
-; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v6, v[4:5]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v6
+; GISEL-NEXT:    v_add_u32_e32 v7, 0xffffff6a, v6
+; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v7, v[4:5]
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
 ; GISEL-NEXT:    v_cndmask_b32_e32 v11, 0, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v12, 0, v1, vcc
 ; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v11, v10, 0
-; GISEL-NEXT:    v_subrev_u32_e32 v7, 64, v6
-; GISEL-NEXT:    v_sub_u32_e32 v2, 64, v6
+; GISEL-NEXT:    v_add_u32_e32 v6, 0xffffff2a, v6
+; GISEL-NEXT:    v_sub_u32_e32 v2, 64, v7
 ; GISEL-NEXT:    v_lshrrev_b64 v[2:3], v2, v[4:5]
-; GISEL-NEXT:    v_lshlrev_b64 v[4:5], v7, v[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v6
+; GISEL-NEXT:    v_lshlrev_b64 v[4:5], v6, v[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v7
 ; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v12, v8, v[0:1]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v13, v2, 0, s[6:7]
@@ -1367,7 +1367,7 @@ define i128 @fptoui_f32_to_i128(float %x) {
 ; GISEL-NEXT:    s_cbranch_execz .LBB3_6
 ; GISEL-NEXT:  ; %bb.5: ; %fp-to-i-if-then12
 ; GISEL-NEXT:    v_sub_co_u32_e32 v3, vcc, 0x96, v6
-; GISEL-NEXT:    v_subrev_u32_e32 v2, 64, v3
+; GISEL-NEXT:    v_add_u32_e32 v2, 0xffffffc0, v3
 ; GISEL-NEXT:    v_lshrrev_b64 v[0:1], v3, v[4:5]
 ; GISEL-NEXT:    v_lshrrev_b64 v[1:2], v2, 0
 ; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v3
@@ -1714,7 +1714,7 @@ define i128 @fptosi_bf16_to_i128(bfloat %x) {
 ; GISEL-NEXT:  ; %bb.3: ; %fp-to-i-if-else
 ; GISEL-NEXT:    v_add_u32_e32 v6, 0xffffff7a, v5
 ; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v6, v[7:8]
-; GISEL-NEXT:    v_subrev_u32_e32 v4, 64, v6
+; GISEL-NEXT:    v_add_u32_e32 v4, 0xffffff3a, v5
 ; GISEL-NEXT:    v_sub_u32_e32 v2, 64, v6
 ; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v6
 ; GISEL-NEXT:    v_lshl_or_b32 v11, v11, 16, v11
@@ -1748,7 +1748,7 @@ define i128 @fptosi_bf16_to_i128(bfloat %x) {
 ; GISEL-NEXT:    s_cbranch_execz .LBB6_6
 ; GISEL-NEXT:  ; %bb.5: ; %fp-to-i-if-then12
 ; GISEL-NEXT:    v_sub_co_u32_e32 v3, vcc, 0x86, v5
-; GISEL-NEXT:    v_subrev_u32_e32 v2, 64, v3
+; GISEL-NEXT:    v_add_u32_e32 v2, 0xffffffc0, v3
 ; GISEL-NEXT:    v_lshrrev_b64 v[0:1], v3, v[7:8]
 ; GISEL-NEXT:    v_lshrrev_b64 v[1:2], v2, 0
 ; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v3
@@ -2066,7 +2066,7 @@ define i128 @fptoui_bf16_to_i128(bfloat %x) {
 ; GISEL-NEXT:  ; %bb.3: ; %fp-to-i-if-else
 ; GISEL-NEXT:    v_add_u32_e32 v6, 0xffffff7a, v5
 ; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v6, v[7:8]
-; GISEL-NEXT:    v_subrev_u32_e32 v4, 64, v6
+; GISEL-NEXT:    v_add_u32_e32 v4, 0xffffff3a, v5
 ; GISEL-NEXT:    v_sub_u32_e32 v2, 64, v6
 ; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v6
 ; GISEL-NEXT:    v_lshl_or_b32 v11, v11, 16, v11
@@ -2100,7 +2100,7 @@ define i128 @fptoui_bf16_to_i128(bfloat %x) {
 ; GISEL-NEXT:    s_cbranch_execz .LBB7_6
 ; GISEL-NEXT:  ; %bb.5: ; %fp-to-i-if-then12
 ; GISEL-NEXT:    v_sub_co_u32_e32 v3, vcc, 0x86, v5
-; GISEL-NEXT:    v_subrev_u32_e32 v2, 64, v3
+; GISEL-NEXT:    v_add_u32_e32 v2, 0xffffffc0, v3
 ; GISEL-NEXT:    v_lshrrev_b64 v[0:1], v3, v[7:8]
 ; GISEL-NEXT:    v_lshrrev_b64 v[1:2], v2, 0
 ; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v3
diff --git a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
index 2999ddb8315883..f372a54894604c 100644
--- a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
@@ -193,32 +193,32 @@ define float @sitofp_i128_to_f32(i128 %x) {
 ; GISEL-NEXT:    v_sub_u32_e32 v11, 64, v4
 ; GISEL-NEXT:    v_lshrrev_b64 v[9:10], v4, v[0:1]
 ; GISEL-NEXT:    v_lshlrev_b64 v[11:12], v11, v[2:3]
-; GISEL-NEXT:    v_subrev_u32_e32 v13, 64, v4
+; GISEL-NEXT:    v_add_u32_e32 v13, 0xffffffc0, v4
 ; GISEL-NEXT:    v_or_b32_e32 v11, v9, v11
 ; GISEL-NEXT:    v_or_b32_e32 v12, v10, v12
 ; GISEL-NEXT:    v_lshrrev_b64 v[9:10], v13, v[2:3]
 ; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v4
-; GISEL-NEXT:    v_add_u32_e32 v5, 26, v5
+; GISEL-NEXT:    v_add_u32_e32 v14, 26, v5
 ; GISEL-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v10, v10, v12, vcc
 ; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v4
-; GISEL-NEXT:    v_sub_u32_e32 v11, 64, v5
+; GISEL-NEXT:    v_sub_u32_e32 v11, 64, v14
 ; GISEL-NEXT:    v_cndmask_b32_e32 v13, v9, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v4, v10, v1, vcc
-; GISEL-NEXT:    v_lshrrev_b64 v[9:10], v5, -1
+; GISEL-NEXT:    v_lshrrev_b64 v[9:10], v14, -1
 ; GISEL-NEXT:    v_lshlrev_b64 v[11:12], v11, -1
-; GISEL-NEXT:    v_subrev_u32_e32 v14, 64, v5
+; GISEL-NEXT:    v_add_u32_e32 v5, 0xffffffda, v5
 ; GISEL-NEXT:    v_or_b32_e32 v15, v9, v11
 ; GISEL-NEXT:    v_or_b32_e32 v16, v10, v12
-; GISEL-NEXT:    v_lshrrev_b64 v[11:12], v14, -1
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v5
-; GISEL-NEXT:    v_cndmask_b32_e32 v11, v11, v15, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v12, v12, v16, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v5
+; GISEL-NEXT:    v_lshrrev_b64 v[11:12], v5, -1
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v11, v15, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v11, v12, v16, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
 ; GISEL-NEXT:    v_cndmask_b32_e32 v9, 0, v9, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v10, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, v11, -1, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, v12, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, v11, -1, s[4:5]
 ; GISEL-NEXT:    v_and_b32_e32 v2, v9, v2
 ; GISEL-NEXT:    v_and_b32_e32 v3, v10, v3
 ; GISEL-NEXT:    v_and_or_b32 v0, v5, v0, v2
@@ -438,32 +438,32 @@ define float @uitofp_i128_to_f32(i128 %x) {
 ; GISEL-NEXT:    v_sub_u32_e32 v10, 64, v4
 ; GISEL-NEXT:    v_lshrrev_b64 v[8:9], v4, v[0:1]
 ; GISEL-NEXT:    v_lshlrev_b64 v[10:11], v10, v[2:3]
-; GISEL-NEXT:    v_subrev_u32_e32 v12, 64, v4
+; GISEL-NEXT:    v_add_u32_e32 v12, 0xffffffc0, v4
 ; GISEL-NEXT:    v_or_b32_e32 v10, v8, v10
 ; GISEL-NEXT:    v_or_b32_e32 v11, v9, v11
 ; GISEL-NEXT:    v_lshrrev_b64 v[8:9], v12, v[2:3]
 ; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v4
-; GISEL-NEXT:    v_add_u32_e32 v5, 26, v5
+; GISEL-NEXT:    v_add_u32_e32 v13, 26, v5
 ; GISEL-NEXT:    v_cndmask_b32_e32 v8, v8, v10, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc
 ; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v4
-; GISEL-NEXT:    v_sub_u32_e32 v10, 64, v5
+; GISEL-NEXT:    v_sub_u32_e32 v10, 64, v13
 ; GISEL-NEXT:    v_cndmask_b32_e32 v12, v8, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v4, v9, v1, vcc
-; GISEL-NEXT:    v_lshrrev_b64 v[8:9], v5, -1
+; GISEL-NEXT:    v_lshrrev_b64 v[8:9], v13, -1
 ; GISEL-NEXT:    v_lshlrev_b64 v[10:11], v10, -1
-; GISEL-NEXT:    v_subrev_u32_e32 v13, 64, v5
+; GISEL-NEXT:    v_add_u32_e32 v5, 0xffffffda, v5
 ; GISEL-NEXT:    v_or_b32_e32 v14, v8, v10
 ; GISEL-NEXT:    v_or_b32_e32 v15, v9, v11
-; GISEL-NEXT:    v_lshrrev_b64 v[10:11], v13, -1
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v5
-; GISEL-NEXT:    v_cndmask_b32_e32 v10, v10, v14, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v11, v11, v15, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v5
+; GISEL-NEXT:    v_lshrrev_b64 v[10:11], v5, -1
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v13
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v10, v14, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v10, v11, v15, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v13
 ; GISEL-NEXT:    v_cndmask_b32_e32 v8, 0, v8, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v9, 0, v9, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, v10, -1, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, v11, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, v10, -1, s[4:5]
 ; GISEL-NEXT:    v_and_b32_e32 v2, v8, v2
 ; GISEL-NEXT:    v_and_b32_e32 v3, v9, v3
 ; GISEL-NEXT:    v_and_or_b32 v0, v5, v0, v2
@@ -723,34 +723,34 @@ define double @sitofp_i128_to_f64(i128 %x) {
 ; GISEL-NEXT:    v_sub_u32_e32 v10, 64, v14
 ; GISEL-NEXT:    v_lshrrev_b64 v[0:1], v14, v[2:3]
 ; GISEL-NEXT:    v_lshlrev_b64 v[10:11], v10, v[4:5]
-; GISEL-NEXT:    v_subrev_u32_e32 v15, 64, v14
+; GISEL-NEXT:    v_add_u32_e32 v15, 0xffffffc0, v14
 ; GISEL-NEXT:    v_lshrrev_b64 v[12:13], v14, v[4:5]
 ; GISEL-NEXT:    v_or_b32_e32 v10, v0, v10
 ; GISEL-NEXT:    v_or_b32_e32 v11, v1, v11
 ; GISEL-NEXT:    v_lshrrev_b64 v[0:1], v15, v[4:5]
 ; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
-; GISEL-NEXT:    v_add_u32_e32 v9, 55, v9
+; GISEL-NEXT:    v_add_u32_e32 v15, 55, v9
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
 ; GISEL-NEXT:    v_cndmask_b32_e32 v11, 0, v12, vcc
-; GISEL-NEXT:    v_sub_u32_e32 v12, 64, v9
+; GISEL-NEXT:    v_sub_u32_e32 v12, 64, v15
 ; GISEL-NEXT:    v_cndmask_b32_e64 v14, v0, v2, s[4:5]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v10, v1, v3, s[4:5]
-; GISEL-NEXT:    v_lshrrev_b64 v[0:1], v9, -1
+; GISEL-NEXT:    v_lshrrev_b64 v[0:1], v15, -1
 ; GISEL-NEXT:    v_lshlrev_b64 v[12:13], v12, -1
-; GISEL-NEXT:    v_subrev_u32_e32 v15, 64, v9
+; GISEL-NEXT:    v_add_u32_e32 v9, -9, v9
 ; GISEL-NEXT:    v_or_b32_e32 v16, v0, v12
 ; GISEL-NEXT:    v_or_b32_e32 v17, v1, v13
-; GISEL-NEXT:    v_lshrrev_b64 v[12:13], v15, -1
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v9
-; GISEL-NEXT:    v_cndmask_b32_e32 v12, v12, v16, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v13, v13, v17, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v9
+; GISEL-NEXT:    v_lshrrev_b64 v[12:13], v9, -1
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v15
+; GISEL-NEXT:    v_cndmask_b32_e32 v9, v12, v16, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v12, v13, v17, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v15
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, v12, -1, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, v13, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, v9, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, v12, -1, s[4:5]
 ; GISEL-NEXT:    v_and_b32_e32 v0, v0, v4
 ; GISEL-NEXT:    v_and_b32_e32 v1, v1, v5
 ; GISEL-NEXT:    v_and_or_b32 v0, v9, v2, v0
@@ -999,35 +999,35 @@ define double @uitofp_i128_to_f64(i128 %x) {
 ; GISEL-NEXT:    v_sub_u32_e32 v9, 64, v13
 ; GISEL-NEXT:    v_lshrrev_b64 v[4:5], v13, v[0:1]
 ; GISEL-NEXT:    v_lshlrev_b64 v[9:10], v9, v[2:3]
-; GISEL-NEXT:    v_subrev_u32_e32 v14, 64, v13
+; GISEL-NEXT:    v_add_u32_e32 v14, 0xffffffc0, v13
 ; GISEL-NEXT:    v_lshrrev_b64 v[11:12], v13, v[2:3]
 ; GISEL-NEXT:    v_or_b32_e32 v9, v4, v9
 ; GISEL-NEXT:    v_or_b32_e32 v10, v5, v10
 ; GISEL-NEXT:    v_lshrrev_b64 v[4:5], v14, v[2:3]
 ; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v13
-; GISEL-NEXT:    v_add_u32_e32 v8, 55, v8
+; GISEL-NEXT:    v_add_u32_e32 v15, 55, v8
 ; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v9, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v5, v5, v10, vcc
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v13
 ; GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v11, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v11, 0, v12, vcc
-; GISEL-NEXT:    v_sub_u32_e32 v12, 64, v8
+; GISEL-NEXT:    v_sub_u32_e32 v12, 64, v15
 ; GISEL-NEXT:    v_cndmask_b32_e64 v14, v4, v0, s[4:5]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v9, v5, v1, s[4:5]
-; GISEL-NEXT:    v_lshrrev_b64 v[4:5], v8, -1
+; GISEL-NEXT:    v_lshrrev_b64 v[4:5], v15, -1
 ; GISEL-NEXT:    v_lshlrev_b64 v[12:13], v12, -1
-; GISEL-NEXT:    v_subrev_u32_e32 v15, 64, v8
+; GISEL-NEXT:    v_add_u32_e32 v8, -9, v8
 ; GISEL-NEXT:    v_or_b32_e32 v16, v4, v12
 ; GISEL-NEXT:    v_or_b32_e32 v17, v5, v13
-; GISEL-NEXT:    v_lshrrev_b64 v[12:13], v15, -1
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v8
-; GISEL-NEXT:    v_cndmask_b32_e32 v12, v12, v16, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v13, v13, v17, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v8
+; GISEL-NEXT:    v_lshrrev_b64 v[12:13], v8, -1
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v15
+; GISEL-NEXT:    v_cndmask_b32_e32 v8, v12, v16, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v12, v13, v17, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v15
 ; GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, v12, -1, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, v13, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, v8, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, v12, -1, s[4:5]
 ; GISEL-NEXT:    v_and_b32_e32 v2, v4, v2
 ; GISEL-NEXT:    v_and_b32_e32 v3, v5, v3
 ; GISEL-NEXT:    v_and_or_b32 v0, v8, v0, v2
@@ -1284,32 +1284,32 @@ define half @sitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT:    v_sub_u32_e32 v11, 64, v4
 ; GISEL-NEXT:    v_lshrrev_b64 v[9:10], v4, v[0:1]
 ; GISEL-NEXT:    v_lshlrev_b64 v[11:12], v11, v[2:3]
-; GISEL-NEXT:    v_subrev_u32_e32 v13, 64, v4
+; GISEL-NEXT:    v_add_u32_e32 v13, 0xffffffc0, v4
 ; GISEL-NEXT:    v_or_b32_e32 v11, v9, v11
 ; GISEL-NEXT:    v_or_b32_e32 v12, v10, v12
 ; GISEL-NEXT:    v_lshrrev_b64 v[9:10], v13, v[2:3]
 ; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v4
-; GISEL-NEXT:    v_add_u32_e32 v5, 26, v5
+; GISEL-NEXT:    v_add_u32_e32 v14, 26, v5
 ; GISEL-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v10, v10, v12, vcc
 ; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v4
-; GISEL-NEXT:    v_sub_u32_e32 v11, 64, v5
+; GISEL-NEXT:    v_sub_u32_e32 v11, 64, v14
 ; GISEL-NEXT:    v_cndmask_b32_e32 v13, v9, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v4, v10, v1, vcc
-; GISEL-NEXT:    v_lshrrev_b64 v[9:10], v5, -1
+; GISEL-NEXT:    v_lshrrev_b64 v[9:10], v14, -1
 ; GISEL-NEXT:    v_lshlrev_b64 v[11:12], v11, -1
-; GISEL-NEXT:    v_subrev_u32_e32 v14, 64, v5
+; GISEL-NEXT:    v_add_u32_e32 v5, 0xffffffda, v5
 ; GISEL-NEXT:    v_or_b32_e32 v15, v9, v11
 ; GISEL-NEXT:    v_or_b32_e32 v16, v10, v12
-; GISEL-NEXT:    v_lshrrev_b64 v[11:12], v14, -1
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v5
-; GISEL-NEXT:    v_cndmask_b32_e32 v11, v11, v15, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v12, v12, v16, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v5
+; GISEL-NEXT:    v_lshrrev_b64 v[11:12], v5, -1
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v11, v15, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v11, v12, v16, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
 ; GISEL-NEXT:    v_cndmask_b32_e32 v9, 0, v9, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v10, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, v11, -1, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, v12, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, v11, -1, s[4:5]
 ; GISEL-NEXT:    v_and_b32_e32 v2, v9, v2
 ; GISEL-NEXT:    v_and_b32_e32 v3, v10, v3
 ; GISEL-NEXT:    v_and_or_b32 v0, v5, v0, v2
@@ -1531,32 +1531,32 @@ define half @uitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT:    v_sub_u32_e32 v10, 64, v4
 ; GISEL-NEXT:    v_lshrrev_b64 v[8:9], v4, v[0:1]
 ; GISEL-NEXT:    v_lshlrev_b64 v[10:11], v10, v[2:3]
-; GISEL-NEXT:    v_subrev_u32_e32 v12, 64, v4
+; GISEL-NEXT:    v_add_u32_e32 v12, 0xffffffc0, v4
 ; GISEL-NEXT:    v_or_b32_e32 v10, v8, v10
 ; GISEL-NEXT:    v_or_b32_e32 v11, v9, v11
 ; GISEL-NEXT:    v_lshrrev_b64 v[8:9], v12, v[2:3]
 ; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v4
-; GISEL-NEXT:    v_add_u32_e32 v5, 26, v5
+; GISEL-NEXT:    v_add_u32_e32 v13, 26, v5
 ; GISEL-NEXT:    v_cndmask_b32_e32 v8, v8, v10, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc
 ; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v4
-; GISEL-NEXT:    v_sub_u32_e32 v10, 64, v5
+; GISEL-NEXT:    v_sub_u32_e32 v10, 64, v13
 ; GISEL-NEXT:    v_cndmask_b32_e32 v12, v8, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v4, v9, v1, vcc
-; GISEL-NEXT:    v_lshrrev_b64 v[8:9], v5, -1
+; GISEL-NEXT:    v_lshrrev_b64 v[8:9], v13, -1
 ; GISEL-NEXT:    v_lshlrev_b64 v[10:11], v10, -1
-; GISEL-NEXT:    v_subrev_u32_e32 v13, 64, v5
+; GISEL-NEXT:    v_add_u32_e32 v5, 0xffffffda, v5
 ; GISEL-NEXT:    v_or_b32_e32 v14, v8, v10
 ; GISEL-NEXT:    v_or_b32_e32 v15, v9, v11
-; GISEL-NEXT:    v_lshrrev_b64 v[10:11], v13, -1
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v5
-; GISEL-NEXT:    v_cndmask_b32_e32 v10, v10, v14, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v11, v11, v15, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v5
+; GISEL-NEXT:    v_lshrrev_b64 v[10:11], v5, -1
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v13
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v10, v14, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v10, v11, v15, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v13
 ; GISEL-NEXT:    v_cndmask_b32_e32 v8, 0, v8, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v9, 0, v9, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, v10, -1, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, v11, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, v10, -1, s[4:5]
 ; GISEL-NEXT:    v_and_b32_e32 v2, v8, v2
 ; GISEL-NEXT:    v_and_b32_e32 v3, v9, v3
 ; GISEL-NEXT:    v_and_or_b32 v0, v5, v0, v2
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
index 8d99ec2e1b709f..b2bfc2ea4e0b28 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
@@ -139,7 +139,7 @@ define amdgpu_kernel void @test_barrier(ptr addrspace(1) %out, i32 %size) #0 {
 ; VARIANT6-NEXT:    s_load_b96 s[0:2], s[2:3], 0x24
 ; VARIANT6-NEXT:    s_wait_kmcnt 0x0
 ; VARIANT6-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_and_b32 v4, 0x3ff, v0
-; VARIANT6-NEXT:    s_sub_co_i32 s2, s2, 1
+; VARIANT6-NEXT:    s_add_co_i32 s2, s2, -1
 ; VARIANT6-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; VARIANT6-NEXT:    v_dual_mov_b32 v2, s0 :: v_dual_lshlrev_b32 v5, 2, v4
 ; VARIANT6-NEXT:    v_sub_nc_u32_e32 v0, s2, v4
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
index a577fb3d190ab9..d874418b99dd38 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
@@ -395,7 +395,7 @@ define i1 @posnormal_f16(half %x) nounwind {
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v1
 ; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v0, v2
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v1
+; GFX7GLISEL-NEXT:    v_add_i32_e32 v0, vcc, 0xfffffc00, v1
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7800
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v0, v1
@@ -459,7 +459,7 @@ define i1 @negnormal_f16(half %x) nounwind {
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v1
 ; GFX7GLISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], v0, v2
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v1
+; GFX7GLISEL-NEXT:    v_add_i32_e32 v0, vcc, 0xfffffc00, v1
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7800
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v0, v1
@@ -513,7 +513,7 @@ define i1 @possubnormal_f16(half %x) nounwind {
 ; GFX7GLISEL-LABEL: possubnormal_f16:
 ; GFX7GLISEL:       ; %bb.0:
 ; GFX7GLISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 1, v0
+; GFX7GLISEL-NEXT:    v_add_i32_e32 v0, vcc, -1, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x3ff
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v0, v1
@@ -575,7 +575,7 @@ define i1 @negsubnormal_f16(half %x) nounwind {
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v1
 ; GFX7GLISEL-NEXT:    v_cmp_ne_u32_e32 vcc, v0, v2
-; GFX7GLISEL-NEXT:    v_subrev_i32_e64 v0, s[4:5], 1, v1
+; GFX7GLISEL-NEXT:    v_add_i32_e64 v0, s[4:5], -1, v1
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x3ff
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e64 s[4:5], v0, v1
@@ -1587,7 +1587,7 @@ define i1 @not_issubnormal_or_zero_f16(half %x) {
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x7c00
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v0
+; GFX7GLISEL-NEXT:    v_add_i32_e32 v0, vcc, 0xfffffc00, v0
 ; GFX7GLISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v2
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7800
@@ -1647,7 +1647,7 @@ define i1 @isnormal_f16(half %x) {
 ; GFX7GLISEL:       ; %bb.0:
 ; GFX7GLISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v0
+; GFX7GLISEL-NEXT:    v_add_i32_e32 v0, vcc, 0xfffffc00, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7800
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v0, v1
@@ -1780,7 +1780,7 @@ define i1 @not_is_plus_normal_f16(half %x) {
 ; GFX7GLISEL-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
 ; GFX7GLISEL-NEXT:    v_cmp_gt_u32_e32 vcc, v3, v2
 ; GFX7GLISEL-NEXT:    s_or_b64 s[6:7], s[6:7], vcc
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v1
+; GFX7GLISEL-NEXT:    v_add_i32_e32 v0, vcc, 0xfffffc00, v1
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7800
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v0, v1
@@ -1853,7 +1853,7 @@ define i1 @not_is_neg_normal_f16(half %x) {
 ; GFX7GLISEL-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
 ; GFX7GLISEL-NEXT:    v_cmp_gt_u32_e32 vcc, v3, v2
 ; GFX7GLISEL-NEXT:    s_or_b64 s[6:7], s[6:7], vcc
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v1
+; GFX7GLISEL-NEXT:    v_add_i32_e32 v0, vcc, 0xfffffc00, v1
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7800
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v0, v1
@@ -1911,7 +1911,7 @@ define i1 @issubnormal_f16(half %x) {
 ; GFX7GLISEL:       ; %bb.0:
 ; GFX7GLISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 1, v0
+; GFX7GLISEL-NEXT:    v_add_i32_e32 v0, vcc, -1, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x3ff
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v0, v1
@@ -1974,7 +1974,7 @@ define i1 @not_issubnormal_f16(half %x) {
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GFX7GLISEL-NEXT:    v_cmp_gt_u32_e32 vcc, v1, v2
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v0
+; GFX7GLISEL-NEXT:    v_add_i32_e32 v0, vcc, 0xfffffc00, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7800
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v0, v1
@@ -2081,7 +2081,7 @@ define i1 @not_iszero_f16(half %x) {
 ; GFX7GLISEL:       ; %bb.0:
 ; GFX7GLISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v1, vcc, 1, v0
+; GFX7GLISEL-NEXT:    v_add_i32_e32 v1, vcc, -1, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x3ff
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v1, v2
@@ -2091,7 +2091,7 @@ define i1 @not_iszero_f16(half %x) {
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GFX7GLISEL-NEXT:    v_cmp_gt_u32_e32 vcc, v1, v2
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v0
+; GFX7GLISEL-NEXT:    v_add_i32_e32 v0, vcc, 0xfffffc00, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7800
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v0, v1
@@ -2568,7 +2568,7 @@ define i1 @not_iszero_or_nan_f16(half %x) {
 ; GFX7GLISEL:       ; %bb.0: ; %entry
 ; GFX7GLISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v1, vcc, 1, v0
+; GFX7GLISEL-NEXT:    v_add_i32_e32 v1, vcc, -1, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x3ff
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v1, v2
@@ -2576,7 +2576,7 @@ define i1 @not_iszero_or_nan_f16(half %x) {
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x7c00
 ; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v2
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v0
+; GFX7GLISEL-NEXT:    v_add_i32_e32 v0, vcc, 0xfffffc00, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7800
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v0, v1
@@ -2635,7 +2635,7 @@ define i1 @not_iszero_or_nan_f_daz(half %x) #0 {
 ; GFX7GLISEL:       ; %bb.0: ; %entry
 ; GFX7GLISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v1, vcc, 1, v0
+; GFX7GLISEL-NEXT:    v_add_i32_e32 v1, vcc, -1, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x3ff
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v1, v2
@@ -2643,7 +2643,7 @@ define i1 @not_iszero_or_nan_f_daz(half %x) #0 {
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x7c00
 ; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v2
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v0
+; GFX7GLISEL-NEXT:    v_add_i32_e32 v0, vcc, 0xfffffc00, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7800
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v0, v1
@@ -2702,7 +2702,7 @@ define i1 @not_iszero_or_nan_f_maybe_daz(half %x) #1 {
 ; GFX7GLISEL:       ; %bb.0: ; %entry
 ; GFX7GLISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v1, vcc, 1, v0
+; GFX7GLISEL-NEXT:    v_add_i32_e32 v1, vcc, -1, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x3ff
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v1, v2
@@ -2710,7 +2710,7 @@ define i1 @not_iszero_or_nan_f_maybe_daz(half %x) #1 {
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x7c00
 ; GFX7GLISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v2
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v0
+; GFX7GLISEL-NEXT:    v_add_i32_e32 v0, vcc, 0xfffffc00, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7800
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v0, v1
@@ -2902,7 +2902,7 @@ define i1 @not_iszero_or_qnan_f16(half %x) {
 ; GFX7GLISEL:       ; %bb.0: ; %entry
 ; GFX7GLISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v1, vcc, 1, v0
+; GFX7GLISEL-NEXT:    v_add_i32_e32 v1, vcc, -1, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x3ff
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v1, v2
@@ -2914,7 +2914,7 @@ define i1 @not_iszero_or_qnan_f16(half %x) {
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x1ff
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v1, v2
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v0
+; GFX7GLISEL-NEXT:    v_add_i32_e32 v0, vcc, 0xfffffc00, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7800
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v0, v1
@@ -2983,7 +2983,7 @@ define i1 @not_iszero_or_snan_f16(half %x) {
 ; GFX7GLISEL:       ; %bb.0: ; %entry
 ; GFX7GLISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v1, vcc, 1, v0
+; GFX7GLISEL-NEXT:    v_add_i32_e32 v1, vcc, -1, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v2, 0x3ff
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v1, v2
@@ -2994,7 +2994,7 @@ define i1 @not_iszero_or_snan_f16(half %x) {
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GFX7GLISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
 ; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
-; GFX7GLISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0x400, v0
+; GFX7GLISEL-NEXT:    v_add_i32_e32 v0, vcc, 0xfffffc00, v0
 ; GFX7GLISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v1, 0x7800
 ; GFX7GLISEL-NEXT:    v_cmp_lt_u32_e32 vcc, v0, v1
diff --git a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
index 6ac04d8bc42bba..b3c06756a89872 100644
--- a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
+++ b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
@@ -41,7 +41,7 @@ define amdgpu_kernel void @v_test_i32_x_sub_64(ptr addrspace(1) %out, ptr addrsp
 ; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
 ; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, 64, v2
+; SI-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 0xffffffc0, v2
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
@@ -78,44 +78,79 @@ define amdgpu_kernel void @v_test_i32_x_sub_64(ptr addrspace(1) %out, ptr addrsp
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    v_subrev_u32_e32 v2, vcc, 64, v3
+; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, 0xffffffc0, v3
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: v_test_i32_x_sub_64:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[6:7]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_subrev_u32_e32 v1, 64, v1
-; GFX9-NEXT:    global_store_dword v0, v1, s[4:5]
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: v_test_i32_x_sub_64:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[6:7]
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    v_subrev_u32_e32 v1, 64, v1
+; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[4:5]
+; GFX9-SDAG-NEXT:    s_endpgm
 ;
-; GFX10-LABEL: v_test_i32_x_sub_64:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    global_load_dword v1, v0, s[6:7]
-; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v1, 64, v1
-; GFX10-NEXT:    global_store_dword v0, v1, s[4:5]
-; GFX10-NEXT:    s_endpgm
+; GFX9-GISEL-LABEL: v_test_i32_x_sub_64:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[6:7]
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_add_u32_e32 v1, 0xffffffc0, v1
+; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[4:5]
+; GFX9-GISEL-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: v_test_i32_x_sub_64:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3]
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v1, 64, v1
-; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT:    s_endpgm
+; GFX10-SDAG-LABEL: v_test_i32_x_sub_64:
+; GFX10-SDAG:       ; %bb.0:
+; GFX10-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX10-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-SDAG-NEXT:    global_load_dword v1, v0, s[6:7]
+; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT:    v_subrev_nc_u32_e32 v1, 64, v1
+; GFX10-SDAG-NEXT:    global_store_dword v0, v1, s[4:5]
+; GFX10-SDAG-NEXT:    s_endpgm
+;
+; GFX10-GISEL-LABEL: v_test_i32_x_sub_64:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX10-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-GISEL-NEXT:    global_load_dword v1, v0, s[6:7]
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v1, 0xffffffc0, v1
+; GFX10-GISEL-NEXT:    global_store_dword v0, v1, s[4:5]
+; GFX10-GISEL-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: v_test_i32_x_sub_64:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
+; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3]
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    v_subrev_nc_u32_e32 v1, 64, v1
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: v_test_i32_x_sub_64:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
+; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3]
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v1, 0xffffffc0, v1
+; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
   %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %tid.ext
@@ -163,8 +198,8 @@ define amdgpu_kernel void @v_test_i32_x_sub_64_multi_use(ptr addrspace(1) %out,
 ; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 glc
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
-; SI-GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, 64, v2
-; SI-GISEL-NEXT:    v_subrev_i32_e32 v3, vcc, 64, v3
+; SI-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 0xffffffc0, v2
+; SI-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 0xffffffc0, v3
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; SI-GISEL-NEXT:    buffer_store_dword v3, v[0:1], s[0:3], 0 addr64
@@ -211,66 +246,119 @@ define amdgpu_kernel void @v_test_i32_x_sub_64_multi_use(ptr addrspace(1) %out,
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-GISEL-NEXT:    v_subrev_u32_e32 v2, vcc, 64, v3
-; VI-GISEL-NEXT:    v_subrev_u32_e32 v3, vcc, 64, v4
+; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, 0xffffffc0, v3
+; VI-GISEL-NEXT:    v_add_u32_e32 v3, vcc, 0xffffffc0, v4
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v3
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: v_test_i32_x_sub_64_multi_use:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[6:7] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_dword v2, v0, s[6:7] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_subrev_u32_e32 v1, 64, v1
-; GFX9-NEXT:    v_subrev_u32_e32 v2, 64, v2
-; GFX9-NEXT:    global_store_dword v0, v1, s[4:5]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_store_dword v0, v2, s[4:5]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: v_test_i32_x_sub_64_multi_use:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[6:7] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_dword v2, v0, s[6:7] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    v_subrev_u32_e32 v1, 64, v1
+; GFX9-SDAG-NEXT:    v_subrev_u32_e32 v2, 64, v2
+; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[4:5]
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_store_dword v0, v2, s[4:5]
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    s_endpgm
 ;
-; GFX10-LABEL: v_test_i32_x_sub_64_multi_use:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
-; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    global_load_dword v2, v0, s[6:7] glc dlc
-; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v1, 64, v1
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v2, 64, v2
-; GFX10-NEXT:    global_store_dword v0, v1, s[4:5]
-; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    global_store_dword v0, v2, s[4:5]
-; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    s_endpgm
+; GFX9-GISEL-LABEL: v_test_i32_x_sub_64_multi_use:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[6:7] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_dword v2, v0, s[6:7] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_add_u32_e32 v1, 0xffffffc0, v1
+; GFX9-GISEL-NEXT:    v_add_u32_e32 v2, 0xffffffc0, v2
+; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[4:5]
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_store_dword v0, v2, s[4:5]
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: v_test_i32_x_sub_64_multi_use:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v2, v0, s[2:3] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v1, 64, v1
-; GFX11-NEXT:    v_subrev_nc_u32_e32 v2, 64, v2
-; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1] dlc
-; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT:    global_store_b32 v0, v2, s[0:1] dlc
-; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT:    s_endpgm
+; GFX10-SDAG-LABEL: v_test_i32_x_sub_64_multi_use:
+; GFX10-SDAG:       ; %bb.0:
+; GFX10-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX10-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-SDAG-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
+; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT:    global_load_dword v2, v0, s[6:7] glc dlc
+; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT:    v_subrev_nc_u32_e32 v1, 64, v1
+; GFX10-SDAG-NEXT:    v_subrev_nc_u32_e32 v2, 64, v2
+; GFX10-SDAG-NEXT:    global_store_dword v0, v1, s[4:5]
+; GFX10-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-SDAG-NEXT:    global_store_dword v0, v2, s[4:5]
+; GFX10-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-SDAG-NEXT:    s_endpgm
+;
+; GFX10-GISEL-LABEL: v_test_i32_x_sub_64_multi_use:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX10-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-GISEL-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT:    global_load_dword v2, v0, s[6:7] glc dlc
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v1, 0xffffffc0, v1
+; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v2, 0xffffffc0, v2
+; GFX10-GISEL-NEXT:    global_store_dword v0, v1, s[4:5]
+; GFX10-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-GISEL-NEXT:    global_store_dword v0, v2, s[4:5]
+; GFX10-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-GISEL-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: v_test_i32_x_sub_64_multi_use:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
+; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b32 v2, v0, s[2:3] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    v_subrev_nc_u32_e32 v1, 64, v1
+; GFX11-SDAG-NEXT:    v_subrev_nc_u32_e32 v2, 64, v2
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1] dlc
+; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v2, s[0:1] dlc
+; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: v_test_i32_x_sub_64_multi_use:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
+; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b32 v2, v0, s[2:3] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v1, 0xffffffc0, v1
+; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v2, 0xffffffc0, v2
+; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1] dlc
+; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX11-GISEL-NEXT:    global_store_b32 v0, v2, s[0:1] dlc
+; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX11-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
   %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %tid.ext
@@ -425,7 +513,7 @@ define amdgpu_kernel void @v_test_i32_x_sub_65(ptr addrspace(1) %out, ptr addrsp
 ; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
 ; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, 0x41, v2
+; SI-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 0xffffffbf, v2
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
@@ -462,79 +550,44 @@ define amdgpu_kernel void @v_test_i32_x_sub_65(ptr addrspace(1) %out, ptr addrsp
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    v_subrev_u32_e32 v2, vcc, 0x41, v3
+; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, 0xffffffbf, v3
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
-; GFX9-SDAG-LABEL: v_test_i32_x_sub_65:
-; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[6:7]
-; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT:    v_add_u32_e32 v1, 0xffffffbf, v1
-; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[4:5]
-; GFX9-SDAG-NEXT:    s_endpgm
-;
-; GFX9-GISEL-LABEL: v_test_i32_x_sub_65:
-; GFX9-GISEL:       ; %bb.0:
-; GFX9-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[6:7]
-; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT:    v_subrev_u32_e32 v1, 0x41, v1
-; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[4:5]
-; GFX9-GISEL-NEXT:    s_endpgm
-;
-; GFX10-SDAG-LABEL: v_test_i32_x_sub_65:
-; GFX10-SDAG:       ; %bb.0:
-; GFX10-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX10-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT:    global_load_dword v1, v0, s[6:7]
-; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v1, 0xffffffbf, v1
-; GFX10-SDAG-NEXT:    global_store_dword v0, v1, s[4:5]
-; GFX10-SDAG-NEXT:    s_endpgm
-;
-; GFX10-GISEL-LABEL: v_test_i32_x_sub_65:
-; GFX10-GISEL:       ; %bb.0:
-; GFX10-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX10-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT:    global_load_dword v1, v0, s[6:7]
-; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT:    v_subrev_nc_u32_e32 v1, 0x41, v1
-; GFX10-GISEL-NEXT:    global_store_dword v0, v1, s[4:5]
-; GFX10-GISEL-NEXT:    s_endpgm
+; GFX9-LABEL: v_test_i32_x_sub_65:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v1, v0, s[6:7]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_add_u32_e32 v1, 0xffffffbf, v1
+; GFX9-NEXT:    global_store_dword v0, v1, s[4:5]
+; GFX9-NEXT:    s_endpgm
 ;
-; GFX11-SDAG-LABEL: v_test_i32_x_sub_65:
-; GFX11-SDAG:       ; %bb.0:
-; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
-; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3]
-; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v1, 0xffffffbf, v1
-; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11-SDAG-NEXT:    s_endpgm
+; GFX10-LABEL: v_test_i32_x_sub_65:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-NEXT:    global_load_dword v1, v0, s[6:7]
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0xffffffbf, v1
+; GFX10-NEXT:    global_store_dword v0, v1, s[4:5]
+; GFX10-NEXT:    s_endpgm
 ;
-; GFX11-GISEL-LABEL: v_test_i32_x_sub_65:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
-; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3]
-; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT:    v_subrev_nc_u32_e32 v1, 0x41, v1
-; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11-GISEL-NEXT:    s_endpgm
+; GFX11-LABEL: v_test_i32_x_sub_65:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
+; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3]
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0xffffffbf, v1
+; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
   %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %tid.ext
@@ -686,7 +739,7 @@ define amdgpu_kernel void @v_test_i32_x_sub_neg16(ptr addrspace(1) %out, ptr add
 ; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
 ; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, -16, v2
+; SI-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 16, v2
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
@@ -723,79 +776,44 @@ define amdgpu_kernel void @v_test_i32_x_sub_neg16(ptr addrspace(1) %out, ptr add
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    v_subrev_u32_e32 v2, vcc, -16, v3
+; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, 16, v3
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
-; GFX9-SDAG-LABEL: v_test_i32_x_sub_neg16:
-; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[6:7]
-; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT:    v_add_u32_e32 v1, 16, v1
-; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[4:5]
-; GFX9-SDAG-NEXT:    s_endpgm
-;
-; GFX9-GISEL-LABEL: v_test_i32_x_sub_neg16:
-; GFX9-GISEL:       ; %bb.0:
-; GFX9-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[6:7]
-; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT:    v_subrev_u32_e32 v1, -16, v1
-; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[4:5]
-; GFX9-GISEL-NEXT:    s_endpgm
-;
-; GFX10-SDAG-LABEL: v_test_i32_x_sub_neg16:
-; GFX10-SDAG:       ; %bb.0:
-; GFX10-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX10-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT:    global_load_dword v1, v0, s[6:7]
-; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v1, 16, v1
-; GFX10-SDAG-NEXT:    global_store_dword v0, v1, s[4:5]
-; GFX10-SDAG-NEXT:    s_endpgm
-;
-; GFX10-GISEL-LABEL: v_test_i32_x_sub_neg16:
-; GFX10-GISEL:       ; %bb.0:
-; GFX10-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX10-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT:    global_load_dword v1, v0, s[6:7]
-; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT:    v_subrev_nc_u32_e32 v1, -16, v1
-; GFX10-GISEL-NEXT:    global_store_dword v0, v1, s[4:5]
-; GFX10-GISEL-NEXT:    s_endpgm
+; GFX9-LABEL: v_test_i32_x_sub_neg16:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v1, v0, s[6:7]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_add_u32_e32 v1, 16, v1
+; GFX9-NEXT:    global_store_dword v0, v1, s[4:5]
+; GFX9-NEXT:    s_endpgm
 ;
-; GFX11-SDAG-LABEL: v_test_i32_x_sub_neg16:
-; GFX11-SDAG:       ; %bb.0:
-; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
-; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3]
-; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v1, 16, v1
-; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11-SDAG-NEXT:    s_endpgm
+; GFX10-LABEL: v_test_i32_x_sub_neg16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-NEXT:    global_load_dword v1, v0, s[6:7]
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    v_add_nc_u32_e32 v1, 16, v1
+; GFX10-NEXT:    global_store_dword v0, v1, s[4:5]
+; GFX10-NEXT:    s_endpgm
 ;
-; GFX11-GISEL-LABEL: v_test_i32_x_sub_neg16:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
-; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3]
-; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT:    v_subrev_nc_u32_e32 v1, -16, v1
-; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11-GISEL-NEXT:    s_endpgm
+; GFX11-LABEL: v_test_i32_x_sub_neg16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
+; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3]
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    v_add_nc_u32_e32 v1, 16, v1
+; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
   %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %tid.ext
@@ -947,7 +965,7 @@ define amdgpu_kernel void @v_test_i32_x_sub_neg17(ptr addrspace(1) %out, ptr add
 ; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
 ; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, 0xffffffef, v2
+; SI-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 17, v2
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
@@ -983,80 +1001,45 @@ define amdgpu_kernel void @v_test_i32_x_sub_neg17(ptr addrspace(1) %out, ptr add
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    v_subrev_u32_e32 v2, vcc, 0xffffffef, v3
-; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
-; VI-GISEL-NEXT:    s_endpgm
-;
-; GFX9-SDAG-LABEL: v_test_i32_x_sub_neg17:
-; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[6:7]
-; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT:    v_add_u32_e32 v1, 17, v1
-; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[4:5]
-; GFX9-SDAG-NEXT:    s_endpgm
-;
-; GFX9-GISEL-LABEL: v_test_i32_x_sub_neg17:
-; GFX9-GISEL:       ; %bb.0:
-; GFX9-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[6:7]
-; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT:    v_subrev_u32_e32 v1, 0xffffffef, v1
-; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[4:5]
-; GFX9-GISEL-NEXT:    s_endpgm
-;
-; GFX10-SDAG-LABEL: v_test_i32_x_sub_neg17:
-; GFX10-SDAG:       ; %bb.0:
-; GFX10-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX10-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT:    global_load_dword v1, v0, s[6:7]
-; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v1, 17, v1
-; GFX10-SDAG-NEXT:    global_store_dword v0, v1, s[4:5]
-; GFX10-SDAG-NEXT:    s_endpgm
-;
-; GFX10-GISEL-LABEL: v_test_i32_x_sub_neg17:
-; GFX10-GISEL:       ; %bb.0:
-; GFX10-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX10-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT:    global_load_dword v1, v0, s[6:7]
-; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT:    v_subrev_nc_u32_e32 v1, 0xffffffef, v1
-; GFX10-GISEL-NEXT:    global_store_dword v0, v1, s[4:5]
-; GFX10-GISEL-NEXT:    s_endpgm
+; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, 17, v3
+; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
+; VI-GISEL-NEXT:    s_endpgm
 ;
-; GFX11-SDAG-LABEL: v_test_i32_x_sub_neg17:
-; GFX11-SDAG:       ; %bb.0:
-; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
-; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3]
-; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v1, 17, v1
-; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11-SDAG-NEXT:    s_endpgm
+; GFX9-LABEL: v_test_i32_x_sub_neg17:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v1, v0, s[6:7]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_add_u32_e32 v1, 17, v1
+; GFX9-NEXT:    global_store_dword v0, v1, s[4:5]
+; GFX9-NEXT:    s_endpgm
 ;
-; GFX11-GISEL-LABEL: v_test_i32_x_sub_neg17:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
-; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3]
-; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT:    v_subrev_nc_u32_e32 v1, 0xffffffef, v1
-; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11-GISEL-NEXT:    s_endpgm
+; GFX10-LABEL: v_test_i32_x_sub_neg17:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-NEXT:    global_load_dword v1, v0, s[6:7]
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    v_add_nc_u32_e32 v1, 17, v1
+; GFX10-NEXT:    global_store_dword v0, v1, s[4:5]
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: v_test_i32_x_sub_neg17:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
+; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3]
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    v_add_nc_u32_e32 v1, 17, v1
+; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
   %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %tid.ext
@@ -1263,7 +1246,7 @@ define amdgpu_kernel void @v_test_i16_x_sub_64(ptr addrspace(1) %out, ptr addrsp
 ; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
 ; SI-GISEL-NEXT:    buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, 64, v2
+; SI-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 0xffffffc0, v2
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; SI-GISEL-NEXT:    buffer_store_short v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
@@ -1300,44 +1283,79 @@ define amdgpu_kernel void @v_test_i16_x_sub_64(ptr addrspace(1) %out, ptr addrsp
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    v_subrev_u16_e32 v2, 64, v3
+; VI-GISEL-NEXT:    v_add_u16_e32 v2, 0xffc0, v3
 ; VI-GISEL-NEXT:    flat_store_short v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: v_test_i16_x_sub_64:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_ushort v1, v0, s[6:7]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_subrev_u16_e32 v1, 64, v1
-; GFX9-NEXT:    global_store_short v0, v1, s[4:5]
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: v_test_i16_x_sub_64:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_ushort v1, v0, s[6:7]
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    v_subrev_u16_e32 v1, 64, v1
+; GFX9-SDAG-NEXT:    global_store_short v0, v1, s[4:5]
+; GFX9-SDAG-NEXT:    s_endpgm
 ;
-; GFX10-LABEL: v_test_i16_x_sub_64:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
-; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    global_load_ushort v1, v0, s[6:7]
-; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_sub_nc_u16 v1, v1, 64
-; GFX10-NEXT:    global_store_short v0, v1, s[4:5]
-; GFX10-NEXT:    s_endpgm
+; GFX9-GISEL-LABEL: v_test_i16_x_sub_64:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_ushort v1, v0, s[6:7]
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_add_u16_e32 v1, 0xffc0, v1
+; GFX9-GISEL-NEXT:    global_store_short v0, v1, s[4:5]
+; GFX9-GISEL-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: v_test_i16_x_sub_64:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_u16 v1, v0, s[2:3]
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_sub_nc_u16 v1, v1, 64
-; GFX11-NEXT:    global_store_b16 v0, v1, s[0:1]
-; GFX11-NEXT:    s_endpgm
+; GFX10-SDAG-LABEL: v_test_i16_x_sub_64:
+; GFX10-SDAG:       ; %bb.0:
+; GFX10-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX10-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-SDAG-NEXT:    global_load_ushort v1, v0, s[6:7]
+; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT:    v_sub_nc_u16 v1, v1, 64
+; GFX10-SDAG-NEXT:    global_store_short v0, v1, s[4:5]
+; GFX10-SDAG-NEXT:    s_endpgm
+;
+; GFX10-GISEL-LABEL: v_test_i16_x_sub_64:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX10-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-GISEL-NEXT:    global_load_ushort v1, v0, s[6:7]
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT:    v_add_nc_u16 v1, v1, 0xffc0
+; GFX10-GISEL-NEXT:    global_store_short v0, v1, s[4:5]
+; GFX10-GISEL-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: v_test_i16_x_sub_64:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
+; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_u16 v1, v0, s[2:3]
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    v_sub_nc_u16 v1, v1, 64
+; GFX11-SDAG-NEXT:    global_store_b16 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: v_test_i16_x_sub_64:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
+; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_u16 v1, v0, s[2:3]
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    v_add_nc_u16 v1, v1, 0xffc0
+; GFX11-GISEL-NEXT:    global_store_b16 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
   %gep = getelementptr inbounds i16, ptr addrspace(1) %in, i64 %tid.ext
@@ -1379,7 +1397,7 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_zext_to_i32(ptr addrspace(1) %out
 ; SI-GISEL-NEXT:    buffer_load_ushort v3, v[1:2], s[4:7], 0 addr64
 ; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 64, v3
+; SI-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 0xffffffc0, v3
 ; SI-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; SI-GISEL-NEXT:    buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
@@ -1419,50 +1437,91 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_zext_to_i32(ptr addrspace(1) %out
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v3
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    v_subrev_u16_e32 v2, 64, v2
+; VI-GISEL-NEXT:    v_add_u16_e32 v2, 0xffc0, v2
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: v_test_i16_x_sub_64_zext_to_i32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_ushort v1, v1, s[6:7]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_subrev_u16_e32 v1, 64, v1
-; GFX9-NEXT:    global_store_dword v0, v1, s[4:5]
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: v_test_i16_x_sub_64_zext_to_i32:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
+; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_ushort v1, v1, s[6:7]
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    v_subrev_u16_e32 v1, 64, v1
+; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[4:5]
+; GFX9-SDAG-NEXT:    s_endpgm
 ;
-; GFX10-LABEL: v_test_i16_x_sub_64_zext_to_i32:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
-; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    global_load_ushort v1, v1, s[6:7]
-; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_sub_nc_u16 v1, v1, 64
-; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX10-NEXT:    global_store_dword v0, v1, s[4:5]
-; GFX10-NEXT:    s_endpgm
+; GFX9-GISEL-LABEL: v_test_i16_x_sub_64_zext_to_i32:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
+; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_ushort v1, v1, s[6:7]
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_add_u16_e32 v1, 0xffc0, v1
+; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[4:5]
+; GFX9-GISEL-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: v_test_i16_x_sub_64_zext_to_i32:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_u16 v1, v1, s[2:3]
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_sub_nc_u16 v1, v1, 64
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT:    s_endpgm
+; GFX10-SDAG-LABEL: v_test_i16_x_sub_64_zext_to_i32:
+; GFX10-SDAG:       ; %bb.0:
+; GFX10-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX10-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
+; GFX10-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-SDAG-NEXT:    global_load_ushort v1, v1, s[6:7]
+; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT:    v_sub_nc_u16 v1, v1, 64
+; GFX10-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX10-SDAG-NEXT:    global_store_dword v0, v1, s[4:5]
+; GFX10-SDAG-NEXT:    s_endpgm
+;
+; GFX10-GISEL-LABEL: v_test_i16_x_sub_64_zext_to_i32:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX10-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
+; GFX10-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-GISEL-NEXT:    global_load_ushort v1, v1, s[6:7]
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT:    v_add_nc_u16 v1, v1, 0xffc0
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX10-GISEL-NEXT:    global_store_dword v0, v1, s[4:5]
+; GFX10-GISEL-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: v_test_i16_x_sub_64_zext_to_i32:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
+; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
+; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_u16 v1, v1, s[2:3]
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    v_sub_nc_u16 v1, v1, 64
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: v_test_i16_x_sub_64_zext_to_i32:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
+; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
+; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_u16 v1, v1, s[2:3]
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    v_add_nc_u16 v1, v1, 0xffc0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
   %gep = getelementptr inbounds i16, ptr addrspace(1) %in, i64 %tid.ext
@@ -1511,8 +1570,8 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_multi_use(ptr addrspace(1) %out,
 ; SI-GISEL-NEXT:    buffer_load_ushort v3, v[0:1], s[4:7], 0 addr64 glc
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
-; SI-GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, 64, v2
-; SI-GISEL-NEXT:    v_subrev_i32_e32 v3, vcc, 64, v3
+; SI-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 0xffffffc0, v2
+; SI-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 0xffffffc0, v3
 ; SI-GISEL-NEXT:    buffer_store_short v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; SI-GISEL-NEXT:    buffer_store_short v3, v[0:1], s[0:3], 0 addr64
@@ -1559,66 +1618,119 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_multi_use(ptr addrspace(1) %out,
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-GISEL-NEXT:    v_subrev_u16_e32 v2, 64, v3
-; VI-GISEL-NEXT:    v_subrev_u16_e32 v3, 64, v4
+; VI-GISEL-NEXT:    v_add_u16_e32 v2, 0xffc0, v3
+; VI-GISEL-NEXT:    v_add_u16_e32 v3, 0xffc0, v4
 ; VI-GISEL-NEXT:    flat_store_short v[0:1], v2
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    flat_store_short v[0:1], v3
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: v_test_i16_x_sub_64_multi_use:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_ushort v1, v0, s[6:7] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_load_ushort v2, v0, s[6:7] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_subrev_u16_e32 v1, 64, v1
-; GFX9-NEXT:    v_subrev_u16_e32 v2, 64, v2
-; GFX9-NEXT:    global_store_short v0, v1, s[4:5]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_store_short v0, v2, s[4:5]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_endpgm
+; GFX9-SDAG-LABEL: v_test_i16_x_sub_64_multi_use:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_ushort v1, v0, s[6:7] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_load_ushort v2, v0, s[6:7] glc
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    v_subrev_u16_e32 v1, 64, v1
+; GFX9-SDAG-NEXT:    v_subrev_u16_e32 v2, 64, v2
+; GFX9-SDAG-NEXT:    global_store_short v0, v1, s[4:5]
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    global_store_short v0, v2, s[4:5]
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT:    s_endpgm
 ;
-; GFX10-LABEL: v_test_i16_x_sub_64_multi_use:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
-; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    global_load_ushort v1, v0, s[6:7] glc dlc
-; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    global_load_ushort v2, v0, s[6:7] glc dlc
-; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_sub_nc_u16 v1, v1, 64
-; GFX10-NEXT:    v_sub_nc_u16 v2, v2, 64
-; GFX10-NEXT:    global_store_short v0, v1, s[4:5]
-; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    global_store_short v0, v2, s[4:5]
-; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    s_endpgm
+; GFX9-GISEL-LABEL: v_test_i16_x_sub_64_multi_use:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_ushort v1, v0, s[6:7] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_load_ushort v2, v0, s[6:7] glc
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_add_u16_e32 v1, 0xffc0, v1
+; GFX9-GISEL-NEXT:    v_add_u16_e32 v2, 0xffc0, v2
+; GFX9-GISEL-NEXT:    global_store_short v0, v1, s[4:5]
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    global_store_short v0, v2, s[4:5]
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: v_test_i16_x_sub_64_multi_use:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_u16 v1, v0, s[2:3] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_u16 v2, v0, s[2:3] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_sub_nc_u16 v1, v1, 64
-; GFX11-NEXT:    v_sub_nc_u16 v2, v2, 64
-; GFX11-NEXT:    global_store_b16 v0, v1, s[0:1] dlc
-; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT:    global_store_b16 v0, v2, s[0:1] dlc
-; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT:    s_endpgm
+; GFX10-SDAG-LABEL: v_test_i16_x_sub_64_multi_use:
+; GFX10-SDAG:       ; %bb.0:
+; GFX10-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX10-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-SDAG-NEXT:    global_load_ushort v1, v0, s[6:7] glc dlc
+; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT:    global_load_ushort v2, v0, s[6:7] glc dlc
+; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT:    v_sub_nc_u16 v1, v1, 64
+; GFX10-SDAG-NEXT:    v_sub_nc_u16 v2, v2, 64
+; GFX10-SDAG-NEXT:    global_store_short v0, v1, s[4:5]
+; GFX10-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-SDAG-NEXT:    global_store_short v0, v2, s[4:5]
+; GFX10-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-SDAG-NEXT:    s_endpgm
+;
+; GFX10-GISEL-LABEL: v_test_i16_x_sub_64_multi_use:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX10-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-GISEL-NEXT:    global_load_ushort v1, v0, s[6:7] glc dlc
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT:    global_load_ushort v2, v0, s[6:7] glc dlc
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT:    v_add_nc_u16 v1, v1, 0xffc0
+; GFX10-GISEL-NEXT:    v_add_nc_u16 v2, v2, 0xffc0
+; GFX10-GISEL-NEXT:    global_store_short v0, v1, s[4:5]
+; GFX10-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-GISEL-NEXT:    global_store_short v0, v2, s[4:5]
+; GFX10-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-GISEL-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: v_test_i16_x_sub_64_multi_use:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
+; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_u16 v1, v0, s[2:3] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_u16 v2, v0, s[2:3] glc dlc
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    v_sub_nc_u16 v1, v1, 64
+; GFX11-SDAG-NEXT:    v_sub_nc_u16 v2, v2, 64
+; GFX11-SDAG-NEXT:    global_store_b16 v0, v1, s[0:1] dlc
+; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX11-SDAG-NEXT:    global_store_b16 v0, v2, s[0:1] dlc
+; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: v_test_i16_x_sub_64_multi_use:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[2:3], 0x24
+; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_u16 v1, v0, s[2:3] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_u16 v2, v0, s[2:3] glc dlc
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    v_add_nc_u16 v1, v1, 0xffc0
+; GFX11-GISEL-NEXT:    v_add_nc_u16 v2, v2, 0xffc0
+; GFX11-GISEL-NEXT:    global_store_b16 v0, v1, s[0:1] dlc
+; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX11-GISEL-NEXT:    global_store_b16 v0, v2, s[0:1] dlc
+; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX11-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
   %gep = getelementptr inbounds i16, ptr addrspace(1) %in, i64 %tid.ext
@@ -1664,8 +1776,8 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_64_64(ptr addrspace(1) %out, ptr a
 ; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; SI-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; SI-GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, 64, v2
-; SI-GISEL-NEXT:    v_subrev_i32_e32 v3, vcc, 64, v3
+; SI-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 0xffffffc0, v2
+; SI-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 0xffffffc0, v3
 ; SI-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
 ; SI-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
 ; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
@@ -1698,7 +1810,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_64_64(ptr addrspace(1) %out, ptr a
 ; VI-GISEL:       ; %bb.0:
 ; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x24
 ; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
-; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 64
+; VI-GISEL-NEXT:    v_not_b32_e32 v4, 63
 ; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
@@ -1710,8 +1822,8 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_64_64(ptr addrspace(1) %out, ptr a
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    v_subrev_u16_e32 v2, 64, v3
-; VI-GISEL-NEXT:    v_sub_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-GISEL-NEXT:    v_add_u16_e32 v2, 0xffc0, v3
+; VI-GISEL-NEXT:    v_add_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; VI-GISEL-NEXT:    v_or_b32_e32 v2, v2, v3
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
@@ -1792,8 +1904,8 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_7_64(ptr addrspace(1) %out, ptr ad
 ; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; SI-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; SI-GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, 7, v2
-; SI-GISEL-NEXT:    v_subrev_i32_e32 v3, vcc, 64, v3
+; SI-GISEL-NEXT:    v_add_i32_e32 v2, vcc, -7, v2
+; SI-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 0xffffffc0, v3
 ; SI-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
 ; SI-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
 ; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
@@ -1826,7 +1938,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_7_64(ptr addrspace(1) %out, ptr ad
 ; VI-GISEL:       ; %bb.0:
 ; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x24
 ; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
-; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 64
+; VI-GISEL-NEXT:    v_not_b32_e32 v4, 63
 ; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
@@ -1838,8 +1950,8 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_7_64(ptr addrspace(1) %out, ptr ad
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    v_subrev_u16_e32 v2, 7, v3
-; VI-GISEL-NEXT:    v_sub_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-GISEL-NEXT:    v_add_u16_e32 v2, -7, v3
+; VI-GISEL-NEXT:    v_add_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; VI-GISEL-NEXT:    v_or_b32_e32 v2, v2, v3
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
@@ -1933,8 +2045,8 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_64_123(ptr addrspace(1) %out, ptr
 ; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; SI-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; SI-GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, 64, v2
-; SI-GISEL-NEXT:    v_subrev_i32_e32 v3, vcc, 0x7b, v3
+; SI-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 0xffffffc0, v2
+; SI-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 0xffffff85, v3
 ; SI-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
 ; SI-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
 ; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
@@ -1967,7 +2079,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_64_123(ptr addrspace(1) %out, ptr
 ; VI-GISEL:       ; %bb.0:
 ; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x24
 ; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
-; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0x7b
+; VI-GISEL-NEXT:    v_mov_b32_e32 v4, 0xffffff85
 ; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
@@ -1979,8 +2091,8 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_64_123(ptr addrspace(1) %out, ptr
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    v_subrev_u16_e32 v2, 64, v3
-; VI-GISEL-NEXT:    v_sub_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-GISEL-NEXT:    v_add_u16_e32 v2, 0xffc0, v3
+; VI-GISEL-NEXT:    v_add_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; VI-GISEL-NEXT:    v_or_b32_e32 v2, v2, v3
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
@@ -2074,7 +2186,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_7_0(ptr addrspace(1) %out, ptr add
 ; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; SI-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; SI-GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, 7, v2
+; SI-GISEL-NEXT:    v_add_i32_e32 v2, vcc, -7, v2
 ; SI-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
 ; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
 ; SI-GISEL-NEXT:    v_or_b32_e32 v2, v2, v3
@@ -2117,7 +2229,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_7_0(ptr addrspace(1) %out, ptr add
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; VI-GISEL-NEXT:    v_lshrrev_b32_e32 v2, 16, v3
-; VI-GISEL-NEXT:    v_subrev_u16_e32 v3, 7, v3
+; VI-GISEL-NEXT:    v_add_u16_e32 v3, -7, v3
 ; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
 ; VI-GISEL-NEXT:    v_or_b32_e32 v2, v3, v2
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
@@ -2197,7 +2309,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_0_16(ptr addrspace(1) %out, ptr ad
 ; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; SI-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; SI-GISEL-NEXT:    v_subrev_i32_e32 v3, vcc, 16, v3
+; SI-GISEL-NEXT:    v_add_i32_e32 v3, vcc, -16, v3
 ; SI-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
 ; SI-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
 ; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
@@ -2237,11 +2349,11 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_0_16(ptr addrspace(1) %out, ptr ad
 ; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1]
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
-; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 16
+; VI-GISEL-NEXT:    v_mov_b32_e32 v2, -16
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    v_sub_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-GISEL-NEXT:    v_add_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; VI-GISEL-NEXT:    v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
@@ -2319,7 +2431,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_0_1_0(ptr addrspace(1) %out, ptr a
 ; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; SI-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; SI-GISEL-NEXT:    v_subrev_i32_e32 v3, vcc, 0xffffc400, v3
+; SI-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 0x3c00, v3
 ; SI-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
 ; SI-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
 ; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
@@ -2359,11 +2471,11 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_0_1_0(ptr addrspace(1) %out, ptr a
 ; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1]
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
-; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xffffc400
+; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x3c00
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    v_sub_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-GISEL-NEXT:    v_add_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; VI-GISEL-NEXT:    v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
@@ -2454,7 +2566,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_0_neg1_0(ptr addrspace(1) %out, pt
 ; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; SI-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; SI-GISEL-NEXT:    v_subrev_i32_e32 v3, vcc, 0x4400, v3
+; SI-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 0xffffbc00, v3
 ; SI-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
 ; SI-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
 ; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
@@ -2494,11 +2606,11 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_0_neg1_0(ptr addrspace(1) %out, pt
 ; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1]
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
-; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x4400
+; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0xffffbc00
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT:    v_sub_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-GISEL-NEXT:    v_add_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; VI-GISEL-NEXT:    v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; VI-GISEL-NEXT:    s_endpgm
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/alu-roundtrip.ll b/llvm/test/CodeGen/RISCV/GlobalISel/alu-roundtrip.ll
index c503d6541b0a57..14ff9e01ab3bc2 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/alu-roundtrip.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/alu-roundtrip.ll
@@ -457,19 +457,19 @@ entry:
 define i64 @subi_i64(i64 %a) {
 ; RV32IM-LABEL: subi_i64:
 ; RV32IM:       # %bb.0: # %entry
-; RV32IM-NEXT:    lui a2, 301
-; RV32IM-NEXT:    addi a3, a2, 1548
-; RV32IM-NEXT:    sub a2, a0, a3
-; RV32IM-NEXT:    sltu a0, a0, a3
-; RV32IM-NEXT:    sub a1, a1, a0
-; RV32IM-NEXT:    mv a0, a2
+; RV32IM-NEXT:    lui a2, 1048275
+; RV32IM-NEXT:    addi a2, a2, -1548
+; RV32IM-NEXT:    add a0, a0, a2
+; RV32IM-NEXT:    sltu a2, a0, a2
+; RV32IM-NEXT:    addi a1, a1, -1
+; RV32IM-NEXT:    add a1, a1, a2
 ; RV32IM-NEXT:    ret
 ;
 ; RV64IM-LABEL: subi_i64:
 ; RV64IM:       # %bb.0: # %entry
-; RV64IM-NEXT:    lui a1, 301
-; RV64IM-NEXT:    addiw a1, a1, 1548
-; RV64IM-NEXT:    sub a0, a0, a1
+; RV64IM-NEXT:    lui a1, 1048275
+; RV64IM-NEXT:    addiw a1, a1, -1548
+; RV64IM-NEXT:    add a0, a0, a1
 ; RV64IM-NEXT:    ret
 entry:
   %0 = sub i64 %a, 1234444
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/alu-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/alu-rv32.mir
index 2ef5de501ee711..39d0ee7c382dfc 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/alu-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/alu-rv32.mir
@@ -200,8 +200,9 @@ body:             |
     ; RV32I: liveins: $x10
     ; RV32I-NEXT: {{  $}}
     ; RV32I-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
-    ; RV32I-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI [[COPY]], 1234
-    ; RV32I-NEXT: $x10 = COPY [[ADDI]]
+    ; RV32I-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, -1234
+    ; RV32I-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY]], [[ADDI]]
+    ; RV32I-NEXT: $x10 = COPY [[SUB]]
     ; RV32I-NEXT: PseudoRET implicit $x10
     %0:gprb(s32) = COPY $x10
     %1:gprb(s32) = G_CONSTANT i32 -1234
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/alu-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/alu-rv64.mir
index be12333e1499b2..527036d8b750fc 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/alu-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/alu-rv64.mir
@@ -188,8 +188,9 @@ body:             |
     ; RV64I: liveins: $x10
     ; RV64I-NEXT: {{  $}}
     ; RV64I-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
-    ; RV64I-NEXT: [[ADDIW:%[0-9]+]]:gpr = ADDIW [[COPY]], 1234
-    ; RV64I-NEXT: $x10 = COPY [[ADDIW]]
+    ; RV64I-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, -1234
+    ; RV64I-NEXT: [[SUBW:%[0-9]+]]:gpr = SUBW [[COPY]], [[ADDI]]
+    ; RV64I-NEXT: $x10 = COPY [[SUBW]]
     ; RV64I-NEXT: PseudoRET implicit $x10
     %0:gprb(s64) = COPY $x10
     %1:gprb(s32) = G_TRUNC %0(s64)
@@ -440,8 +441,9 @@ body:             |
     ; RV64I: liveins: $x10
     ; RV64I-NEXT: {{  $}}
     ; RV64I-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
-    ; RV64I-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI [[COPY]], 1234
-    ; RV64I-NEXT: $x10 = COPY [[ADDI]]
+    ; RV64I-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, -1234
+    ; RV64I-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY]], [[ADDI]]
+    ; RV64I-NEXT: $x10 = COPY [[SUB]]
     ; RV64I-NEXT: PseudoRET implicit $x10
     %0:gprb(s64) = COPY $x10
     %1:gprb(s64) = G_CONSTANT i64 -1234
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-medium-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-medium-rv64.mir
index 5d980e7721458e..d0237892d132f3 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-medium-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-medium-rv64.mir
@@ -111,8 +111,8 @@ body:             |
     %2:gprb(s64) = G_ASSERT_SEXT %1, 32
     %7:gprb(s64) = G_CONSTANT i64 5
     %3:gprb(s64) = G_SEXT_INREG %2, 32
-    %4:gprb(s64) = G_CONSTANT i64 1
-    %5:gprb(s64) = G_SUB %3, %4
+    %4:gprb(s64) = G_CONSTANT i64 -1
+    %5:gprb(s64) = G_ADD %3, %4
     %26:gprb(s64) = G_ICMP intpred(ugt), %5(s64), %7
     G_BRCOND %26(s64), %bb.8
 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-pic-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-pic-rv32.mir
index 27fe465ccf696b..396421a4ba739a 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-pic-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-pic-rv32.mir
@@ -115,8 +115,8 @@ body:             |
     %12:gprb(s32) = G_CONSTANT i32 3
     %13:gprb(s32) = G_CONSTANT i32 4
     %14:gprb(s32) = G_CONSTANT i32 1000
-    %1:gprb(s32) = G_CONSTANT i32 1
-    %2:gprb(s32) = G_SUB %0, %1
+    %1:gprb(s32) = G_CONSTANT i32 -1
+    %2:gprb(s32) = G_ADD %0, %1
     %16:gprb(s32) = G_ICMP intpred(ugt), %2(s32), %4
     G_BRCOND %16(s32), %bb.8
 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-pic-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-pic-rv64.mir
index 77156b913c5e8b..0a08586bc1af4f 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-pic-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-pic-rv64.mir
@@ -112,8 +112,8 @@ body:             |
     %2:gprb(s64) = G_ASSERT_SEXT %1, 32
     %7:gprb(s64) = G_CONSTANT i64 5
     %3:gprb(s64) = G_SEXT_INREG %2, 32
-    %4:gprb(s64) = G_CONSTANT i64 1
-    %5:gprb(s64) = G_SUB %3, %4
+    %4:gprb(s64) = G_CONSTANT i64 -1
+    %5:gprb(s64) = G_ADD %3, %4
     %26:gprb(s64) = G_ICMP intpred(ugt), %5(s64), %7
     G_BRCOND %26(s64), %bb.8
 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-rv32.mir
index 388c238b86eb6f..efa1a6c86027db 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-rv32.mir
@@ -171,8 +171,8 @@ body:             |
     %12:gprb(s32) = G_CONSTANT i32 3
     %13:gprb(s32) = G_CONSTANT i32 4
     %14:gprb(s32) = G_CONSTANT i32 1000
-    %1:gprb(s32) = G_CONSTANT i32 1
-    %2:gprb(s32) = G_SUB %0, %1
+    %1:gprb(s32) = G_CONSTANT i32 -1
+    %2:gprb(s32) = G_ADD %0, %1
     %16:gprb(s32) = G_ICMP intpred(ugt), %2(s32), %4
     G_BRCOND %16(s32), %bb.8
 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-small-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-small-rv64.mir
index 09a855105c2627..12b1517e2cfb54 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-small-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-small-rv64.mir
@@ -112,8 +112,8 @@ body:             |
     %2:gprb(s64) = G_ASSERT_SEXT %1, 32
     %7:gprb(s64) = G_CONSTANT i64 5
     %3:gprb(s64) = G_SEXT_INREG %2, 32
-    %4:gprb(s64) = G_CONSTANT i64 1
-    %5:gprb(s64) = G_SUB %3, %4
+    %4:gprb(s64) = G_CONSTANT i64 -1
+    %5:gprb(s64) = G_ADD %3, %4
     %26:gprb(s64) = G_ICMP intpred(ugt), %5(s64), %7
     G_BRCOND %26(s64), %bb.8
 

>From fa7ec2bea3bc5df8adc810a970b81cdcc42d1c85 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 30 Oct 2024 15:43:07 -0700
Subject: [PATCH 2/5] fixup! address review comments.

---
 llvm/include/llvm/Target/GlobalISel/Combine.td |  3 ++-
 llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 13 +++++--------
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 9891db5ceb6fa9..80a22c35ebceff 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -338,7 +338,8 @@ def mul_to_shl : GICombineRule<
 // (sub x, C) -> (add x, -C)
 def sub_to_add : GICombineRule<
   (defs root:$d, build_fn_matchinfo:$matchinfo),
-  (match (G_SUB $d, $op1, $op2):$mi,
+  (match (G_CONSTANT $c, $imm),
+         (G_SUB $d, $op1, $c):$mi,
          [{ return Helper.matchCombineSubToAdd(*${mi}, ${matchinfo}); }]),
   (apply [{ Helper.applyBuildFnNoErase(*${mi}, ${matchinfo}); }])>;
 
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 91e5af9dfd8e25..da9860352b00d6 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2043,17 +2043,14 @@ void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
 
 bool CombinerHelper::matchCombineSubToAdd(MachineInstr &MI,
                                           BuildFnTy &MatchInfo) {
-  assert(MI.getOpcode() == TargetOpcode::G_SUB && "Expected a G_SUB");
-  auto MaybeImmVal =
-      getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
-  if (!MaybeImmVal)
-    return false;
+  GSub &Sub = cast<GSub>(MI);
 
-  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+  LLT Ty = MRI.getType(Sub.getReg(0));
+
+  APInt Imm = getIConstantFromReg(Sub.getRHSReg(), MRI);
 
-  APInt NegImm = -MaybeImmVal->Value;
   MatchInfo = [=, &MI](MachineIRBuilder &B) {
-    auto NegCst = B.buildConstant(Ty, NegImm);
+    auto NegCst = B.buildConstant(Ty, -Imm);
     Observer.changingInstr(MI);
     MI.setDesc(B.getTII().get(TargetOpcode::G_ADD));
     MI.getOperand(2).setReg(NegCst.getReg(0));

>From 78d79f0edff688b99a0773165a04d1acd9a419af Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 30 Oct 2024 16:20:21 -0700
Subject: [PATCH 3/5] fixup! Add nuw/nsw flag tests.

---
 .../prelegalizercombiner-trivial-arith.mir    | 63 +++++++++++++++++++
 1 file changed, 63 insertions(+)

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir
index bc3be691bd25a0..4c3faa94039097 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir
@@ -488,3 +488,66 @@ body:             |
     RET_ReallyLR implicit $w0
 
 ...
+---
+name:            sub_to_add
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $w0
+    ; CHECK-LABEL: name: sub_to_add
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %x:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: %op:_(s32) = G_ADD %x, [[C]]
+    ; CHECK-NEXT: $w0 = COPY %op(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %cst:_(s32) = G_CONSTANT i32 1
+    %op:_(s32) = G_SUB %x(s32), %cst
+    $w0 = COPY %op(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            sub_to_add_nuw
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $w0
+    ; CHECK-LABEL: name: sub_to_add_nuw
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %x:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: %op:_(s32) = G_ADD %x, [[C]]
+    ; CHECK-NEXT: $w0 = COPY %op(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %cst:_(s32) = G_CONSTANT i32 1
+    %op:_(s32) = nuw G_SUB %x(s32), %cst
+    $w0 = COPY %op(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            sub_to_add_nsw
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $w0
+    ; CHECK-LABEL: name: sub_to_add_nsw
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %x:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: %op:_(s32) = nsw G_ADD %x, [[C]]
+    ; CHECK-NEXT: $w0 = COPY %op(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %cst:_(s32) = G_CONSTANT i32 1
+    %op:_(s32) = nsw G_SUB %x(s32), %cst
+    $w0 = COPY %op(s32)
+    RET_ReallyLR implicit $w0
+
+...

>From 1e3a249129e7c63696030abaa663baf345019d14 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 30 Oct 2024 22:55:04 -0700
Subject: [PATCH 4/5] fixup! Add isLegalOrBeforeLegalizer check

---
 llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index da9860352b00d6..494938c32739ab 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2047,6 +2047,9 @@ bool CombinerHelper::matchCombineSubToAdd(MachineInstr &MI,
 
   LLT Ty = MRI.getType(Sub.getReg(0));
 
+  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {Ty}}))
+    return false;
+
   APInt Imm = getIConstantFromReg(Sub.getRHSReg(), MRI);
 
   MatchInfo = [=, &MI](MachineIRBuilder &B) {

>From 27433abcb8583c2aeacb836a21e6e43607af2795 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Thu, 31 Oct 2024 10:25:29 -0700
Subject: [PATCH 5/5] fixup! Add isConstantLegalOrBeforeLegalizer.

---
 llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 494938c32739ab..55df8dcb095f58 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2050,6 +2050,9 @@ bool CombinerHelper::matchCombineSubToAdd(MachineInstr &MI,
   if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {Ty}}))
     return false;
 
+  if (!isConstantLegalOrBeforeLegalizer(Ty))
+    return false;
+
   APInt Imm = getIConstantFromReg(Sub.getRHSReg(), MRI);
 
   MatchInfo = [=, &MI](MachineIRBuilder &B) {



More information about the llvm-commits mailing list