[llvm] 17f3e00 - Recommit "[GISel][AArch64][AMDGPU][RISCV] Canonicalize (sub X, C) -> (add X, -C) (#114309)"
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 8 10:22:33 PST 2024
Author: Craig Topper
Date: 2024-11-08T10:21:46-08:00
New Revision: 17f3e00911b860d535f41185e605c47babcc2039
URL: https://github.com/llvm/llvm-project/commit/17f3e00911b860d535f41185e605c47babcc2039
DIFF: https://github.com/llvm/llvm-project/commit/17f3e00911b860d535f41185e605c47babcc2039.diff
LOG: Recommit "[GISel][AArch64][AMDGPU][RISCV] Canonicalize (sub X, C) -> (add X, -C) (#114309)"
The increase in fallbacks that was previously reported was not caused
by this change.
Original description:
This matches InstCombine and DAGCombine.
RISC-V only has an ADDI instruction so without this we need additional
patterns to do the conversion.
Some of the AMDGPU tests look like possible regressions. Maybe some
patterns from isel aren't imported.
Added:
Modified:
llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
llvm/include/llvm/Target/GlobalISel/Combine.td
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
llvm/lib/Target/RISCV/RISCVGISel.td
llvm/test/CodeGen/AArch64/GlobalISel/combine-integer.mir
llvm/test/CodeGen/AArch64/GlobalISel/combine-narrow-binop.mir
llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir
llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/sub.v2i16.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
llvm/test/CodeGen/AMDGPU/ctlz.ll
llvm/test/CodeGen/AMDGPU/div_i128.ll
llvm/test/CodeGen/AMDGPU/div_v2i128.ll
llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
llvm/test/CodeGen/AMDGPU/itofp.i128.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
llvm/test/CodeGen/RISCV/GlobalISel/alu-roundtrip.ll
llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/alu-rv32.mir
llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/alu-rv64.mir
llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-medium-rv64.mir
llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-pic-rv32.mir
llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-pic-rv64.mir
llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-rv32.mir
llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-small-rv64.mir
llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 72573facf1a7fe..cd2022e88a0df1 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -321,6 +321,9 @@ class CombinerHelper {
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal);
void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal);
+ // Transform a G_SUB with constant on the RHS to G_ADD.
+ bool matchCombineSubToAdd(MachineInstr &MI, BuildFnTy &MatchInfo);
+
// Transform a G_SHL with an extended source into a narrower shift if
// possible.
bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 5928b369913916..6da089b719c06c 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -335,6 +335,14 @@ def mul_to_shl : GICombineRule<
[{ return Helper.matchCombineMulToShl(*${mi}, ${matchinfo}); }]),
(apply [{ Helper.applyCombineMulToShl(*${mi}, ${matchinfo}); }])>;
+// (sub x, C) -> (add x, -C)
+def sub_to_add : GICombineRule<
+ (defs root:$d, build_fn_matchinfo:$matchinfo),
+ (match (G_CONSTANT $c, $imm),
+ (G_SUB $d, $op1, $c):$mi,
+ [{ return Helper.matchCombineSubToAdd(*${mi}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFnNoErase(*${mi}, ${matchinfo}); }])>;
+
// shl ([asz]ext x), y => zext (shl x, y), if shift does not overflow int
def reduce_shl_of_extend_matchdata : GIDefMatchData<"RegisterImmPair">;
def reduce_shl_of_extend : GICombineRule<
@@ -1912,8 +1920,9 @@ def bitreverse_shift : GICombineGroup<[bitreverse_shl, bitreverse_lshr]>;
def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp,
select_to_iminmax, match_selects]>;
-def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd,
- mul_by_neg_one, idempotent_prop]>;
+def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, sub_to_add,
+ add_p2i_to_ptradd, mul_by_neg_one,
+ idempotent_prop]>;
def fma_combines : GICombineGroup<[combine_fadd_fmul_to_fmad_or_fma,
combine_fadd_fpext_fmul_to_fmad_or_fma, combine_fadd_fma_fmul_to_fmad_or_fma,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 3b648a7e3f4472..32afbeaeaa249e 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2044,6 +2044,31 @@ void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
Observer.changedInstr(MI);
}
+/// Match a G_SUB whose RHS is an integer constant C and prepare an in-place
+/// rewrite to (G_ADD x, -C).
+///
+/// \param MI        the G_SUB instruction being combined.
+/// \param MatchInfo receives the build function that performs the rewrite.
+/// \returns false when G_ADD or a constant of the result type is not legal
+///          (and we are past the legalizer); true otherwise.
+bool CombinerHelper::matchCombineSubToAdd(MachineInstr &MI,
+                                          BuildFnTy &MatchInfo) {
+  GSub &Sub = cast<GSub>(MI);
+
+  LLT Ty = MRI.getType(Sub.getReg(0));
+
+  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {Ty}}))
+    return false;
+
+  if (!isConstantLegalOrBeforeLegalizer(Ty))
+    return false;
+
+  APInt Imm = getIConstantFromReg(Sub.getRHSReg(), MRI);
+
+  MatchInfo = [=, &MI](MachineIRBuilder &B) {
+    auto NegCst = B.buildConstant(Ty, -Imm);
+    Observer.changingInstr(MI);
+    MI.setDesc(B.getTII().get(TargetOpcode::G_ADD));
+    MI.getOperand(2).setReg(NegCst.getReg(0));
+    // (sub nuw x, C) does not imply (add nuw x, -C), so always drop nuw.
+    MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
+    // nsw carries over except when C is the minimum signed value: there
+    // -C == C, and (sub nsw x, MIN) is defined for negative x while
+    // (add nsw x, MIN) overflows for exactly those x.
+    if (Imm.isMinSignedValue())
+      MI.clearFlag(MachineInstr::MIFlag::NoSWrap);
+    Observer.changedInstr(MI);
+  };
+  return true;
+}
+
// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
RegisterImmPair &MatchData) {
diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td
index 10906aebf1bf84..838cec9c7717ca 100644
--- a/llvm/lib/Target/RISCV/RISCVGISel.td
+++ b/llvm/lib/Target/RISCV/RISCVGISel.td
@@ -106,15 +106,6 @@ def gi_zexti16 : GIComplexOperandMatcher<s32, "selectZExtBits<16>">,
def gi_zexti8 : GIComplexOperandMatcher<s32, "selectZExtBits<8>">,
GIComplexPatternEquiv<zexti8>;
-// FIXME: Canonicalize (sub X, C) -> (add X, -C) earlier.
-def : Pat<(XLenVT (sub GPR:$rs1, simm12Plus1:$imm)),
- (ADDI GPR:$rs1, (NegImm simm12Plus1:$imm))>;
-
-let Predicates = [IsRV64] in {
-def : Pat<(i32 (sub GPR:$rs1, simm12Plus1i32:$imm)),
- (ADDIW GPR:$rs1, (i64 (NegImm $imm)))>;
-}
-
// Ptr type used in patterns with GlobalISelEmitter
def PtrVT : PtrValueTypeByHwMode<XLenVT, 0>;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-integer.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-integer.mir
index 2f10a497fa74cb..5cbff0f0c74cb7 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-integer.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-integer.mir
@@ -308,8 +308,8 @@ body: |
; CHECK: liveins: $w0, $w1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %a:_(s64) = COPY $x0
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 71
- ; CHECK-NEXT: %sub:_(s64) = G_SUB %a, [[C]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -71
+ ; CHECK-NEXT: %sub:_(s64) = G_ADD %a, [[C]]
; CHECK-NEXT: $x0 = COPY %sub(s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%a:_(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-narrow-binop.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-narrow-binop.mir
index f207e9c149a476..e9d4af7da5d06f 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-narrow-binop.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-narrow-binop.mir
@@ -88,8 +88,8 @@ body: |
; CHECK-LABEL: name: test_combine_trunc_sub_i128
; CHECK: %lhs:_(s128) = COPY $q0
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %lhs(s128)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
- ; CHECK-NEXT: %small:_(s32) = G_SUB [[TRUNC]], [[C]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -5
+ ; CHECK-NEXT: %small:_(s32) = G_ADD [[TRUNC]], [[C]]
; CHECK-NEXT: $w0 = COPY %small(s32)
%lhs:_(s128) = COPY $q0
%rhs:_(s128) = G_CONSTANT i128 5
@@ -103,8 +103,8 @@ body: |
bb.1:
; CHECK-LABEL: name: test_combine_trunc_sub_i128_multi_use
; CHECK: %lhs:_(s128) = COPY $q0
- ; CHECK-NEXT: %rhs:_(s128) = G_CONSTANT i128 5
- ; CHECK-NEXT: %res:_(s128) = G_SUB %lhs, %rhs
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 -5
+ ; CHECK-NEXT: %res:_(s128) = G_ADD %lhs, [[C]]
; CHECK-NEXT: %small:_(s32) = G_TRUNC %res(s128)
; CHECK-NEXT: $q0 = COPY %res(s128)
; CHECK-NEXT: $w0 = COPY %small(s32)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir
index 04968dab3a37ce..591b6a17928cb1 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir
@@ -95,7 +95,7 @@ body: |
%11:_(s8) = G_CONSTANT i8 1
; CHECK: [[T3:%[0-9]+]]:_(s8) = G_TRUNC [[T0]](s32)
%7:_(s8) = G_SUB %2, %11
- ; CHECK: [[T4:%[0-9]+]]:_(s8) = G_SUB [[T3]], {{.*}}
+ ; CHECK: [[T4:%[0-9]+]]:_(s8) = G_ADD [[T3]], {{.*}}
G_BR %bb.3.exit
bb.3.exit:
; CHECK: bb.3.exit:
@@ -197,7 +197,7 @@ body: |
%7:_(s8) = G_CONSTANT i8 1
; CHECK: [[T3:%[0-9]+]]:_(s8) = G_TRUNC [[T0]](s32)
%8:_(s8) = G_SUB %2, %7
- ; CHECK: [[T4:%[0-9]+]]:_(s8) = G_SUB [[T3]], {{.*}}
+ ; CHECK: [[T4:%[0-9]+]]:_(s8) = G_ADD [[T3]], {{.*}}
G_BR %bb.3.exit
bb.3.exit:
; CHECK: bb.3.exit:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir
index 0900dd4267a2e4..4c3faa94039097 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir
@@ -289,8 +289,8 @@ body: |
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %x:_(s32) = COPY $w0
- ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: %op:_(s32) = G_SUB %x, %cst
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; CHECK-NEXT: %op:_(s32) = G_ADD %x, [[C]]
; CHECK-NEXT: $w0 = COPY %op(s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%x:_(s32) = COPY $w0
@@ -488,3 +488,66 @@ body: |
RET_ReallyLR implicit $w0
...
+---
+name: sub_to_add
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ liveins: $w0
+ ; CHECK-LABEL: name: sub_to_add
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %x:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; CHECK-NEXT: %op:_(s32) = G_ADD %x, [[C]]
+ ; CHECK-NEXT: $w0 = COPY %op(s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %x:_(s32) = COPY $w0
+ %cst:_(s32) = G_CONSTANT i32 1
+ %op:_(s32) = G_SUB %x(s32), %cst
+ $w0 = COPY %op(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: sub_to_add_nuw
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ liveins: $w0
+ ; CHECK-LABEL: name: sub_to_add_nuw
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %x:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; CHECK-NEXT: %op:_(s32) = G_ADD %x, [[C]]
+ ; CHECK-NEXT: $w0 = COPY %op(s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %x:_(s32) = COPY $w0
+ %cst:_(s32) = G_CONSTANT i32 1
+ %op:_(s32) = nuw G_SUB %x(s32), %cst
+ $w0 = COPY %op(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: sub_to_add_nsw
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ liveins: $w0
+ ; CHECK-LABEL: name: sub_to_add_nsw
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %x:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; CHECK-NEXT: %op:_(s32) = nsw G_ADD %x, [[C]]
+ ; CHECK-NEXT: $w0 = COPY %op(s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %x:_(s32) = COPY $w0
+ %cst:_(s32) = G_CONSTANT i32 1
+ %op:_(s32) = nsw G_SUB %x(s32), %cst
+ $w0 = COPY %op(s32)
+ RET_ReallyLR implicit $w0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
index 63f5464371cc62..493e8cef638902 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
@@ -1669,7 +1669,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 64, v3
; GFX6-NEXT: v_lshr_b64 v[6:7], v[0:1], v3
; GFX6-NEXT: v_lshl_b64 v[8:9], v[4:5], v8
-; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 64, v3
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0xffffffc0, v3
; GFX6-NEXT: v_ashr_i64 v[10:11], v[4:5], v3
; GFX6-NEXT: v_or_b32_e32 v6, v6, v8
; GFX6-NEXT: v_ashrrev_i32_e32 v8, 31, v5
@@ -1692,7 +1692,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX8-NEXT: v_sub_u32_e32 v8, vcc, 64, v3
; GFX8-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX8-NEXT: v_lshlrev_b64 v[8:9], v8, v[4:5]
-; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 64, v3
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0xffffffc0, v3
; GFX8-NEXT: v_ashrrev_i64 v[10:11], v3, v[4:5]
; GFX8-NEXT: v_or_b32_e32 v6, v6, v8
; GFX8-NEXT: v_ashrrev_i32_e32 v8, 31, v5
@@ -1715,7 +1715,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX9-NEXT: v_sub_u32_e32 v8, 64, v3
; GFX9-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX9-NEXT: v_lshlrev_b64 v[8:9], v8, v[4:5]
-; GFX9-NEXT: v_subrev_u32_e32 v2, 64, v3
+; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffc0, v3
; GFX9-NEXT: v_ashrrev_i64 v[10:11], v3, v[4:5]
; GFX9-NEXT: v_or_b32_e32 v6, v6, v8
; GFX9-NEXT: v_ashrrev_i32_e32 v8, 31, v5
@@ -1735,7 +1735,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_bfe_i32 v4, v2, 0, 1
; GFX10-NEXT: v_sub_nc_u32_e32 v2, 64, v3
-; GFX10-NEXT: v_subrev_nc_u32_e32 v10, 64, v3
+; GFX10-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v3
; GFX10-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v3
; GFX10-NEXT: v_ashrrev_i32_e32 v5, 31, v4
@@ -1758,7 +1758,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_bfe_i32 v4, v2, 0, 1
; GFX11-NEXT: v_sub_nc_u32_e32 v2, 64, v3
-; GFX11-NEXT: v_subrev_nc_u32_e32 v10, 64, v3
+; GFX11-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v3
; GFX11-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v3
; GFX11-NEXT: v_ashrrev_i32_e32 v5, 31, v4
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
index 168bf16ad68674..b9cd330ee2b5f9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
@@ -1438,7 +1438,7 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) {
; SI-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; SI-NEXT: v_ffbh_i32_e32 v3, 0
; SI-NEXT: v_add_i32_e32 v2, vcc, 32, v2
-; SI-NEXT: v_subrev_i32_e32 v3, vcc, 1, v3
+; SI-NEXT: v_add_i32_e32 v3, vcc, -1, v3
; SI-NEXT: v_mov_b32_e32 v1, 0
; SI-NEXT: v_min_u32_e32 v2, v3, v2
; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], v2
@@ -1456,7 +1456,7 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) {
; VI-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; VI-NEXT: v_ffbh_i32_e32 v3, 0
; VI-NEXT: v_add_u32_e32 v2, vcc, 32, v2
-; VI-NEXT: v_subrev_u32_e32 v3, vcc, 1, v3
+; VI-NEXT: v_add_u32_e32 v3, vcc, -1, v3
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: v_min_u32_e32 v2, v3, v2
; VI-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
index 146f344930a4ee..6e55d7fdb5e957 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
@@ -4101,7 +4101,7 @@ define float @v_fdiv_f32_constrhs0_dynamic_25ulp(float %x) #0 {
; GFX10-NEXT: v_rcp_f32_e32 v1, 0x3f40e400
; GFX10-NEXT: v_frexp_mant_f32_e32 v2, v0
; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
-; GFX10-NEXT: v_subrev_nc_u32_e32 v0, 14, v0
+; GFX10-NEXT: v_add_nc_u32_e32 v0, -14, v0
; GFX10-NEXT: v_mul_f32_e32 v1, v2, v1
; GFX10-NEXT: v_ldexp_f32 v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -4112,10 +4112,9 @@ define float @v_fdiv_f32_constrhs0_dynamic_25ulp(float %x) #0 {
; GFX11-NEXT: v_rcp_f32_e32 v1, 0x3f40e400
; GFX11-NEXT: v_frexp_mant_f32_e32 v2, v0
; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_subrev_nc_u32_e32 v0, 14, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v1, v2, v1
+; GFX11-NEXT: v_dual_mul_f32 v1, v2, v1 :: v_dual_add_nc_u32 v0, -14, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_ldexp_f32 v0, v1, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
; EG-LABEL: v_fdiv_f32_constrhs0_dynamic_25ulp:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
index 3bd3486ec261d4..5d76b542fad894 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
@@ -20,10 +20,10 @@ define amdgpu_ps i7 @s_fshl_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX6-NEXT: v_mul_lo_u32 v0, v0, 7
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
-; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 7, v0
+; GFX6-NEXT: v_add_i32_e32 v1, vcc, -7, v0
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 7, v0
+; GFX6-NEXT: v_add_i32_e32 v1, vcc, -7, v0
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 6, v0
@@ -51,10 +51,10 @@ define amdgpu_ps i7 @s_fshl_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
; GFX8-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX8-NEXT: v_mul_lo_u32 v0, v0, 7
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s2, v0
-; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 7, v0
+; GFX8-NEXT: v_add_u32_e32 v1, vcc, -7, v0
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 7, v0
+; GFX8-NEXT: v_add_u32_e32 v1, vcc, -7, v0
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX8-NEXT: v_sub_u16_e32 v1, 6, v0
@@ -82,10 +82,10 @@ define amdgpu_ps i7 @s_fshl_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
; GFX9-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX9-NEXT: v_mul_lo_u32 v0, v0, 7
; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0
-; GFX9-NEXT: v_subrev_u32_e32 v1, 7, v0
+; GFX9-NEXT: v_add_u32_e32 v1, -7, v0
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX9-NEXT: v_subrev_u32_e32 v1, 7, v0
+; GFX9-NEXT: v_add_u32_e32 v1, -7, v0
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX9-NEXT: v_sub_u16_e32 v1, 6, v0
@@ -113,10 +113,10 @@ define amdgpu_ps i7 @s_fshl_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
; GFX10-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX10-NEXT: v_mul_lo_u32 v0, v0, 7
; GFX10-NEXT: v_sub_nc_u32_e32 v0, s2, v0
-; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
+; GFX10-NEXT: v_add_nc_u32_e32 v1, -7, v0
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
-; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
+; GFX10-NEXT: v_add_nc_u32_e32 v1, -7, v0
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10-NEXT: v_sub_nc_u16 v1, 6, v0
@@ -150,11 +150,11 @@ define amdgpu_ps i7 @s_fshl_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
; GFX11-NEXT: v_mul_lo_u32 v0, v0, 7
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_nc_u32_e32 v0, s2, v0
-; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
+; GFX11-NEXT: v_add_nc_u32_e32 v1, -7, v0
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
-; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
+; GFX11-NEXT: v_add_nc_u32_e32 v1, -7, v0
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -189,10 +189,10 @@ define i7 @v_fshl_i7(i7 %lhs, i7 %rhs, i7 %amt) {
; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX6-NEXT: v_mul_lo_u32 v3, v3, 7
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3
-; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 7, v2
+; GFX6-NEXT: v_add_i32_e32 v3, vcc, -7, v2
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 7, v2
+; GFX6-NEXT: v_add_i32_e32 v3, vcc, -7, v2
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 6, v2
@@ -219,10 +219,10 @@ define i7 @v_fshl_i7(i7 %lhs, i7 %rhs, i7 %amt) {
; GFX8-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX8-NEXT: v_mul_lo_u32 v3, v3, 7
; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3
-; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 7, v2
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, -7, v2
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 7, v2
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, -7, v2
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX8-NEXT: v_sub_u16_e32 v3, 6, v2
@@ -249,10 +249,10 @@ define i7 @v_fshl_i7(i7 %lhs, i7 %rhs, i7 %amt) {
; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX9-NEXT: v_mul_lo_u32 v3, v3, 7
; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3
-; GFX9-NEXT: v_subrev_u32_e32 v3, 7, v2
+; GFX9-NEXT: v_add_u32_e32 v3, -7, v2
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX9-NEXT: v_subrev_u32_e32 v3, 7, v2
+; GFX9-NEXT: v_add_u32_e32 v3, -7, v2
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX9-NEXT: v_sub_u16_e32 v3, 6, v2
@@ -279,10 +279,10 @@ define i7 @v_fshl_i7(i7 %lhs, i7 %rhs, i7 %amt) {
; GFX10-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX10-NEXT: v_mul_lo_u32 v3, v3, 7
; GFX10-NEXT: v_sub_nc_u32_e32 v2, v2, v3
-; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
+; GFX10-NEXT: v_add_nc_u32_e32 v3, -7, v2
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
-; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
+; GFX10-NEXT: v_add_nc_u32_e32 v3, -7, v2
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
; GFX10-NEXT: v_sub_nc_u16 v3, 6, v2
@@ -315,11 +315,11 @@ define i7 @v_fshl_i7(i7 %lhs, i7 %rhs, i7 %amt) {
; GFX11-NEXT: v_mul_lo_u32 v3, v3, 7
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_nc_u32_e32 v2, v2, v3
-; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
+; GFX11-NEXT: v_add_nc_u32_e32 v3, -7, v2
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
-; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
+; GFX11-NEXT: v_add_nc_u32_e32 v3, -7, v2
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
@@ -1550,16 +1550,16 @@ define amdgpu_ps i24 @s_fshl_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
; GFX6-NEXT: s_bfe_u32 s1, s1, 0x170001
; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GFX6-NEXT: v_mul_lo_u32 v1, v0, v1
-; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1
-; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; GFX6-NEXT: v_mul_lo_u32 v2, v0, v1
+; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2
+; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
-; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 24, v0
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, v0, v1
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
-; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 24, v0
+; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX6-NEXT: v_add_i32_e32 v1, vcc, v0, v1
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 23, v0
@@ -1580,16 +1580,16 @@ define amdgpu_ps i24 @s_fshl_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
; GFX8-NEXT: s_bfe_u32 s1, s1, 0x170001
; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GFX8-NEXT: v_mul_lo_u32 v1, v0, v1
-; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
+; GFX8-NEXT: v_mul_lo_u32 v2, v0, v1
+; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s2, v0
-; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 24, v0
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, v0, v1
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 24, v0
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT: v_add_u32_e32 v1, vcc, v0, v1
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX8-NEXT: v_sub_u32_e32 v1, vcc, 23, v0
@@ -1616,10 +1616,10 @@ define amdgpu_ps i24 @s_fshl_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
; GFX9-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24
; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0
-; GFX9-NEXT: v_subrev_u32_e32 v1, 24, v0
+; GFX9-NEXT: v_add_u32_e32 v1, 0xffffffe8, v0
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX9-NEXT: v_subrev_u32_e32 v1, 24, v0
+; GFX9-NEXT: v_add_u32_e32 v1, 0xffffffe8, v0
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX9-NEXT: v_sub_u32_e32 v1, 23, v0
@@ -1644,10 +1644,10 @@ define amdgpu_ps i24 @s_fshl_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
; GFX10-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24
; GFX10-NEXT: v_sub_nc_u32_e32 v0, s2, v0
-; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 24, v0
+; GFX10-NEXT: v_add_nc_u32_e32 v1, 0xffffffe8, v0
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
-; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 24, v0
+; GFX10-NEXT: v_add_nc_u32_e32 v1, 0xffffffe8, v0
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10-NEXT: v_sub_nc_u32_e32 v1, 23, v0
@@ -1678,11 +1678,11 @@ define amdgpu_ps i24 @s_fshl_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
; GFX11-NEXT: v_mul_lo_u32 v0, v0, 24
; GFX11-NEXT: v_sub_nc_u32_e32 v0, s2, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 24, v0
+; GFX11-NEXT: v_add_nc_u32_e32 v1, 0xffffffe8, v0
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 24, v0
+; GFX11-NEXT: v_add_nc_u32_e32 v1, 0xffffffe8, v0
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
@@ -1710,16 +1710,16 @@ define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) {
; GFX6-NEXT: v_bfe_u32 v1, v1, 1, 23
; GFX6-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; GFX6-NEXT: v_cvt_u32_f32_e32 v3, v3
-; GFX6-NEXT: v_mul_lo_u32 v4, v3, v4
-; GFX6-NEXT: v_mul_hi_u32 v4, v3, v4
-; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v4
+; GFX6-NEXT: v_mul_lo_u32 v5, v3, v4
+; GFX6-NEXT: v_mul_hi_u32 v5, v3, v5
+; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v5
; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX6-NEXT: v_mul_lo_u32 v3, v3, 24
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3
-; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v2
+; GFX6-NEXT: v_add_i32_e32 v3, vcc, v2, v4
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v2
+; GFX6-NEXT: v_add_i32_e32 v3, vcc, 0xffffffe8, v2
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v2
@@ -1740,16 +1740,16 @@ define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) {
; GFX8-NEXT: v_bfe_u32 v1, v1, 1, 23
; GFX8-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v3
-; GFX8-NEXT: v_mul_lo_u32 v4, v3, v4
-; GFX8-NEXT: v_mul_hi_u32 v4, v3, v4
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v4
+; GFX8-NEXT: v_mul_lo_u32 v5, v3, v4
+; GFX8-NEXT: v_mul_hi_u32 v5, v3, v5
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v5
; GFX8-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX8-NEXT: v_mul_lo_u32 v3, v3, 24
; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3
-; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v2
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, v2, v4
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v2
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0xffffffe8, v2
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v2
@@ -1776,10 +1776,10 @@ define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) {
; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX9-NEXT: v_mul_lo_u32 v3, v3, 24
; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3
-; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v2
+; GFX9-NEXT: v_add_u32_e32 v3, 0xffffffe8, v2
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v2
+; GFX9-NEXT: v_add_u32_e32 v3, 0xffffffe8, v2
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX9-NEXT: v_sub_u32_e32 v3, 23, v2
@@ -1804,10 +1804,10 @@ define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) {
; GFX10-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX10-NEXT: v_mul_lo_u32 v3, v3, 24
; GFX10-NEXT: v_sub_nc_u32_e32 v2, v2, v3
-; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v2
+; GFX10-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v2
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
-; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v2
+; GFX10-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v2
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v2
@@ -1838,11 +1838,11 @@ define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) {
; GFX11-NEXT: v_mul_lo_u32 v3, v3, 24
; GFX11-NEXT: v_sub_nc_u32_e32 v2, v2, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v2
+; GFX11-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v2
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2
; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v2
+; GFX11-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v2
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2
; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
@@ -1887,7 +1887,7 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX6-NEXT: s_lshr_b32 s0, s2, 16
; GFX6-NEXT: s_lshr_b32 s1, s3, 8
; GFX6-NEXT: s_bfe_u32 s8, s2, 0x80008
-; GFX6-NEXT: v_mul_lo_u32 v3, v2, v3
+; GFX6-NEXT: v_mul_lo_u32 v4, v2, v3
; GFX6-NEXT: s_and_b32 s7, s2, 0xff
; GFX6-NEXT: s_lshl_b32 s8, s8, 8
; GFX6-NEXT: s_and_b32 s0, s0, 0xff
@@ -1906,7 +1906,7 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX6-NEXT: v_or_b32_e32 v1, s1, v1
; GFX6-NEXT: s_lshr_b32 s1, s4, 16
; GFX6-NEXT: s_bfe_u32 s7, s4, 0x80008
-; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3
+; GFX6-NEXT: v_mul_hi_u32 v4, v2, v4
; GFX6-NEXT: s_and_b32 s3, s4, 0xff
; GFX6-NEXT: s_lshl_b32 s7, s7, 8
; GFX6-NEXT: s_and_b32 s1, s1, 0xff
@@ -1915,53 +1915,53 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX6-NEXT: s_and_b32 s3, 0xffff, s3
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_or_b32 s1, s3, s1
-; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3
-; GFX6-NEXT: v_mul_hi_u32 v3, s1, v2
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v4
+; GFX6-NEXT: v_mul_hi_u32 v4, s1, v2
; GFX6-NEXT: s_lshr_b32 s2, s5, 8
; GFX6-NEXT: s_and_b32 s3, s5, 0xff
-; GFX6-NEXT: v_mov_b32_e32 v4, s4
+; GFX6-NEXT: v_mov_b32_e32 v5, s4
; GFX6-NEXT: s_and_b32 s2, s2, 0xff
-; GFX6-NEXT: v_alignbit_b32 v4, s3, v4, 24
+; GFX6-NEXT: v_alignbit_b32 v5, s3, v5, 24
; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
-; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX6-NEXT: v_mul_lo_u32 v3, v3, 24
+; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v5
+; GFX6-NEXT: v_mul_lo_u32 v4, v4, 24
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
-; GFX6-NEXT: v_or_b32_e32 v4, s2, v4
-; GFX6-NEXT: v_mul_hi_u32 v2, v4, v2
-; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s1, v3
-; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 24, v3
-; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v3
+; GFX6-NEXT: v_or_b32_e32 v5, s2, v5
+; GFX6-NEXT: v_mul_hi_u32 v2, v5, v2
+; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s1, v4
+; GFX6-NEXT: v_add_i32_e32 v6, vcc, v4, v3
+; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
; GFX6-NEXT: v_mul_lo_u32 v2, v2, 24
-; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 24, v3
-; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v3
-; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v4, v2
-; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 23, v3
-; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
+; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
+; GFX6-NEXT: v_add_i32_e32 v6, vcc, v4, v3
+; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
+; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
+; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v5, v2
+; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 23, v4
+; GFX6-NEXT: v_add_i32_e32 v5, vcc, v2, v3
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
-; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
-; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
+; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
+; GFX6-NEXT: v_add_i32_e32 v3, vcc, v2, v3
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
-; GFX6-NEXT: v_and_b32_e32 v3, 0xffffff, v3
+; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4
; GFX6-NEXT: s_lshr_b32 s0, s0, 1
-; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v5
-; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
-; GFX6-NEXT: v_lshl_b32_e32 v3, s6, v3
-; GFX6-NEXT: v_lshr_b32_e32 v5, s0, v5
-; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 23, v2
+; GFX6-NEXT: v_and_b32_e32 v6, 0xffffff, v6
+; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX6-NEXT: v_lshl_b32_e32 v4, s6, v4
+; GFX6-NEXT: v_lshr_b32_e32 v6, s0, v6
+; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v2
; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
-; GFX6-NEXT: v_or_b32_e32 v3, v3, v5
+; GFX6-NEXT: v_or_b32_e32 v4, v4, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 1, v1
-; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v4
+; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v3
; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1
-; GFX6-NEXT: v_bfe_u32 v2, v3, 8, 8
+; GFX6-NEXT: v_bfe_u32 v2, v4, 8, 8
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
-; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v3
+; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
-; GFX6-NEXT: v_bfe_u32 v2, v3, 16, 8
+; GFX6-NEXT: v_bfe_u32 v2, v4, 16, 8
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v0
@@ -2021,7 +2021,7 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX8-NEXT: v_not_b32_e32 v1, 23
; GFX8-NEXT: s_or_b32 s3, s8, s3
; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
-; GFX8-NEXT: v_mul_lo_u32 v1, v0, v1
+; GFX8-NEXT: v_mul_lo_u32 v2, v0, v1
; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
; GFX8-NEXT: s_lshl_b32 s6, s6, 16
; GFX8-NEXT: s_or_b32 s3, s3, s6
@@ -2031,67 +2031,67 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX8-NEXT: s_lshr_b32 s8, s4, 24
; GFX8-NEXT: s_and_b32 s4, s4, 0xff
; GFX8-NEXT: s_lshl_b32 s6, s6, 8
-; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1
+; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2
; GFX8-NEXT: s_or_b32 s4, s4, s6
; GFX8-NEXT: s_and_b32 s6, s7, 0xff
; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
; GFX8-NEXT: s_lshl_b32 s6, s6, 16
; GFX8-NEXT: s_or_b32 s4, s4, s6
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
-; GFX8-NEXT: v_mul_hi_u32 v1, s4, v0
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
+; GFX8-NEXT: v_mul_hi_u32 v2, s4, v0
; GFX8-NEXT: s_lshr_b32 s9, s5, 8
; GFX8-NEXT: s_and_b32 s5, s5, 0xff
; GFX8-NEXT: s_lshl_b32 s5, s5, 8
-; GFX8-NEXT: v_mul_lo_u32 v1, v1, 24
+; GFX8-NEXT: v_mul_lo_u32 v2, v2, 24
; GFX8-NEXT: s_and_b32 s6, s9, 0xff
; GFX8-NEXT: s_or_b32 s5, s8, s5
; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
; GFX8-NEXT: s_and_b32 s5, 0xffff, s5
; GFX8-NEXT: s_lshl_b32 s6, s6, 16
; GFX8-NEXT: s_or_b32 s5, s5, s6
-; GFX8-NEXT: v_sub_u32_e32 v1, vcc, s4, v1
-; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1
+; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s4, v2
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, v2, v1
; GFX8-NEXT: v_mul_hi_u32 v0, s5, v0
-; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
-; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1
-; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
+; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
+; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, v2, v1
+; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
-; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 23, v1
-; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1
-; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s0
-; GFX8-NEXT: s_lshr_b32 s0, s2, 1
+; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v2
; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2
-; GFX8-NEXT: v_lshrrev_b32_e64 v2, v2, s0
+; GFX8-NEXT: v_lshlrev_b32_e64 v2, v2, s0
+; GFX8-NEXT: s_lshr_b32 s0, s2, 1
+; GFX8-NEXT: v_and_b32_e32 v3, 0xffffff, v3
+; GFX8-NEXT: v_lshrrev_b32_e64 v3, v3, s0
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s5, v0
-; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
-; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v0
+; GFX8-NEXT: v_or_b32_e32 v2, v2, v3
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, v0, v1
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v0
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX8-NEXT: v_add_u32_e32 v1, vcc, v0, v1
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 23, v0
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT: v_sub_u32_e32 v1, vcc, 23, v0
; GFX8-NEXT: v_and_b32_e32 v0, 0xffffff, v0
; GFX8-NEXT: s_lshr_b32 s0, s3, 1
-; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2
+; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1
; GFX8-NEXT: v_lshlrev_b32_e64 v0, v0, s1
-; GFX8-NEXT: v_lshrrev_b32_e64 v2, v2, s0
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
-; GFX8-NEXT: v_mov_b32_e32 v2, 8
-; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
+; GFX8-NEXT: v_lshrrev_b32_e64 v1, v1, s0
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX8-NEXT: v_mov_b32_e32 v1, 8
+; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT: v_mov_b32_e32 v4, 16
-; GFX8-NEXT: v_or_b32_sdwa v3, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
-; GFX8-NEXT: v_or_b32_e32 v1, v3, v1
+; GFX8-NEXT: v_or_b32_sdwa v3, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
+; GFX8-NEXT: v_or_b32_e32 v2, v3, v2
; GFX8-NEXT: v_and_b32_e32 v3, 0xff, v0
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v3
-; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
-; GFX8-NEXT: v_or_b32_e32 v1, v1, v3
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
-; GFX8-NEXT: v_readfirstlane_b32 s0, v1
+; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
+; GFX8-NEXT: v_or_b32_e32 v2, v2, v3
+; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
+; GFX8-NEXT: v_readfirstlane_b32 s0, v2
; GFX8-NEXT: v_readfirstlane_b32 s1, v0
; GFX8-NEXT: ; return to shader part epilog
;
@@ -2172,10 +2172,10 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX9-NEXT: s_or_b32 s5, s5, s6
; GFX9-NEXT: v_mul_hi_u32 v0, s5, v0
; GFX9-NEXT: v_sub_u32_e32 v1, s4, v1
-; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v1
+; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffe8, v1
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
-; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v1
+; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffe8, v1
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24
; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
@@ -2186,10 +2186,10 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX9-NEXT: v_lshrrev_b32_e64 v2, v2, s2
; GFX9-NEXT: v_sub_u32_e32 v0, s5, v0
; GFX9-NEXT: v_lshl_or_b32 v1, s0, v1, v2
-; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v0
+; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffe8, v0
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v0
+; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffe8, v0
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX9-NEXT: v_sub_u32_e32 v2, 23, v0
@@ -2282,9 +2282,9 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX10-NEXT: s_lshr_b32 s4, s3, 8
; GFX10-NEXT: s_and_b32 s5, s9, 0xff
; GFX10-NEXT: s_and_b32 s3, s3, 0xff
-; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1
+; GFX10-NEXT: v_add_nc_u32_e32 v2, 0xffffffe8, v1
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
-; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v0
+; GFX10-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v0
; GFX10-NEXT: s_and_b32 s5, 0xffff, s5
; GFX10-NEXT: s_lshl_b32 s3, s3, 8
; GFX10-NEXT: s_and_b32 s4, s4, 0xff
@@ -2293,13 +2293,13 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX10-NEXT: s_lshl_b32 s5, s5, 16
; GFX10-NEXT: s_or_b32 s3, s10, s3
; GFX10-NEXT: s_and_b32 s4, 0xffff, s4
-; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1
+; GFX10-NEXT: v_add_nc_u32_e32 v2, 0xffffffe8, v1
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
; GFX10-NEXT: s_or_b32 s2, s2, s5
; GFX10-NEXT: s_and_b32 s3, 0xffff, s3
; GFX10-NEXT: s_lshl_b32 s4, s4, 16
-; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v0
+; GFX10-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v0
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
; GFX10-NEXT: s_or_b32 s3, s3, s4
@@ -2399,9 +2399,9 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX11-NEXT: s_and_b32 s5, s8, 0xff
; GFX11-NEXT: s_lshr_b32 s4, s3, 8
; GFX11-NEXT: s_and_b32 s5, 0xffff, s5
-; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v1
+; GFX11-NEXT: v_add_nc_u32_e32 v2, 0xffffffe8, v1
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
-; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v0
+; GFX11-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v0
; GFX11-NEXT: s_and_b32 s3, s3, 0xff
; GFX11-NEXT: s_lshl_b32 s5, s5, 16
; GFX11-NEXT: s_lshl_b32 s3, s3, 8
@@ -2410,7 +2410,7 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX11-NEXT: s_and_b32 s4, s4, 0xff
; GFX11-NEXT: s_or_b32 s2, s2, s5
; GFX11-NEXT: s_or_b32 s3, s9, s3
-; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v1
+; GFX11-NEXT: v_add_nc_u32_e32 v2, 0xffffffe8, v1
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
; GFX11-NEXT: s_and_b32 s4, 0xffff, s4
@@ -2423,7 +2423,7 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX11-NEXT: s_lshr_b32 s3, s3, 1
; GFX11-NEXT: v_sub_nc_u32_e32 v2, 23, v1
; GFX11-NEXT: v_and_b32_e32 v1, 0xffffff, v1
-; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v0
+; GFX11-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v0
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2
@@ -2479,31 +2479,31 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
; GFX6-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
; GFX6-NEXT: v_cvt_u32_f32_e32 v6, v6
; GFX6-NEXT: v_bfe_u32 v2, v2, 1, 23
-; GFX6-NEXT: v_mul_lo_u32 v7, v6, v7
-; GFX6-NEXT: v_mul_hi_u32 v7, v6, v7
-; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v7
-; GFX6-NEXT: v_mul_hi_u32 v7, v4, v6
+; GFX6-NEXT: v_mul_lo_u32 v8, v6, v7
+; GFX6-NEXT: v_mul_hi_u32 v8, v6, v8
+; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v8
+; GFX6-NEXT: v_mul_hi_u32 v8, v4, v6
; GFX6-NEXT: v_mul_hi_u32 v6, v5, v6
-; GFX6-NEXT: v_mul_lo_u32 v7, v7, 24
+; GFX6-NEXT: v_mul_lo_u32 v8, v8, 24
; GFX6-NEXT: v_mul_lo_u32 v6, v6, 24
-; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v7
-; GFX6-NEXT: v_subrev_i32_e32 v7, vcc, 24, v4
+; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v8
+; GFX6-NEXT: v_add_i32_e32 v8, vcc, v4, v7
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
-; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
-; GFX6-NEXT: v_subrev_i32_e32 v7, vcc, 24, v4
+; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc
+; GFX6-NEXT: v_add_i32_e32 v8, vcc, v4, v7
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
-; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
-; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 23, v4
+; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc
+; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 23, v4
; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v0, v4, v0
-; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v7
+; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v8
; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2
; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v5, v6
-; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
+; GFX6-NEXT: v_add_i32_e32 v4, vcc, v2, v7
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
-; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
+; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0xffffffe8, v2
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 23, v2
@@ -2526,31 +2526,31 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
; GFX8-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
; GFX8-NEXT: v_cvt_u32_f32_e32 v6, v6
; GFX8-NEXT: v_bfe_u32 v2, v2, 1, 23
-; GFX8-NEXT: v_mul_lo_u32 v7, v6, v7
-; GFX8-NEXT: v_mul_hi_u32 v7, v6, v7
-; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v7
-; GFX8-NEXT: v_mul_hi_u32 v7, v4, v6
+; GFX8-NEXT: v_mul_lo_u32 v8, v6, v7
+; GFX8-NEXT: v_mul_hi_u32 v8, v6, v8
+; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v8
+; GFX8-NEXT: v_mul_hi_u32 v8, v4, v6
; GFX8-NEXT: v_mul_hi_u32 v6, v5, v6
-; GFX8-NEXT: v_mul_lo_u32 v7, v7, 24
+; GFX8-NEXT: v_mul_lo_u32 v8, v8, 24
; GFX8-NEXT: v_mul_lo_u32 v6, v6, 24
-; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v7
-; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, 24, v4
+; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v8
+; GFX8-NEXT: v_add_u32_e32 v8, vcc, v4, v7
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
-; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, 24, v4
+; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc
+; GFX8-NEXT: v_add_u32_e32 v8, vcc, v4, v7
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
-; GFX8-NEXT: v_sub_u32_e32 v7, vcc, 23, v4
+; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc
+; GFX8-NEXT: v_sub_u32_e32 v8, vcc, 23, v4
; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4
; GFX8-NEXT: v_lshlrev_b32_e32 v0, v4, v0
-; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v7
+; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v8
; GFX8-NEXT: v_lshrrev_b32_e32 v2, v4, v2
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v5, v6
-; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 24, v2
+; GFX8-NEXT: v_add_u32_e32 v4, vcc, v2, v7
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
-; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 24, v2
+; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0xffffffe8, v2
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 23, v2
@@ -2583,21 +2583,21 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24
; GFX9-NEXT: v_sub_u32_e32 v4, v4, v7
; GFX9-NEXT: v_sub_u32_e32 v5, v5, v6
-; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4
+; GFX9-NEXT: v_add_u32_e32 v6, 0xffffffe8, v4
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
-; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4
+; GFX9-NEXT: v_add_u32_e32 v6, 0xffffffe8, v4
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
; GFX9-NEXT: v_sub_u32_e32 v6, 23, v4
; GFX9-NEXT: v_and_b32_e32 v6, 0xffffff, v6
-; GFX9-NEXT: v_subrev_u32_e32 v7, 24, v5
+; GFX9-NEXT: v_add_u32_e32 v7, 0xffffffe8, v5
; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4
; GFX9-NEXT: v_lshrrev_b32_e32 v2, v6, v2
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v5
; GFX9-NEXT: v_lshl_or_b32 v0, v0, v4, v2
; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v7, vcc
-; GFX9-NEXT: v_subrev_u32_e32 v4, 24, v2
+; GFX9-NEXT: v_add_u32_e32 v4, 0xffffffe8, v2
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GFX9-NEXT: v_sub_u32_e32 v4, 23, v2
@@ -2627,15 +2627,15 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
; GFX10-NEXT: v_mul_lo_u32 v6, v6, 24
; GFX10-NEXT: v_sub_nc_u32_e32 v4, v4, v7
; GFX10-NEXT: v_sub_nc_u32_e32 v5, v5, v6
-; GFX10-NEXT: v_subrev_nc_u32_e32 v6, 24, v4
+; GFX10-NEXT: v_add_nc_u32_e32 v6, 0xffffffe8, v4
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4
-; GFX10-NEXT: v_subrev_nc_u32_e32 v7, 24, v5
+; GFX10-NEXT: v_add_nc_u32_e32 v7, 0xffffffe8, v5
; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5
-; GFX10-NEXT: v_subrev_nc_u32_e32 v6, 24, v4
+; GFX10-NEXT: v_add_nc_u32_e32 v6, 0xffffffe8, v4
; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4
-; GFX10-NEXT: v_subrev_nc_u32_e32 v7, 24, v5
+; GFX10-NEXT: v_add_nc_u32_e32 v7, 0xffffffe8, v5
; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5
; GFX10-NEXT: v_sub_nc_u32_e32 v6, 23, v4
@@ -2679,34 +2679,32 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_lo_u32 v6, v6, 24
; GFX11-NEXT: v_sub_nc_u32_e32 v5, v5, v6
-; GFX11-NEXT: v_subrev_nc_u32_e32 v6, 24, v4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_subrev_nc_u32_e32 v7, 24, v5
+; GFX11-NEXT: v_add_nc_u32_e32 v6, 0xffffffe8, v4
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_subrev_nc_u32_e32 v6, 24, v4
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_add_nc_u32_e32 v6, 0xffffffe8, v4
+; GFX11-NEXT: v_add_nc_u32_e32 v7, 0xffffffe8, v5
; GFX11-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_subrev_nc_u32_e32 v7, 24, v5
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_dual_cndmask_b32 v4, v4, v6 :: v_dual_add_nc_u32 v7, 0xffffffe8, v5
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo
-; GFX11-NEXT: v_sub_nc_u32_e32 v7, 23, v5
-; GFX11-NEXT: v_and_b32_e32 v5, 0xffffff, v5
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_and_b32_e32 v7, 0xffffff, v7
; GFX11-NEXT: v_sub_nc_u32_e32 v6, 23, v4
-; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v4
-; GFX11-NEXT: v_lshrrev_b32_e32 v3, v7, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_dual_cndmask_b32 v5, v5, v7 :: v_dual_and_b32 v4, 0xffffff, v4
; GFX11-NEXT: v_and_b32_e32 v6, 0xffffff, v6
-; GFX11-NEXT: v_lshl_or_b32 v1, v1, v5, v3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_sub_nc_u32_e32 v7, 23, v5
+; GFX11-NEXT: v_and_b32_e32 v5, 0xffffff, v5
; GFX11-NEXT: v_lshrrev_b32_e32 v2, v6, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_and_b32_e32 v7, 0xffffff, v7
; GFX11-NEXT: v_lshl_or_b32 v0, v0, v4, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_lshrrev_b32_e32 v3, v7, v3
+; GFX11-NEXT: v_lshl_or_b32 v1, v1, v5, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call <2 x i24> @llvm.fshl.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt)
ret <2 x i24> %result
@@ -6061,11 +6059,11 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_and_b32_e32 v15, 0x7f, v8
; GFX6-NEXT: v_sub_i32_e32 v9, vcc, 64, v15
-; GFX6-NEXT: v_subrev_i32_e32 v16, vcc, 64, v15
+; GFX6-NEXT: v_add_i32_e32 v17, vcc, 0xffffffc0, v15
; GFX6-NEXT: v_lshr_b64 v[9:10], v[0:1], v9
; GFX6-NEXT: v_lshl_b64 v[11:12], v[2:3], v15
; GFX6-NEXT: v_lshl_b64 v[13:14], v[0:1], v15
-; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v16
+; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v17
; GFX6-NEXT: v_or_b32_e32 v9, v9, v11
; GFX6-NEXT: v_or_b32_e32 v10, v10, v12
; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15
@@ -6082,8 +6080,9 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
; GFX6-NEXT: v_lshr_b64 v[2:3], v[6:7], 1
; GFX6-NEXT: v_and_b32_e32 v14, 0x7f, v4
+; GFX6-NEXT: v_not_b32_e32 v16, 63
; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 64, v14
-; GFX6-NEXT: v_subrev_i32_e32 v15, vcc, 64, v14
+; GFX6-NEXT: v_add_i32_e32 v15, vcc, v14, v16
; GFX6-NEXT: v_lshr_b64 v[4:5], v[0:1], v14
; GFX6-NEXT: v_lshl_b64 v[6:7], v[2:3], v6
; GFX6-NEXT: v_lshr_b64 v[8:9], v[2:3], v14
@@ -6109,11 +6108,11 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v15, 0x7f, v8
; GFX8-NEXT: v_sub_u32_e32 v9, vcc, 64, v15
-; GFX8-NEXT: v_subrev_u32_e32 v16, vcc, 64, v15
+; GFX8-NEXT: v_add_u32_e32 v17, vcc, 0xffffffc0, v15
; GFX8-NEXT: v_lshrrev_b64 v[9:10], v9, v[0:1]
; GFX8-NEXT: v_lshlrev_b64 v[11:12], v15, v[2:3]
; GFX8-NEXT: v_lshlrev_b64 v[13:14], v15, v[0:1]
-; GFX8-NEXT: v_lshlrev_b64 v[0:1], v16, v[0:1]
+; GFX8-NEXT: v_lshlrev_b64 v[0:1], v17, v[0:1]
; GFX8-NEXT: v_or_b32_e32 v9, v9, v11
; GFX8-NEXT: v_or_b32_e32 v10, v10, v12
; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15
@@ -6130,8 +6129,9 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
; GFX8-NEXT: v_lshrrev_b64 v[2:3], 1, v[6:7]
; GFX8-NEXT: v_and_b32_e32 v14, 0x7f, v4
+; GFX8-NEXT: v_not_b32_e32 v16, 63
; GFX8-NEXT: v_sub_u32_e32 v6, vcc, 64, v14
-; GFX8-NEXT: v_subrev_u32_e32 v15, vcc, 64, v14
+; GFX8-NEXT: v_add_u32_e32 v15, vcc, v14, v16
; GFX8-NEXT: v_lshrrev_b64 v[4:5], v14, v[0:1]
; GFX8-NEXT: v_lshlrev_b64 v[6:7], v6, v[2:3]
; GFX8-NEXT: v_lshrrev_b64 v[8:9], v14, v[2:3]
@@ -6157,7 +6157,7 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v15, 0x7f, v8
; GFX9-NEXT: v_sub_u32_e32 v9, 64, v15
-; GFX9-NEXT: v_subrev_u32_e32 v16, 64, v15
+; GFX9-NEXT: v_add_u32_e32 v16, 0xffffffc0, v15
; GFX9-NEXT: v_lshrrev_b64 v[9:10], v9, v[0:1]
; GFX9-NEXT: v_lshlrev_b64 v[11:12], v15, v[2:3]
; GFX9-NEXT: v_lshlrev_b64 v[13:14], v15, v[0:1]
@@ -6178,7 +6178,7 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
; GFX9-NEXT: v_and_b32_e32 v14, 0x7f, v4
; GFX9-NEXT: v_lshl_or_b32 v1, v6, 31, v1
; GFX9-NEXT: v_sub_u32_e32 v6, 64, v14
-; GFX9-NEXT: v_subrev_u32_e32 v15, 64, v14
+; GFX9-NEXT: v_add_u32_e32 v15, 0xffffffc0, v14
; GFX9-NEXT: v_lshrrev_b64 v[4:5], v14, v[0:1]
; GFX9-NEXT: v_lshlrev_b64 v[6:7], v6, v[2:3]
; GFX9-NEXT: v_lshrrev_b64 v[8:9], v14, v[2:3]
@@ -6210,7 +6210,7 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
; GFX10-NEXT: v_and_b32_e32 v19, 0x7f, v10
; GFX10-NEXT: v_lshlrev_b64 v[8:9], v18, v[2:3]
; GFX10-NEXT: v_lshl_or_b32 v5, v6, 31, v5
-; GFX10-NEXT: v_subrev_nc_u32_e32 v20, 64, v18
+; GFX10-NEXT: v_add_nc_u32_e32 v20, 0xffffffc0, v18
; GFX10-NEXT: v_lshrrev_b64 v[10:11], v11, v[0:1]
; GFX10-NEXT: v_sub_nc_u32_e32 v16, 64, v19
; GFX10-NEXT: v_lshlrev_b64 v[6:7], v18, v[0:1]
@@ -6218,7 +6218,7 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
; GFX10-NEXT: v_lshlrev_b64 v[0:1], v20, v[0:1]
; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v18
; GFX10-NEXT: v_or_b32_e32 v10, v10, v8
-; GFX10-NEXT: v_subrev_nc_u32_e32 v8, 64, v19
+; GFX10-NEXT: v_add_nc_u32_e32 v8, 0xffffffc0, v19
; GFX10-NEXT: v_lshlrev_b64 v[16:17], v16, v[12:13]
; GFX10-NEXT: v_or_b32_e32 v11, v11, v9
; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v19
@@ -6258,34 +6258,34 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
; GFX11-NEXT: v_and_b32_e32 v19, 0x7f, v10
; GFX11-NEXT: v_lshlrev_b64 v[8:9], v18, v[2:3]
; GFX11-NEXT: v_lshl_or_b32 v5, v6, 31, v5
-; GFX11-NEXT: v_subrev_nc_u32_e32 v20, 64, v18
+; GFX11-NEXT: v_lshlrev_b64 v[6:7], v18, v[0:1]
; GFX11-NEXT: v_lshrrev_b64 v[10:11], v11, v[0:1]
; GFX11-NEXT: v_sub_nc_u32_e32 v16, 64, v19
-; GFX11-NEXT: v_lshlrev_b64 v[6:7], v18, v[0:1]
-; GFX11-NEXT: v_lshrrev_b64 v[14:15], v19, v[4:5]
-; GFX11-NEXT: v_lshlrev_b64 v[0:1], v20, v[0:1]
; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v18
+; GFX11-NEXT: v_add_nc_u32_e32 v20, 0xffffffc0, v18
+; GFX11-NEXT: v_lshrrev_b64 v[14:15], v19, v[4:5]
+; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v19
; GFX11-NEXT: v_or_b32_e32 v10, v10, v8
-; GFX11-NEXT: v_subrev_nc_u32_e32 v8, 64, v19
+; GFX11-NEXT: v_cndmask_b32_e32 v7, 0, v7, vcc_lo
+; GFX11-NEXT: v_add_nc_u32_e32 v8, 0xffffffc0, v19
; GFX11-NEXT: v_lshlrev_b64 v[16:17], v16, v[12:13]
+; GFX11-NEXT: v_lshlrev_b64 v[0:1], v20, v[0:1]
; GFX11-NEXT: v_or_b32_e32 v11, v11, v9
-; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v19
-; GFX11-NEXT: v_cndmask_b32_e32 v10, v0, v10, vcc_lo
-; GFX11-NEXT: v_lshrrev_b64 v[8:9], v8, v[12:13]
; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v19
-; GFX11-NEXT: v_cndmask_b32_e32 v11, v1, v11, vcc_lo
+; GFX11-NEXT: v_lshrrev_b64 v[8:9], v8, v[12:13]
+; GFX11-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc_lo
; GFX11-NEXT: v_or_b32_e32 v14, v14, v16
; GFX11-NEXT: v_or_b32_e32 v15, v15, v17
+; GFX11-NEXT: v_dual_cndmask_b32 v10, v0, v10 :: v_dual_cndmask_b32 v11, v1, v11
; GFX11-NEXT: v_lshrrev_b64 v[0:1], v19, v[12:13]
-; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v18
-; GFX11-NEXT: v_dual_cndmask_b32 v6, 0, v6 :: v_dual_cndmask_b32 v7, 0, v7
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v14, s0
+; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v18
; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v15, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT: v_cndmask_b32_e64 v4, v8, v4, s1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, v2, s2
; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v8, v4, s1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
; GFX11-NEXT: v_cndmask_b32_e64 v5, v9, v5, s1
; GFX11-NEXT: v_cndmask_b32_e64 v8, 0, v0, s0
; GFX11-NEXT: v_cndmask_b32_e64 v9, 0, v1, s0
@@ -6307,15 +6307,15 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 64, v7
; GFX6-NEXT: v_lshr_b64 v[1:2], s[0:1], v1
; GFX6-NEXT: v_lshl_b64 v[3:4], s[2:3], v7
-; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, 64, v7
+; GFX6-NEXT: v_add_i32_e32 v9, vcc, 0xffffffc0, v7
; GFX6-NEXT: v_lshl_b64 v[5:6], s[0:1], v7
; GFX6-NEXT: v_or_b32_e32 v3, v1, v3
; GFX6-NEXT: v_or_b32_e32 v4, v2, v4
-; GFX6-NEXT: v_lshl_b64 v[1:2], s[0:1], v8
+; GFX6-NEXT: v_lshl_b64 v[1:2], s[0:1], v9
; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7
; GFX6-NEXT: v_not_b32_e32 v0, v0
; GFX6-NEXT: s_mov_b32 s8, 0
-; GFX6-NEXT: v_cndmask_b32_e32 v8, 0, v5, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
@@ -6324,33 +6324,34 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
; GFX6-NEXT: s_lshr_b64 s[0:1], s[4:5], 1
; GFX6-NEXT: s_lshl_b32 s9, s6, 31
-; GFX6-NEXT: v_and_b32_e32 v10, 0x7f, v0
+; GFX6-NEXT: v_and_b32_e32 v11, 0x7f, v0
; GFX6-NEXT: v_cndmask_b32_e32 v7, v1, v3, vcc
-; GFX6-NEXT: v_cndmask_b32_e32 v9, v2, v4, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v10, v2, v4, vcc
; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[8:9]
; GFX6-NEXT: s_lshr_b64 s[2:3], s[6:7], 1
-; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 64, v10
-; GFX6-NEXT: v_lshr_b64 v[0:1], s[0:1], v10
+; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 64, v11
+; GFX6-NEXT: v_not_b32_e32 v8, 63
+; GFX6-NEXT: v_lshr_b64 v[0:1], s[0:1], v11
; GFX6-NEXT: v_lshl_b64 v[2:3], s[2:3], v2
-; GFX6-NEXT: v_subrev_i32_e32 v11, vcc, 64, v10
+; GFX6-NEXT: v_add_i32_e32 v8, vcc, v11, v8
; GFX6-NEXT: v_or_b32_e32 v2, v0, v2
; GFX6-NEXT: v_or_b32_e32 v3, v1, v3
-; GFX6-NEXT: v_lshr_b64 v[0:1], s[2:3], v11
-; GFX6-NEXT: v_lshr_b64 v[4:5], s[2:3], v10
-; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v10
+; GFX6-NEXT: v_lshr_b64 v[0:1], s[2:3], v8
+; GFX6-NEXT: v_lshr_b64 v[4:5], s[2:3], v11
+; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v11
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX6-NEXT: v_mov_b32_e32 v2, s0
; GFX6-NEXT: v_mov_b32_e32 v3, s1
-; GFX6-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v10
+; GFX6-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v11
; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1]
; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1]
; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
-; GFX6-NEXT: v_or_b32_e32 v0, v8, v0
+; GFX6-NEXT: v_or_b32_e32 v0, v9, v0
; GFX6-NEXT: v_or_b32_e32 v1, v6, v1
; GFX6-NEXT: v_or_b32_e32 v2, v7, v2
-; GFX6-NEXT: v_or_b32_e32 v3, v9, v3
+; GFX6-NEXT: v_or_b32_e32 v3, v10, v3
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_fshl_i128_ssv:
@@ -6359,15 +6360,15 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
; GFX8-NEXT: v_sub_u32_e32 v1, vcc, 64, v7
; GFX8-NEXT: v_lshrrev_b64 v[1:2], v1, s[0:1]
; GFX8-NEXT: v_lshlrev_b64 v[3:4], v7, s[2:3]
-; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, 64, v7
+; GFX8-NEXT: v_add_u32_e32 v9, vcc, 0xffffffc0, v7
; GFX8-NEXT: v_lshlrev_b64 v[5:6], v7, s[0:1]
; GFX8-NEXT: v_or_b32_e32 v3, v1, v3
; GFX8-NEXT: v_or_b32_e32 v4, v2, v4
-; GFX8-NEXT: v_lshlrev_b64 v[1:2], v8, s[0:1]
+; GFX8-NEXT: v_lshlrev_b64 v[1:2], v9, s[0:1]
; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7
; GFX8-NEXT: v_not_b32_e32 v0, v0
; GFX8-NEXT: s_mov_b32 s8, 0
-; GFX8-NEXT: v_cndmask_b32_e32 v8, 0, v5, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
@@ -6376,33 +6377,34 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
; GFX8-NEXT: s_lshr_b64 s[0:1], s[4:5], 1
; GFX8-NEXT: s_lshl_b32 s9, s6, 31
-; GFX8-NEXT: v_and_b32_e32 v10, 0x7f, v0
+; GFX8-NEXT: v_and_b32_e32 v11, 0x7f, v0
; GFX8-NEXT: v_cndmask_b32_e32 v7, v1, v3, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v9, v2, v4, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v10, v2, v4, vcc
; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[8:9]
; GFX8-NEXT: s_lshr_b64 s[2:3], s[6:7], 1
-; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 64, v10
-; GFX8-NEXT: v_lshrrev_b64 v[0:1], v10, s[0:1]
+; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 64, v11
+; GFX8-NEXT: v_not_b32_e32 v8, 63
+; GFX8-NEXT: v_lshrrev_b64 v[0:1], v11, s[0:1]
; GFX8-NEXT: v_lshlrev_b64 v[2:3], v2, s[2:3]
-; GFX8-NEXT: v_subrev_u32_e32 v11, vcc, 64, v10
+; GFX8-NEXT: v_add_u32_e32 v8, vcc, v11, v8
; GFX8-NEXT: v_or_b32_e32 v2, v0, v2
; GFX8-NEXT: v_or_b32_e32 v3, v1, v3
-; GFX8-NEXT: v_lshrrev_b64 v[0:1], v11, s[2:3]
-; GFX8-NEXT: v_lshrrev_b64 v[4:5], v10, s[2:3]
-; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v10
+; GFX8-NEXT: v_lshrrev_b64 v[0:1], v8, s[2:3]
+; GFX8-NEXT: v_lshrrev_b64 v[4:5], v11, s[2:3]
+; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v11
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX8-NEXT: v_mov_b32_e32 v2, s0
; GFX8-NEXT: v_mov_b32_e32 v3, s1
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v10
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v11
; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1]
; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1]
; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
-; GFX8-NEXT: v_or_b32_e32 v0, v8, v0
+; GFX8-NEXT: v_or_b32_e32 v0, v9, v0
; GFX8-NEXT: v_or_b32_e32 v1, v6, v1
; GFX8-NEXT: v_or_b32_e32 v2, v7, v2
-; GFX8-NEXT: v_or_b32_e32 v3, v9, v3
+; GFX8-NEXT: v_or_b32_e32 v3, v10, v3
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: v_fshl_i128_ssv:
@@ -6411,7 +6413,7 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
; GFX9-NEXT: v_sub_u32_e32 v1, 64, v7
; GFX9-NEXT: v_lshrrev_b64 v[1:2], v1, s[0:1]
; GFX9-NEXT: v_lshlrev_b64 v[3:4], v7, s[2:3]
-; GFX9-NEXT: v_subrev_u32_e32 v8, 64, v7
+; GFX9-NEXT: v_add_u32_e32 v8, 0xffffffc0, v7
; GFX9-NEXT: v_lshlrev_b64 v[5:6], v7, s[0:1]
; GFX9-NEXT: v_or_b32_e32 v3, v1, v3
; GFX9-NEXT: v_or_b32_e32 v4, v2, v4
@@ -6436,7 +6438,7 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
; GFX9-NEXT: v_cndmask_b32_e32 v7, v1, v3, vcc
; GFX9-NEXT: v_lshrrev_b64 v[0:1], v10, s[0:1]
; GFX9-NEXT: v_lshlrev_b64 v[2:3], v2, s[2:3]
-; GFX9-NEXT: v_subrev_u32_e32 v11, 64, v10
+; GFX9-NEXT: v_add_u32_e32 v11, 0xffffffc0, v10
; GFX9-NEXT: v_or_b32_e32 v2, v0, v2
; GFX9-NEXT: v_or_b32_e32 v3, v1, v3
; GFX9-NEXT: v_lshrrev_b64 v[0:1], v11, s[2:3]
@@ -6471,12 +6473,12 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
; GFX10-NEXT: s_lshr_b64 s[6:7], s[6:7], 1
; GFX10-NEXT: v_lshrrev_b64 v[2:3], v3, s[0:1]
; GFX10-NEXT: v_sub_nc_u32_e32 v8, 64, v13
-; GFX10-NEXT: v_subrev_nc_u32_e32 v10, 64, v12
+; GFX10-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v12
; GFX10-NEXT: v_lshrrev_b64 v[6:7], v13, s[8:9]
; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v12
; GFX10-NEXT: v_lshlrev_b64 v[4:5], v12, s[0:1]
; GFX10-NEXT: v_or_b32_e32 v2, v2, v0
-; GFX10-NEXT: v_subrev_nc_u32_e32 v0, 64, v13
+; GFX10-NEXT: v_add_nc_u32_e32 v0, 0xffffffc0, v13
; GFX10-NEXT: v_lshlrev_b64 v[8:9], v8, s[6:7]
; GFX10-NEXT: v_lshlrev_b64 v[10:11], v10, s[0:1]
; GFX10-NEXT: v_or_b32_e32 v3, v3, v1
@@ -6522,7 +6524,7 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
; GFX11-NEXT: v_sub_nc_u32_e32 v3, 64, v12
; GFX11-NEXT: v_lshlrev_b64 v[0:1], v12, s[2:3]
; GFX11-NEXT: v_sub_nc_u32_e32 v8, 64, v13
-; GFX11-NEXT: v_subrev_nc_u32_e32 v10, 64, v12
+; GFX11-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v12
; GFX11-NEXT: v_lshrrev_b64 v[6:7], v13, s[8:9]
; GFX11-NEXT: v_lshrrev_b64 v[2:3], v3, s[0:1]
; GFX11-NEXT: v_cmp_eq_u32_e64 s4, 0, v12
@@ -6531,7 +6533,7 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v13
; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v13
; GFX11-NEXT: v_or_b32_e32 v2, v2, v0
-; GFX11-NEXT: v_subrev_nc_u32_e32 v0, 64, v13
+; GFX11-NEXT: v_add_nc_u32_e32 v0, 0xffffffc0, v13
; GFX11-NEXT: v_or_b32_e32 v3, v3, v1
; GFX11-NEXT: v_or_b32_e32 v6, v6, v8
; GFX11-NEXT: v_or_b32_e32 v7, v7, v9
@@ -7677,12 +7679,13 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_and_b32_e32 v19, 0x7f, v16
+; GFX6-NEXT: v_not_b32_e32 v25, 63
; GFX6-NEXT: v_sub_i32_e32 v17, vcc, 64, v19
-; GFX6-NEXT: v_subrev_i32_e32 v25, vcc, 64, v19
+; GFX6-NEXT: v_add_i32_e32 v26, vcc, v19, v25
; GFX6-NEXT: v_lshr_b64 v[17:18], v[0:1], v17
; GFX6-NEXT: v_lshl_b64 v[21:22], v[2:3], v19
; GFX6-NEXT: v_lshl_b64 v[23:24], v[0:1], v19
-; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v25
+; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v26
; GFX6-NEXT: v_or_b32_e32 v17, v17, v21
; GFX6-NEXT: v_or_b32_e32 v18, v18, v22
; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v19
@@ -7700,7 +7703,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX6-NEXT: v_lshr_b64 v[2:3], v[10:11], 1
; GFX6-NEXT: v_and_b32_e32 v23, 0x7f, v8
; GFX6-NEXT: v_sub_i32_e32 v10, vcc, 64, v23
-; GFX6-NEXT: v_subrev_i32_e32 v24, vcc, 64, v23
+; GFX6-NEXT: v_add_i32_e32 v24, vcc, v23, v25
; GFX6-NEXT: v_lshr_b64 v[8:9], v[0:1], v23
; GFX6-NEXT: v_lshl_b64 v[10:11], v[2:3], v10
; GFX6-NEXT: v_lshr_b64 v[16:17], v[2:3], v23
@@ -7719,7 +7722,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v17, vcc
; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 64, v18
; GFX6-NEXT: v_or_b32_e32 v3, v19, v3
-; GFX6-NEXT: v_subrev_i32_e32 v19, vcc, 64, v18
+; GFX6-NEXT: v_add_i32_e32 v19, vcc, v18, v25
; GFX6-NEXT: v_lshr_b64 v[8:9], v[4:5], v8
; GFX6-NEXT: v_lshl_b64 v[10:11], v[6:7], v18
; GFX6-NEXT: v_lshl_b64 v[16:17], v[4:5], v18
@@ -7741,7 +7744,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX6-NEXT: v_lshr_b64 v[6:7], v[14:15], 1
; GFX6-NEXT: v_and_b32_e32 v14, 0x7f, v8
; GFX6-NEXT: v_sub_i32_e32 v10, vcc, 64, v14
-; GFX6-NEXT: v_subrev_i32_e32 v15, vcc, 64, v14
+; GFX6-NEXT: v_add_i32_e32 v15, vcc, v14, v25
; GFX6-NEXT: v_lshr_b64 v[8:9], v[4:5], v14
; GFX6-NEXT: v_lshl_b64 v[10:11], v[6:7], v10
; GFX6-NEXT: v_lshr_b64 v[12:13], v[6:7], v14
@@ -7768,12 +7771,13 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v19, 0x7f, v16
+; GFX8-NEXT: v_not_b32_e32 v25, 63
; GFX8-NEXT: v_sub_u32_e32 v17, vcc, 64, v19
-; GFX8-NEXT: v_subrev_u32_e32 v25, vcc, 64, v19
+; GFX8-NEXT: v_add_u32_e32 v26, vcc, v19, v25
; GFX8-NEXT: v_lshrrev_b64 v[17:18], v17, v[0:1]
; GFX8-NEXT: v_lshlrev_b64 v[21:22], v19, v[2:3]
; GFX8-NEXT: v_lshlrev_b64 v[23:24], v19, v[0:1]
-; GFX8-NEXT: v_lshlrev_b64 v[0:1], v25, v[0:1]
+; GFX8-NEXT: v_lshlrev_b64 v[0:1], v26, v[0:1]
; GFX8-NEXT: v_or_b32_e32 v17, v17, v21
; GFX8-NEXT: v_or_b32_e32 v18, v18, v22
; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v19
@@ -7791,7 +7795,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX8-NEXT: v_lshrrev_b64 v[2:3], 1, v[10:11]
; GFX8-NEXT: v_and_b32_e32 v23, 0x7f, v8
; GFX8-NEXT: v_sub_u32_e32 v10, vcc, 64, v23
-; GFX8-NEXT: v_subrev_u32_e32 v24, vcc, 64, v23
+; GFX8-NEXT: v_add_u32_e32 v24, vcc, v23, v25
; GFX8-NEXT: v_lshrrev_b64 v[8:9], v23, v[0:1]
; GFX8-NEXT: v_lshlrev_b64 v[10:11], v10, v[2:3]
; GFX8-NEXT: v_lshrrev_b64 v[16:17], v23, v[2:3]
@@ -7810,7 +7814,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v17, vcc
; GFX8-NEXT: v_sub_u32_e32 v8, vcc, 64, v18
; GFX8-NEXT: v_or_b32_e32 v3, v19, v3
-; GFX8-NEXT: v_subrev_u32_e32 v19, vcc, 64, v18
+; GFX8-NEXT: v_add_u32_e32 v19, vcc, v18, v25
; GFX8-NEXT: v_lshrrev_b64 v[8:9], v8, v[4:5]
; GFX8-NEXT: v_lshlrev_b64 v[10:11], v18, v[6:7]
; GFX8-NEXT: v_lshlrev_b64 v[16:17], v18, v[4:5]
@@ -7832,7 +7836,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX8-NEXT: v_lshrrev_b64 v[6:7], 1, v[14:15]
; GFX8-NEXT: v_and_b32_e32 v14, 0x7f, v8
; GFX8-NEXT: v_sub_u32_e32 v10, vcc, 64, v14
-; GFX8-NEXT: v_subrev_u32_e32 v15, vcc, 64, v14
+; GFX8-NEXT: v_add_u32_e32 v15, vcc, v14, v25
; GFX8-NEXT: v_lshrrev_b64 v[8:9], v14, v[4:5]
; GFX8-NEXT: v_lshlrev_b64 v[10:11], v10, v[6:7]
; GFX8-NEXT: v_lshrrev_b64 v[12:13], v14, v[6:7]
@@ -7860,7 +7864,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v19, 0x7f, v16
; GFX9-NEXT: v_sub_u32_e32 v17, 64, v19
-; GFX9-NEXT: v_subrev_u32_e32 v25, 64, v19
+; GFX9-NEXT: v_add_u32_e32 v25, 0xffffffc0, v19
; GFX9-NEXT: v_lshrrev_b64 v[17:18], v17, v[0:1]
; GFX9-NEXT: v_lshlrev_b64 v[21:22], v19, v[2:3]
; GFX9-NEXT: v_lshlrev_b64 v[23:24], v19, v[0:1]
@@ -7881,7 +7885,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX9-NEXT: v_and_b32_e32 v23, 0x7f, v8
; GFX9-NEXT: v_lshl_or_b32 v1, v10, 31, v1
; GFX9-NEXT: v_sub_u32_e32 v10, 64, v23
-; GFX9-NEXT: v_subrev_u32_e32 v24, 64, v23
+; GFX9-NEXT: v_add_u32_e32 v24, 0xffffffc0, v23
; GFX9-NEXT: v_lshrrev_b64 v[8:9], v23, v[0:1]
; GFX9-NEXT: v_lshlrev_b64 v[10:11], v10, v[2:3]
; GFX9-NEXT: v_lshrrev_b64 v[16:17], v23, v[2:3]
@@ -7900,7 +7904,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v17, vcc
; GFX9-NEXT: v_sub_u32_e32 v8, 64, v18
; GFX9-NEXT: v_or_b32_e32 v3, v19, v3
-; GFX9-NEXT: v_subrev_u32_e32 v19, 64, v18
+; GFX9-NEXT: v_add_u32_e32 v19, 0xffffffc0, v18
; GFX9-NEXT: v_lshrrev_b64 v[8:9], v8, v[4:5]
; GFX9-NEXT: v_lshlrev_b64 v[10:11], v18, v[6:7]
; GFX9-NEXT: v_lshlrev_b64 v[16:17], v18, v[4:5]
@@ -7921,7 +7925,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX9-NEXT: v_lshrrev_b64 v[6:7], 1, v[14:15]
; GFX9-NEXT: v_and_b32_e32 v14, 0x7f, v8
; GFX9-NEXT: v_sub_u32_e32 v10, 64, v14
-; GFX9-NEXT: v_subrev_u32_e32 v15, 64, v14
+; GFX9-NEXT: v_add_u32_e32 v15, 0xffffffc0, v14
; GFX9-NEXT: v_lshrrev_b64 v[8:9], v14, v[4:5]
; GFX9-NEXT: v_lshlrev_b64 v[10:11], v10, v[6:7]
; GFX9-NEXT: v_lshrrev_b64 v[12:13], v14, v[6:7]
@@ -7956,13 +7960,13 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX10-NEXT: v_lshl_or_b32 v9, v10, 31, v9
; GFX10-NEXT: v_lshrrev_b64 v[10:11], 1, v[10:11]
; GFX10-NEXT: v_lshrrev_b64 v[16:17], v17, v[0:1]
-; GFX10-NEXT: v_subrev_nc_u32_e32 v29, 64, v27
+; GFX10-NEXT: v_add_nc_u32_e32 v29, 0xffffffc0, v27
; GFX10-NEXT: v_sub_nc_u32_e32 v25, 64, v28
; GFX10-NEXT: v_lshlrev_b64 v[21:22], v27, v[0:1]
; GFX10-NEXT: v_lshrrev_b64 v[23:24], v28, v[8:9]
; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v27
; GFX10-NEXT: v_or_b32_e32 v18, v16, v18
-; GFX10-NEXT: v_subrev_nc_u32_e32 v16, 64, v28
+; GFX10-NEXT: v_add_nc_u32_e32 v16, 0xffffffc0, v28
; GFX10-NEXT: v_lshlrev_b64 v[25:26], v25, v[10:11]
; GFX10-NEXT: v_lshlrev_b64 v[0:1], v29, v[0:1]
; GFX10-NEXT: v_or_b32_e32 v19, v17, v19
@@ -7999,10 +8003,10 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX10-NEXT: v_lshl_or_b32 v9, v14, 31, v9
; GFX10-NEXT: v_lshrrev_b64 v[14:15], 1, v[14:15]
; GFX10-NEXT: v_sub_nc_u32_e32 v20, 64, v22
-; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 64, v24
+; GFX10-NEXT: v_add_nc_u32_e32 v3, 0xffffffc0, v24
; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v24
; GFX10-NEXT: v_or_b32_e32 v12, v10, v12
-; GFX10-NEXT: v_subrev_nc_u32_e32 v10, 64, v22
+; GFX10-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v22
; GFX10-NEXT: v_lshrrev_b64 v[18:19], v22, v[8:9]
; GFX10-NEXT: v_lshlrev_b64 v[20:21], v20, v[14:15]
; GFX10-NEXT: v_lshlrev_b64 v[3:4], v3, v[4:5]
@@ -8049,19 +8053,19 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX11-NEXT: v_cndmask_b32_e32 v22, 0, v22, vcc_lo
; GFX11-NEXT: v_sub_nc_u32_e32 v17, 64, v27
; GFX11-NEXT: v_lshlrev_b64 v[18:19], v27, v[2:3]
-; GFX11-NEXT: v_subrev_nc_u32_e32 v29, 64, v27
; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 0, v27
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_cndmask_b32_e32 v21, 0, v21, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_lshrrev_b64 v[16:17], v17, v[0:1]
-; GFX11-NEXT: v_lshlrev_b64 v[0:1], v29, v[0:1]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_or_b32_e32 v19, v17, v19
; GFX11-NEXT: v_or_b32_e32 v18, v16, v18
-; GFX11-NEXT: v_cndmask_b32_e32 v21, 0, v21, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_dual_cndmask_b32 v19, v1, v19 :: v_dual_cndmask_b32 v18, v0, v18
+; GFX11-NEXT: v_add_nc_u32_e32 v29, 0xffffffc0, v27
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_or_b32_e32 v19, v17, v19
+; GFX11-NEXT: v_lshlrev_b64 v[0:1], v29, v[0:1]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_dual_cndmask_b32 v18, v0, v18 :: v_dual_cndmask_b32 v19, v1, v19
; GFX11-NEXT: v_sub_nc_u32_e32 v25, 64, v28
-; GFX11-NEXT: v_subrev_nc_u32_e32 v16, 64, v28
+; GFX11-NEXT: v_add_nc_u32_e32 v16, 0xffffffc0, v28
; GFX11-NEXT: v_lshrrev_b64 v[23:24], v28, v[8:9]
; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v28
; GFX11-NEXT: v_lshrrev_b64 v[0:1], v28, v[10:11]
@@ -8095,26 +8099,26 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX11-NEXT: v_lshl_or_b32 v9, v14, 31, v9
; GFX11-NEXT: v_lshrrev_b64 v[14:15], 1, v[14:15]
; GFX11-NEXT: v_sub_nc_u32_e32 v20, 64, v22
-; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 64, v24
+; GFX11-NEXT: v_add_nc_u32_e32 v3, 0xffffffc0, v24
; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v22
; GFX11-NEXT: v_or_b32_e32 v12, v10, v12
-; GFX11-NEXT: v_subrev_nc_u32_e32 v10, 64, v22
+; GFX11-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v22
; GFX11-NEXT: v_lshrrev_b64 v[18:19], v22, v[8:9]
; GFX11-NEXT: v_lshlrev_b64 v[20:21], v20, v[14:15]
; GFX11-NEXT: v_lshlrev_b64 v[3:4], v3, v[4:5]
; GFX11-NEXT: v_or_b32_e32 v5, v11, v13
-; GFX11-NEXT: v_lshrrev_b64 v[10:11], v10, v[14:15]
; GFX11-NEXT: v_cndmask_b32_e32 v13, 0, v16, vcc_lo
+; GFX11-NEXT: v_lshrrev_b64 v[10:11], v10, v[14:15]
; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v22
; GFX11-NEXT: v_or_b32_e32 v16, v18, v20
; GFX11-NEXT: v_or_b32_e32 v18, v19, v21
; GFX11-NEXT: v_dual_cndmask_b32 v12, v3, v12 :: v_dual_cndmask_b32 v5, v4, v5
; GFX11-NEXT: v_lshrrev_b64 v[3:4], v22, v[14:15]
-; GFX11-NEXT: v_cndmask_b32_e32 v14, 0, v17, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, v16, s0
; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v24
; GFX11-NEXT: v_cndmask_b32_e64 v11, v11, v18, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
+; GFX11-NEXT: v_cndmask_b32_e32 v14, 0, v17, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e64 v6, v12, v6, s2
; GFX11-NEXT: v_cndmask_b32_e64 v7, v5, v7, s2
; GFX11-NEXT: v_cndmask_b32_e64 v5, v10, v8, s1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll
index 58304d2072d7f6..dbc8f12c2c25c4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll
@@ -21,10 +21,10 @@ define amdgpu_ps i7 @s_fshr_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX6-NEXT: v_mul_lo_u32 v0, v0, 7
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
-; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 7, v0
+; GFX6-NEXT: v_add_i32_e32 v1, vcc, -7, v0
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 7, v0
+; GFX6-NEXT: v_add_i32_e32 v1, vcc, -7, v0
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 6, v0
@@ -51,10 +51,10 @@ define amdgpu_ps i7 @s_fshr_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
; GFX8-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX8-NEXT: v_mul_lo_u32 v0, v0, 7
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s2, v0
-; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 7, v0
+; GFX8-NEXT: v_add_u32_e32 v1, vcc, -7, v0
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 7, v0
+; GFX8-NEXT: v_add_u32_e32 v1, vcc, -7, v0
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX8-NEXT: v_sub_u16_e32 v1, 6, v0
@@ -81,10 +81,10 @@ define amdgpu_ps i7 @s_fshr_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
; GFX9-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX9-NEXT: v_mul_lo_u32 v0, v0, 7
; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0
-; GFX9-NEXT: v_subrev_u32_e32 v1, 7, v0
+; GFX9-NEXT: v_add_u32_e32 v1, -7, v0
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX9-NEXT: v_subrev_u32_e32 v1, 7, v0
+; GFX9-NEXT: v_add_u32_e32 v1, -7, v0
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX9-NEXT: v_sub_u16_e32 v1, 6, v0
@@ -111,10 +111,10 @@ define amdgpu_ps i7 @s_fshr_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
; GFX10-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX10-NEXT: v_mul_lo_u32 v0, v0, 7
; GFX10-NEXT: v_sub_nc_u32_e32 v0, s2, v0
-; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
+; GFX10-NEXT: v_add_nc_u32_e32 v1, -7, v0
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
-; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
+; GFX10-NEXT: v_add_nc_u32_e32 v1, -7, v0
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10-NEXT: v_sub_nc_u16 v1, 6, v0
@@ -147,11 +147,11 @@ define amdgpu_ps i7 @s_fshr_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
; GFX11-NEXT: v_mul_lo_u32 v0, v0, 7
; GFX11-NEXT: v_sub_nc_u32_e32 v0, s2, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
+; GFX11-NEXT: v_add_nc_u32_e32 v1, -7, v0
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
+; GFX11-NEXT: v_add_nc_u32_e32 v1, -7, v0
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
@@ -186,10 +186,10 @@ define i7 @v_fshr_i7(i7 %lhs, i7 %rhs, i7 %amt) {
; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX6-NEXT: v_mul_lo_u32 v3, v3, 7
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3
-; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 7, v2
+; GFX6-NEXT: v_add_i32_e32 v3, vcc, -7, v2
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 7, v2
+; GFX6-NEXT: v_add_i32_e32 v3, vcc, -7, v2
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 6, v2
@@ -216,10 +216,10 @@ define i7 @v_fshr_i7(i7 %lhs, i7 %rhs, i7 %amt) {
; GFX8-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX8-NEXT: v_mul_lo_u32 v3, v3, 7
; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3
-; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 7, v2
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, -7, v2
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 7, v2
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, -7, v2
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX8-NEXT: v_sub_u16_e32 v3, 6, v2
@@ -246,10 +246,10 @@ define i7 @v_fshr_i7(i7 %lhs, i7 %rhs, i7 %amt) {
; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX9-NEXT: v_mul_lo_u32 v3, v3, 7
; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3
-; GFX9-NEXT: v_subrev_u32_e32 v3, 7, v2
+; GFX9-NEXT: v_add_u32_e32 v3, -7, v2
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX9-NEXT: v_subrev_u32_e32 v3, 7, v2
+; GFX9-NEXT: v_add_u32_e32 v3, -7, v2
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX9-NEXT: v_sub_u16_e32 v3, 6, v2
@@ -276,10 +276,10 @@ define i7 @v_fshr_i7(i7 %lhs, i7 %rhs, i7 %amt) {
; GFX10-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX10-NEXT: v_mul_lo_u32 v3, v3, 7
; GFX10-NEXT: v_sub_nc_u32_e32 v2, v2, v3
-; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
+; GFX10-NEXT: v_add_nc_u32_e32 v3, -7, v2
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
-; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
+; GFX10-NEXT: v_add_nc_u32_e32 v3, -7, v2
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
; GFX10-NEXT: v_sub_nc_u16 v3, 6, v2
@@ -312,11 +312,11 @@ define i7 @v_fshr_i7(i7 %lhs, i7 %rhs, i7 %amt) {
; GFX11-NEXT: v_mul_lo_u32 v3, v3, 7
; GFX11-NEXT: v_sub_nc_u32_e32 v2, v2, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
+; GFX11-NEXT: v_add_nc_u32_e32 v3, -7, v2
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
+; GFX11-NEXT: v_add_nc_u32_e32 v3, -7, v2
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
@@ -1552,16 +1552,16 @@ define amdgpu_ps i24 @s_fshr_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX6-NEXT: s_and_b32 s1, s1, 0xffffff
-; GFX6-NEXT: v_mul_lo_u32 v1, v0, v1
-; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1
-; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; GFX6-NEXT: v_mul_lo_u32 v2, v0, v1
+; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2
+; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
-; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 24, v0
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, v0, v1
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
-; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 24, v0
+; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX6-NEXT: v_add_i32_e32 v1, vcc, v0, v1
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 23, v0
@@ -1583,16 +1583,16 @@ define amdgpu_ps i24 @s_fshr_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX8-NEXT: s_and_b32 s1, s1, 0xffffff
-; GFX8-NEXT: v_mul_lo_u32 v1, v0, v1
-; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
+; GFX8-NEXT: v_mul_lo_u32 v2, v0, v1
+; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s2, v0
-; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 24, v0
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, v0, v1
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 24, v0
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT: v_add_u32_e32 v1, vcc, v0, v1
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX8-NEXT: v_sub_u32_e32 v1, vcc, 23, v0
@@ -1620,10 +1620,10 @@ define amdgpu_ps i24 @s_fshr_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
; GFX9-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24
; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0
-; GFX9-NEXT: v_subrev_u32_e32 v1, 24, v0
+; GFX9-NEXT: v_add_u32_e32 v1, 0xffffffe8, v0
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX9-NEXT: v_subrev_u32_e32 v1, 24, v0
+; GFX9-NEXT: v_add_u32_e32 v1, 0xffffffe8, v0
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX9-NEXT: v_sub_u32_e32 v1, 23, v0
@@ -1649,10 +1649,10 @@ define amdgpu_ps i24 @s_fshr_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
; GFX10-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24
; GFX10-NEXT: v_sub_nc_u32_e32 v0, s2, v0
-; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 24, v0
+; GFX10-NEXT: v_add_nc_u32_e32 v1, 0xffffffe8, v0
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
-; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 24, v0
+; GFX10-NEXT: v_add_nc_u32_e32 v1, 0xffffffe8, v0
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10-NEXT: v_sub_nc_u32_e32 v1, 23, v0
@@ -1684,11 +1684,11 @@ define amdgpu_ps i24 @s_fshr_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
; GFX11-NEXT: v_mul_lo_u32 v0, v0, 24
; GFX11-NEXT: v_sub_nc_u32_e32 v0, s2, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 24, v0
+; GFX11-NEXT: v_add_nc_u32_e32 v1, 0xffffffe8, v0
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 24, v0
+; GFX11-NEXT: v_add_nc_u32_e32 v1, 0xffffffe8, v0
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
@@ -1717,16 +1717,16 @@ define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) {
; GFX6-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; GFX6-NEXT: v_cvt_u32_f32_e32 v3, v3
; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1
-; GFX6-NEXT: v_mul_lo_u32 v4, v3, v4
-; GFX6-NEXT: v_mul_hi_u32 v4, v3, v4
-; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v4
+; GFX6-NEXT: v_mul_lo_u32 v5, v3, v4
+; GFX6-NEXT: v_mul_hi_u32 v5, v3, v5
+; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v5
; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX6-NEXT: v_mul_lo_u32 v3, v3, 24
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3
-; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v2
+; GFX6-NEXT: v_add_i32_e32 v3, vcc, v2, v4
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v2
+; GFX6-NEXT: v_add_i32_e32 v3, vcc, 0xffffffe8, v2
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v2
@@ -1748,16 +1748,16 @@ define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) {
; GFX8-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v3
; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1
-; GFX8-NEXT: v_mul_lo_u32 v4, v3, v4
-; GFX8-NEXT: v_mul_hi_u32 v4, v3, v4
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v4
+; GFX8-NEXT: v_mul_lo_u32 v5, v3, v4
+; GFX8-NEXT: v_mul_hi_u32 v5, v3, v5
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v5
; GFX8-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX8-NEXT: v_mul_lo_u32 v3, v3, 24
; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3
-; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v2
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, v2, v4
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v2
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0xffffffe8, v2
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v2
@@ -1785,10 +1785,10 @@ define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) {
; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX9-NEXT: v_mul_lo_u32 v3, v3, 24
; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3
-; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v2
+; GFX9-NEXT: v_add_u32_e32 v3, 0xffffffe8, v2
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v2
+; GFX9-NEXT: v_add_u32_e32 v3, 0xffffffe8, v2
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX9-NEXT: v_sub_u32_e32 v3, 23, v2
@@ -1814,10 +1814,10 @@ define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) {
; GFX10-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX10-NEXT: v_mul_lo_u32 v3, v3, 24
; GFX10-NEXT: v_sub_nc_u32_e32 v2, v2, v3
-; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v2
+; GFX10-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v2
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
-; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v2
+; GFX10-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v2
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v2
@@ -1849,11 +1849,11 @@ define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) {
; GFX11-NEXT: v_mul_lo_u32 v3, v3, 24
; GFX11-NEXT: v_sub_nc_u32_e32 v2, v2, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v2
+; GFX11-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v2
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2
; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v2
+; GFX11-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v2
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2
; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
@@ -1888,7 +1888,7 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX6-NEXT: s_lshr_b32 s1, s2, 16
; GFX6-NEXT: s_lshr_b32 s7, s3, 8
; GFX6-NEXT: s_bfe_u32 s10, s2, 0x80008
-; GFX6-NEXT: v_mul_lo_u32 v3, v2, v3
+; GFX6-NEXT: v_mul_lo_u32 v4, v2, v3
; GFX6-NEXT: s_or_b32 s8, s8, s9
; GFX6-NEXT: s_and_b32 s9, s2, 0xff
; GFX6-NEXT: s_lshl_b32 s10, s10, 8
@@ -1908,7 +1908,7 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX6-NEXT: v_or_b32_e32 v1, s2, v1
; GFX6-NEXT: s_lshr_b32 s2, s4, 16
; GFX6-NEXT: s_bfe_u32 s9, s4, 0x80008
-; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3
+; GFX6-NEXT: v_mul_hi_u32 v4, v2, v4
; GFX6-NEXT: s_and_b32 s7, s4, 0xff
; GFX6-NEXT: s_lshl_b32 s9, s9, 8
; GFX6-NEXT: s_and_b32 s2, s2, 0xff
@@ -1917,62 +1917,62 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX6-NEXT: s_and_b32 s7, 0xffff, s7
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_or_b32 s2, s7, s2
-; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3
-; GFX6-NEXT: v_mul_hi_u32 v3, s2, v2
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v4
+; GFX6-NEXT: v_mul_hi_u32 v4, s2, v2
; GFX6-NEXT: s_lshr_b32 s3, s5, 8
; GFX6-NEXT: s_and_b32 s5, s5, 0xff
-; GFX6-NEXT: v_mov_b32_e32 v4, s4
+; GFX6-NEXT: v_mov_b32_e32 v5, s4
; GFX6-NEXT: s_and_b32 s3, s3, 0xff
-; GFX6-NEXT: v_alignbit_b32 v4, s5, v4, 24
+; GFX6-NEXT: v_alignbit_b32 v5, s5, v5, 24
; GFX6-NEXT: s_and_b32 s3, 0xffff, s3
-; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX6-NEXT: v_mul_lo_u32 v3, v3, 24
+; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v5
+; GFX6-NEXT: v_mul_lo_u32 v4, v4, 24
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
-; GFX6-NEXT: v_or_b32_e32 v4, s3, v4
-; GFX6-NEXT: v_mul_hi_u32 v2, v4, v2
-; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s2, v3
-; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 24, v3
-; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v3
+; GFX6-NEXT: v_or_b32_e32 v5, s3, v5
+; GFX6-NEXT: v_mul_hi_u32 v2, v5, v2
+; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s2, v4
+; GFX6-NEXT: v_add_i32_e32 v6, vcc, v4, v3
+; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
; GFX6-NEXT: v_mul_lo_u32 v2, v2, 24
-; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 24, v3
-; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v3
-; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v4, v2
-; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 23, v3
-; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
+; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
+; GFX6-NEXT: v_add_i32_e32 v6, vcc, v4, v3
+; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
+; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
+; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v5, v2
+; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 23, v4
+; GFX6-NEXT: v_add_i32_e32 v5, vcc, v2, v3
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX6-NEXT: s_and_b32 s6, s6, 0xff
-; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
; GFX6-NEXT: s_and_b32 s8, 0xffff, s8
; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
-; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
+; GFX6-NEXT: v_add_i32_e32 v3, vcc, v2, v3
; GFX6-NEXT: s_lshl_b32 s2, s6, 17
; GFX6-NEXT: s_lshl_b32 s3, s8, 1
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
; GFX6-NEXT: s_or_b32 s2, s2, s3
-; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v5
-; GFX6-NEXT: v_and_b32_e32 v3, 0xffffff, v3
-; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
-; GFX6-NEXT: v_lshl_b32_e32 v5, s2, v5
-; GFX6-NEXT: v_lshr_b32_e32 v3, s1, v3
-; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 23, v2
+; GFX6-NEXT: v_and_b32_e32 v6, 0xffffff, v6
+; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4
+; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX6-NEXT: v_lshl_b32_e32 v6, s2, v6
+; GFX6-NEXT: v_lshr_b32_e32 v4, s1, v4
+; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v2
; GFX6-NEXT: s_lshl_b32 s0, s0, 17
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
-; GFX6-NEXT: v_or_b32_e32 v3, v5, v3
+; GFX6-NEXT: v_or_b32_e32 v4, v6, v4
; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
-; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4
+; GFX6-NEXT: v_and_b32_e32 v3, 0xffffff, v3
; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
-; GFX6-NEXT: v_lshlrev_b32_e32 v0, v4, v0
+; GFX6-NEXT: v_lshlrev_b32_e32 v0, v3, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1
-; GFX6-NEXT: v_bfe_u32 v2, v3, 8, 8
+; GFX6-NEXT: v_bfe_u32 v2, v4, 8, 8
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
-; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v3
+; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
-; GFX6-NEXT: v_bfe_u32 v2, v3, 16, 8
+; GFX6-NEXT: v_bfe_u32 v2, v4, 16, 8
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v0
@@ -2024,7 +2024,7 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX8-NEXT: v_not_b32_e32 v1, 23
; GFX8-NEXT: s_or_b32 s3, s10, s3
; GFX8-NEXT: s_and_b32 s8, 0xffff, s8
-; GFX8-NEXT: v_mul_lo_u32 v1, v0, v1
+; GFX8-NEXT: v_mul_lo_u32 v2, v0, v1
; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
; GFX8-NEXT: s_lshl_b32 s8, s8, 16
; GFX8-NEXT: s_or_b32 s3, s3, s8
@@ -2034,75 +2034,75 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX8-NEXT: s_lshr_b32 s10, s4, 24
; GFX8-NEXT: s_and_b32 s4, s4, 0xff
; GFX8-NEXT: s_lshl_b32 s8, s8, 8
-; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1
+; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2
; GFX8-NEXT: s_or_b32 s4, s4, s8
; GFX8-NEXT: s_and_b32 s8, s9, 0xff
; GFX8-NEXT: s_and_b32 s8, 0xffff, s8
; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
; GFX8-NEXT: s_lshl_b32 s8, s8, 16
; GFX8-NEXT: s_or_b32 s4, s4, s8
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
-; GFX8-NEXT: v_mul_hi_u32 v1, s4, v0
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
+; GFX8-NEXT: v_mul_hi_u32 v2, s4, v0
; GFX8-NEXT: s_lshr_b32 s11, s5, 8
; GFX8-NEXT: s_and_b32 s5, s5, 0xff
; GFX8-NEXT: s_lshl_b32 s5, s5, 8
-; GFX8-NEXT: v_mul_lo_u32 v1, v1, 24
+; GFX8-NEXT: v_mul_lo_u32 v2, v2, 24
; GFX8-NEXT: s_and_b32 s8, s11, 0xff
; GFX8-NEXT: s_or_b32 s5, s10, s5
; GFX8-NEXT: s_and_b32 s8, 0xffff, s8
; GFX8-NEXT: s_and_b32 s5, 0xffff, s5
; GFX8-NEXT: s_lshl_b32 s8, s8, 16
; GFX8-NEXT: s_or_b32 s5, s5, s8
-; GFX8-NEXT: v_sub_u32_e32 v1, vcc, s4, v1
-; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1
+; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s4, v2
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, v2, v1
; GFX8-NEXT: v_mul_hi_u32 v0, s5, v0
-; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
-; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1
-; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
+; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
+; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, v2, v1
+; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
-; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 23, v1
+; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v2
; GFX8-NEXT: s_lshl_b32 s4, s6, 17
; GFX8-NEXT: s_lshl_b32 s0, s0, 1
; GFX8-NEXT: s_or_b32 s0, s4, s0
+; GFX8-NEXT: v_and_b32_e32 v3, 0xffffff, v3
; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2
-; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1
-; GFX8-NEXT: v_lshlrev_b32_e64 v2, v2, s0
-; GFX8-NEXT: v_lshrrev_b32_e64 v1, v1, s2
+; GFX8-NEXT: v_lshlrev_b32_e64 v3, v3, s0
+; GFX8-NEXT: v_lshrrev_b32_e64 v2, v2, s2
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s5, v0
-; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
-; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v0
+; GFX8-NEXT: v_or_b32_e32 v2, v3, v2
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, v0, v1
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v0
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX8-NEXT: v_add_u32_e32 v1, vcc, v0, v1
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: s_and_b32 s7, 0xffff, s7
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 23, v0
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT: v_sub_u32_e32 v1, vcc, 23, v0
; GFX8-NEXT: s_lshl_b32 s0, s7, 17
; GFX8-NEXT: s_lshl_b32 s1, s1, 1
; GFX8-NEXT: s_or_b32 s0, s0, s1
-; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2
+; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1
; GFX8-NEXT: v_and_b32_e32 v0, 0xffffff, v0
-; GFX8-NEXT: v_lshlrev_b32_e64 v2, v2, s0
+; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s0
; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s3
-; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
-; GFX8-NEXT: v_mov_b32_e32 v2, 8
-; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
+; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
+; GFX8-NEXT: v_mov_b32_e32 v1, 8
+; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT: v_mov_b32_e32 v4, 16
-; GFX8-NEXT: v_or_b32_sdwa v3, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
-; GFX8-NEXT: v_or_b32_e32 v1, v3, v1
+; GFX8-NEXT: v_or_b32_sdwa v3, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
+; GFX8-NEXT: v_or_b32_e32 v2, v3, v2
; GFX8-NEXT: v_and_b32_e32 v3, 0xff, v0
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v3
-; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
-; GFX8-NEXT: v_or_b32_e32 v1, v1, v3
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
-; GFX8-NEXT: v_readfirstlane_b32 s0, v1
+; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
+; GFX8-NEXT: v_or_b32_e32 v2, v2, v3
+; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
+; GFX8-NEXT: v_readfirstlane_b32 s0, v2
; GFX8-NEXT: v_readfirstlane_b32 s1, v0
; GFX8-NEXT: ; return to shader part epilog
;
@@ -2175,11 +2175,11 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX9-NEXT: s_or_b32 s5, s5, s8
; GFX9-NEXT: v_mul_hi_u32 v0, s5, v0
; GFX9-NEXT: v_sub_u32_e32 v1, s4, v1
-; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v1
+; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffe8, v1
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24
-; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v1
+; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffe8, v1
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
; GFX9-NEXT: s_and_b32 s0, 0xffff, s0
; GFX9-NEXT: s_and_b32 s6, 0xffff, s6
@@ -2193,10 +2193,10 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX9-NEXT: v_lshrrev_b32_e64 v1, v1, s2
; GFX9-NEXT: v_sub_u32_e32 v0, s5, v0
; GFX9-NEXT: v_lshl_or_b32 v1, s0, v2, v1
-; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v0
+; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffe8, v0
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v0
+; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffe8, v0
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
; GFX9-NEXT: s_and_b32 s7, 0xffff, s7
@@ -2294,23 +2294,23 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX10-NEXT: s_and_b32 s4, s11, 0xff
; GFX10-NEXT: s_lshl_b32 s0, s0, 1
; GFX10-NEXT: s_and_b32 s4, 0xffff, s4
-; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1
+; GFX10-NEXT: v_add_nc_u32_e32 v2, 0xffffffe8, v1
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
; GFX10-NEXT: s_lshl_b32 s4, s4, 16
; GFX10-NEXT: s_lshl_b32 s1, s1, 1
; GFX10-NEXT: s_or_b32 s2, s2, s4
; GFX10-NEXT: s_and_b32 s4, s13, 0xff
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo
-; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0
+; GFX10-NEXT: v_add_nc_u32_e32 v2, 0xffffffe8, v0
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
; GFX10-NEXT: s_and_b32 s4, 0xffff, s4
-; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v1
+; GFX10-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v1
; GFX10-NEXT: s_lshl_b32 s4, s4, 16
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
; GFX10-NEXT: s_or_b32 s3, s3, s4
; GFX10-NEXT: s_lshl_b32 s4, s7, 17
-; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0
+; GFX10-NEXT: v_add_nc_u32_e32 v2, 0xffffffe8, v0
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
; GFX10-NEXT: s_or_b32 s0, s4, s0
@@ -2393,69 +2393,67 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX11-NEXT: s_and_b32 s0, s0, 0xff
; GFX11-NEXT: v_mul_hi_u32 v1, s4, v0
; GFX11-NEXT: v_mul_hi_u32 v0, s5, v0
+; GFX11-NEXT: s_lshr_b32 s13, s3, 8
+; GFX11-NEXT: s_and_b32 s3, s3, 0xff
; GFX11-NEXT: s_lshl_b32 s6, s6, 8
; GFX11-NEXT: s_and_b32 s7, s7, 0xff
+; GFX11-NEXT: s_lshl_b32 s3, s3, 8
+; GFX11-NEXT: s_and_b32 s13, s13, 0xff
+; GFX11-NEXT: v_mul_lo_u32 v1, v1, 24
+; GFX11-NEXT: v_mul_lo_u32 v0, v0, 24
; GFX11-NEXT: s_or_b32 s0, s0, s6
; GFX11-NEXT: s_and_b32 s7, 0xffff, s7
+; GFX11-NEXT: s_or_b32 s3, s12, s3
; GFX11-NEXT: s_and_b32 s0, 0xffff, s0
-; GFX11-NEXT: s_lshr_b32 s13, s3, 8
-; GFX11-NEXT: v_mul_lo_u32 v1, v1, 24
-; GFX11-NEXT: v_mul_lo_u32 v0, v0, 24
+; GFX11-NEXT: s_and_b32 s3, 0xffff, s3
; GFX11-NEXT: s_lshl_b32 s0, s0, 1
-; GFX11-NEXT: s_and_b32 s3, s3, 0xff
-; GFX11-NEXT: s_and_b32 s13, s13, 0xff
-; GFX11-NEXT: s_lshl_b32 s3, s3, 8
-; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
-; GFX11-NEXT: s_or_b32 s3, s12, s3
; GFX11-NEXT: v_sub_nc_u32_e32 v1, s4, v1
; GFX11-NEXT: v_sub_nc_u32_e32 v0, s5, v0
; GFX11-NEXT: s_lshl_b32 s4, s10, 8
; GFX11-NEXT: s_and_b32 s10, 0xffff, s13
; GFX11-NEXT: s_or_b32 s2, s2, s4
-; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v1
+; GFX11-NEXT: v_add_nc_u32_e32 v2, 0xffffffe8, v1
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
; GFX11-NEXT: s_lshl_b32 s4, s9, 16
; GFX11-NEXT: s_and_b32 s2, 0xffff, s2
-; GFX11-NEXT: s_and_b32 s3, 0xffff, s3
+; GFX11-NEXT: s_lshl_b32 s5, s10, 16
; GFX11-NEXT: s_or_b32 s2, s2, s4
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo
-; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v0
+; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v2 :: v_dual_add_nc_u32 v2, 0xffffffe8, v0
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
; GFX11-NEXT: s_lshl_b32 s4, s7, 17
-; GFX11-NEXT: s_lshl_b32 s5, s10, 16
-; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v1
+; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v1
; GFX11-NEXT: s_or_b32 s0, s4, s0
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
; GFX11-NEXT: s_lshl_b32 s1, s1, 1
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v1
-; GFX11-NEXT: v_and_b32_e32 v1, 0xffffff, v1
-; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v0
+; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v3 :: v_dual_add_nc_u32 v2, 0xffffffe8, v0
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
-; GFX11-NEXT: v_lshrrev_b32_e64 v1, v1, s2
-; GFX11-NEXT: s_or_b32 s2, s3, s5
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v1
+; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_and_b32 v1, 0xffffff, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v3
+; GFX11-NEXT: v_lshrrev_b32_e64 v1, v1, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT: v_and_b32_e32 v0, 0xffffff, v0
+; GFX11-NEXT: s_or_b32 s2, s3, s5
; GFX11-NEXT: v_lshl_or_b32 v1, s0, v2, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v3
+; GFX11-NEXT: v_lshrrev_b32_e64 v0, v0, s2
; GFX11-NEXT: s_lshl_b32 s0, s8, 17
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: s_or_b32 s0, s0, s1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v3
; GFX11-NEXT: v_bfe_u32 v3, v1, 16, 8
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3
-; GFX11-NEXT: v_and_b32_e32 v0, 0xffffff, v0
-; GFX11-NEXT: v_lshrrev_b32_e64 v0, v0, s2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_lshl_or_b32 v0, s0, v2, v0
; GFX11-NEXT: v_bfe_u32 v2, v1, 8, 8
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GFX11-NEXT: v_lshlrev_b32_e32 v4, 24, v4
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
@@ -2491,32 +2489,32 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1
-; GFX6-NEXT: v_mul_lo_u32 v7, v6, v7
+; GFX6-NEXT: v_mul_lo_u32 v8, v6, v7
; GFX6-NEXT: v_and_b32_e32 v3, 0xffffff, v3
-; GFX6-NEXT: v_mul_hi_u32 v7, v6, v7
-; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v7
-; GFX6-NEXT: v_mul_hi_u32 v7, v4, v6
+; GFX6-NEXT: v_mul_hi_u32 v8, v6, v8
+; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v8
+; GFX6-NEXT: v_mul_hi_u32 v8, v4, v6
; GFX6-NEXT: v_mul_hi_u32 v6, v5, v6
-; GFX6-NEXT: v_mul_lo_u32 v7, v7, 24
+; GFX6-NEXT: v_mul_lo_u32 v8, v8, 24
; GFX6-NEXT: v_mul_lo_u32 v6, v6, 24
-; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v7
-; GFX6-NEXT: v_subrev_i32_e32 v7, vcc, 24, v4
+; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v8
+; GFX6-NEXT: v_add_i32_e32 v8, vcc, v4, v7
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
-; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
-; GFX6-NEXT: v_subrev_i32_e32 v7, vcc, 24, v4
+; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc
+; GFX6-NEXT: v_add_i32_e32 v8, vcc, v4, v7
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
-; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
-; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 23, v4
-; GFX6-NEXT: v_and_b32_e32 v7, 0xffffff, v7
+; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc
+; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 23, v4
+; GFX6-NEXT: v_and_b32_e32 v8, 0xffffff, v8
; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4
-; GFX6-NEXT: v_lshlrev_b32_e32 v0, v7, v0
+; GFX6-NEXT: v_lshlrev_b32_e32 v0, v8, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2
; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v5, v6
-; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
+; GFX6-NEXT: v_add_i32_e32 v4, vcc, v2, v7
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
-; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
+; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0xffffffe8, v2
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 23, v2
@@ -2540,32 +2538,32 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 1, v1
-; GFX8-NEXT: v_mul_lo_u32 v7, v6, v7
+; GFX8-NEXT: v_mul_lo_u32 v8, v6, v7
; GFX8-NEXT: v_and_b32_e32 v3, 0xffffff, v3
-; GFX8-NEXT: v_mul_hi_u32 v7, v6, v7
-; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v7
-; GFX8-NEXT: v_mul_hi_u32 v7, v4, v6
+; GFX8-NEXT: v_mul_hi_u32 v8, v6, v8
+; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v8
+; GFX8-NEXT: v_mul_hi_u32 v8, v4, v6
; GFX8-NEXT: v_mul_hi_u32 v6, v5, v6
-; GFX8-NEXT: v_mul_lo_u32 v7, v7, 24
+; GFX8-NEXT: v_mul_lo_u32 v8, v8, 24
; GFX8-NEXT: v_mul_lo_u32 v6, v6, 24
-; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v7
-; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, 24, v4
+; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v8
+; GFX8-NEXT: v_add_u32_e32 v8, vcc, v4, v7
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
-; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, 24, v4
+; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc
+; GFX8-NEXT: v_add_u32_e32 v8, vcc, v4, v7
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
-; GFX8-NEXT: v_sub_u32_e32 v7, vcc, 23, v4
-; GFX8-NEXT: v_and_b32_e32 v7, 0xffffff, v7
+; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc
+; GFX8-NEXT: v_sub_u32_e32 v8, vcc, 23, v4
+; GFX8-NEXT: v_and_b32_e32 v8, 0xffffff, v8
; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4
-; GFX8-NEXT: v_lshlrev_b32_e32 v0, v7, v0
+; GFX8-NEXT: v_lshlrev_b32_e32 v0, v8, v0
; GFX8-NEXT: v_lshrrev_b32_e32 v2, v4, v2
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v5, v6
-; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 24, v2
+; GFX8-NEXT: v_add_u32_e32 v4, vcc, v2, v7
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
-; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 24, v2
+; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0xffffffe8, v2
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 23, v2
@@ -2599,10 +2597,10 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24
; GFX9-NEXT: v_sub_u32_e32 v4, v4, v7
; GFX9-NEXT: v_sub_u32_e32 v5, v5, v6
-; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4
+; GFX9-NEXT: v_add_u32_e32 v6, 0xffffffe8, v4
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
-; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4
+; GFX9-NEXT: v_add_u32_e32 v6, 0xffffffe8, v4
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
; GFX9-NEXT: v_sub_u32_e32 v6, 23, v4
@@ -2610,10 +2608,10 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
; GFX9-NEXT: v_and_b32_e32 v6, 0xffffff, v6
; GFX9-NEXT: v_lshrrev_b32_e32 v2, v4, v2
; GFX9-NEXT: v_lshl_or_b32 v0, v0, v6, v2
-; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v5
+; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffe8, v5
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v5
; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
-; GFX9-NEXT: v_subrev_u32_e32 v4, 24, v2
+; GFX9-NEXT: v_add_u32_e32 v4, 0xffffffe8, v2
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GFX9-NEXT: v_sub_u32_e32 v4, 23, v2
@@ -2645,15 +2643,15 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
; GFX10-NEXT: v_mul_lo_u32 v6, v6, 24
; GFX10-NEXT: v_sub_nc_u32_e32 v4, v4, v7
; GFX10-NEXT: v_sub_nc_u32_e32 v5, v5, v6
-; GFX10-NEXT: v_subrev_nc_u32_e32 v6, 24, v4
+; GFX10-NEXT: v_add_nc_u32_e32 v6, 0xffffffe8, v4
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4
-; GFX10-NEXT: v_subrev_nc_u32_e32 v7, 24, v5
+; GFX10-NEXT: v_add_nc_u32_e32 v7, 0xffffffe8, v5
; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5
-; GFX10-NEXT: v_subrev_nc_u32_e32 v6, 24, v4
+; GFX10-NEXT: v_add_nc_u32_e32 v6, 0xffffffe8, v4
; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4
-; GFX10-NEXT: v_subrev_nc_u32_e32 v7, 24, v5
+; GFX10-NEXT: v_add_nc_u32_e32 v7, 0xffffffe8, v5
; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5
; GFX10-NEXT: v_sub_nc_u32_e32 v6, 23, v4
@@ -2675,12 +2673,11 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v6, 24
; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v4
; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2
+; GFX11-NEXT: v_and_b32_e32 v3, 0xffffff, v3
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0
-; GFX11-NEXT: v_lshlrev_b32_e32 v1, 1, v1
; GFX11-NEXT: v_rcp_iflag_f32_e32 v6, v6
-; GFX11-NEXT: v_and_b32_e32 v3, 0xffffff, v3
; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
+; GFX11-NEXT: v_dual_mul_f32 v6, 0x4f7ffffe, v6 :: v_dual_lshlrev_b32 v1, 1, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cvt_u32_f32_e32 v6, v6
; GFX11-NEXT: v_mul_lo_u32 v7, 0xffffffe8, v6
@@ -2697,34 +2694,33 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_hi_u32 v6, v5, v6
; GFX11-NEXT: v_mul_lo_u32 v6, v6, 24
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_nc_u32_e32 v5, v5, v6
-; GFX11-NEXT: v_subrev_nc_u32_e32 v6, 24, v4
-; GFX11-NEXT: v_subrev_nc_u32_e32 v7, 24, v5
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_add_nc_u32_e32 v6, 0xffffffe8, v4
; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5
-; GFX11-NEXT: v_subrev_nc_u32_e32 v6, 24, v4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_add_nc_u32_e32 v6, 0xffffffe8, v4
+; GFX11-NEXT: v_add_nc_u32_e32 v7, 0xffffffe8, v5
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4
-; GFX11-NEXT: v_subrev_nc_u32_e32 v7, 24, v5
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo
+; GFX11-NEXT: v_dual_cndmask_b32 v4, v4, v6 :: v_dual_add_nc_u32 v7, 0xffffffe8, v5
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_sub_nc_u32_e32 v6, 23, v4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_dual_cndmask_b32 v5, v5, v7 :: v_dual_and_b32 v4, 0xffffff, v4
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_and_b32_e32 v6, 0xffffff, v6
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-NEXT: v_sub_nc_u32_e32 v7, 23, v5
; GFX11-NEXT: v_and_b32_e32 v5, 0xffffff, v5
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_lshrrev_b32_e32 v2, v4, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v7
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_lshrrev_b32_e32 v3, v5, v3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_lshl_or_b32 v0, v0, v6, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_lshl_or_b32 v1, v1, v4, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call <2 x i24> @llvm.fshr.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt)
@@ -6087,13 +6083,14 @@ define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
; GFX6-NEXT: v_not_b32_e32 v0, v8
; GFX6-NEXT: v_and_b32_e32 v15, 0x7f, v0
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, 64, v15
+; GFX6-NEXT: v_not_b32_e32 v16, 63
; GFX6-NEXT: v_lshr_b64 v[0:1], v[9:10], v0
; GFX6-NEXT: v_lshl_b64 v[11:12], v[2:3], v15
-; GFX6-NEXT: v_subrev_i32_e32 v16, vcc, 64, v15
+; GFX6-NEXT: v_add_i32_e32 v17, vcc, v15, v16
; GFX6-NEXT: v_lshl_b64 v[13:14], v[9:10], v15
; GFX6-NEXT: v_or_b32_e32 v11, v0, v11
; GFX6-NEXT: v_or_b32_e32 v12, v1, v12
-; GFX6-NEXT: v_lshl_b64 v[0:1], v[9:10], v16
+; GFX6-NEXT: v_lshl_b64 v[0:1], v[9:10], v17
; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15
; GFX6-NEXT: v_cndmask_b32_e32 v10, 0, v13, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v13, 0, v14, vcc
@@ -6106,7 +6103,7 @@ define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 64, v14
; GFX6-NEXT: v_lshr_b64 v[0:1], v[4:5], v14
; GFX6-NEXT: v_lshl_b64 v[2:3], v[6:7], v2
-; GFX6-NEXT: v_subrev_i32_e32 v15, vcc, 64, v14
+; GFX6-NEXT: v_add_i32_e32 v15, vcc, v14, v16
; GFX6-NEXT: v_or_b32_e32 v2, v0, v2
; GFX6-NEXT: v_or_b32_e32 v3, v1, v3
; GFX6-NEXT: v_lshr_b64 v[0:1], v[6:7], v15
@@ -6135,13 +6132,14 @@ define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
; GFX8-NEXT: v_not_b32_e32 v0, v8
; GFX8-NEXT: v_and_b32_e32 v15, 0x7f, v0
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, 64, v15
+; GFX8-NEXT: v_not_b32_e32 v16, 63
; GFX8-NEXT: v_lshrrev_b64 v[0:1], v0, v[9:10]
; GFX8-NEXT: v_lshlrev_b64 v[11:12], v15, v[2:3]
-; GFX8-NEXT: v_subrev_u32_e32 v16, vcc, 64, v15
+; GFX8-NEXT: v_add_u32_e32 v17, vcc, v15, v16
; GFX8-NEXT: v_lshlrev_b64 v[13:14], v15, v[9:10]
; GFX8-NEXT: v_or_b32_e32 v11, v0, v11
; GFX8-NEXT: v_or_b32_e32 v12, v1, v12
-; GFX8-NEXT: v_lshlrev_b64 v[0:1], v16, v[9:10]
+; GFX8-NEXT: v_lshlrev_b64 v[0:1], v17, v[9:10]
; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15
; GFX8-NEXT: v_cndmask_b32_e32 v10, 0, v13, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v13, 0, v14, vcc
@@ -6154,7 +6152,7 @@ define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 64, v14
; GFX8-NEXT: v_lshrrev_b64 v[0:1], v14, v[4:5]
; GFX8-NEXT: v_lshlrev_b64 v[2:3], v2, v[6:7]
-; GFX8-NEXT: v_subrev_u32_e32 v15, vcc, 64, v14
+; GFX8-NEXT: v_add_u32_e32 v15, vcc, v14, v16
; GFX8-NEXT: v_or_b32_e32 v2, v0, v2
; GFX8-NEXT: v_or_b32_e32 v3, v1, v3
; GFX8-NEXT: v_lshrrev_b64 v[0:1], v15, v[6:7]
@@ -6185,7 +6183,7 @@ define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
; GFX9-NEXT: v_sub_u32_e32 v0, 64, v15
; GFX9-NEXT: v_lshrrev_b64 v[0:1], v0, v[9:10]
; GFX9-NEXT: v_lshlrev_b64 v[11:12], v15, v[2:3]
-; GFX9-NEXT: v_subrev_u32_e32 v16, 64, v15
+; GFX9-NEXT: v_add_u32_e32 v16, 0xffffffc0, v15
; GFX9-NEXT: v_lshlrev_b64 v[13:14], v15, v[9:10]
; GFX9-NEXT: v_or_b32_e32 v11, v0, v11
; GFX9-NEXT: v_or_b32_e32 v12, v1, v12
@@ -6202,7 +6200,7 @@ define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
; GFX9-NEXT: v_cndmask_b32_e32 v12, v1, v3, vcc
; GFX9-NEXT: v_lshrrev_b64 v[0:1], v14, v[4:5]
; GFX9-NEXT: v_lshlrev_b64 v[2:3], v2, v[6:7]
-; GFX9-NEXT: v_subrev_u32_e32 v15, 64, v14
+; GFX9-NEXT: v_add_u32_e32 v15, 0xffffffc0, v14
; GFX9-NEXT: v_or_b32_e32 v2, v0, v2
; GFX9-NEXT: v_or_b32_e32 v3, v1, v3
; GFX9-NEXT: v_lshrrev_b64 v[0:1], v15, v[6:7]
@@ -6232,9 +6230,9 @@ define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
; GFX10-NEXT: v_and_b32_e32 v18, 0x7f, v9
; GFX10-NEXT: v_or_b32_e32 v2, v2, v10
; GFX10-NEXT: v_sub_nc_u32_e32 v16, 64, v19
-; GFX10-NEXT: v_subrev_nc_u32_e32 v21, 64, v19
+; GFX10-NEXT: v_add_nc_u32_e32 v21, 0xffffffc0, v19
; GFX10-NEXT: v_sub_nc_u32_e32 v10, 64, v18
-; GFX10-NEXT: v_subrev_nc_u32_e32 v20, 64, v18
+; GFX10-NEXT: v_add_nc_u32_e32 v20, 0xffffffc0, v18
; GFX10-NEXT: v_lshlrev_b64 v[8:9], v18, v[2:3]
; GFX10-NEXT: v_lshrrev_b64 v[12:13], v19, v[4:5]
; GFX10-NEXT: v_lshlrev_b64 v[16:17], v16, v[6:7]
@@ -6273,47 +6271,48 @@ define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_not_b32_e32 v9, v8
+; GFX11-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
; GFX11-NEXT: v_lshrrev_b32_e32 v10, 31, v1
; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
-; GFX11-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_and_b32_e32 v18, 0x7f, v9
; GFX11-NEXT: v_or_b32_e32 v2, v2, v10
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT: v_sub_nc_u32_e32 v10, 64, v18
; GFX11-NEXT: v_lshlrev_b64 v[14:15], v18, v[0:1]
; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v18
; GFX11-NEXT: v_and_b32_e32 v19, 0x7f, v8
-; GFX11-NEXT: v_sub_nc_u32_e32 v10, 64, v18
-; GFX11-NEXT: v_subrev_nc_u32_e32 v20, 64, v18
+; GFX11-NEXT: v_add_nc_u32_e32 v20, 0xffffffc0, v18
; GFX11-NEXT: v_lshlrev_b64 v[8:9], v18, v[2:3]
+; GFX11-NEXT: v_lshrrev_b64 v[10:11], v10, v[0:1]
; GFX11-NEXT: v_cndmask_b32_e32 v14, 0, v14, vcc_lo
; GFX11-NEXT: v_sub_nc_u32_e32 v16, 64, v19
-; GFX11-NEXT: v_lshrrev_b64 v[10:11], v10, v[0:1]
-; GFX11-NEXT: v_subrev_nc_u32_e32 v21, 64, v19
-; GFX11-NEXT: v_lshrrev_b64 v[12:13], v19, v[4:5]
; GFX11-NEXT: v_lshlrev_b64 v[0:1], v20, v[0:1]
-; GFX11-NEXT: v_lshlrev_b64 v[16:17], v16, v[6:7]
+; GFX11-NEXT: v_lshrrev_b64 v[12:13], v19, v[4:5]
; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v19
; GFX11-NEXT: v_or_b32_e32 v10, v10, v8
+; GFX11-NEXT: v_add_nc_u32_e32 v21, 0xffffffc0, v19
+; GFX11-NEXT: v_lshlrev_b64 v[16:17], v16, v[6:7]
; GFX11-NEXT: v_or_b32_e32 v11, v11, v9
-; GFX11-NEXT: v_lshrrev_b64 v[8:9], v21, v[6:7]
; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v19
+; GFX11-NEXT: v_cndmask_b32_e32 v10, v0, v10, vcc_lo
+; GFX11-NEXT: v_lshrrev_b64 v[8:9], v21, v[6:7]
; GFX11-NEXT: v_or_b32_e32 v12, v12, v16
; GFX11-NEXT: v_or_b32_e32 v13, v13, v17
-; GFX11-NEXT: v_dual_cndmask_b32 v10, v0, v10 :: v_dual_cndmask_b32 v11, v1, v11
+; GFX11-NEXT: v_cndmask_b32_e32 v11, v1, v11, vcc_lo
; GFX11-NEXT: v_lshrrev_b64 v[0:1], v19, v[6:7]
-; GFX11-NEXT: v_cndmask_b32_e32 v7, 0, v15, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v12, s0
; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v18
+; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v12, s0
; GFX11-NEXT: v_cndmask_b32_e64 v6, v9, v13, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v8, v4, s1
+; GFX11-NEXT: v_cndmask_b32_e32 v7, 0, v15, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, v2, s2
; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4)
+; GFX11-NEXT: v_cndmask_b32_e64 v4, v8, v4, s1
; GFX11-NEXT: v_cndmask_b32_e64 v5, v6, v5, s1
; GFX11-NEXT: v_cndmask_b32_e64 v6, 0, v0, s0
; GFX11-NEXT: v_cndmask_b32_e64 v8, 0, v1, s0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-NEXT: v_or_b32_e32 v0, v14, v4
; GFX11-NEXT: v_or_b32_e32 v1, v7, v5
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
@@ -6335,46 +6334,47 @@ define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
; GFX6-NEXT: v_and_b32_e32 v7, 0x7f, v1
; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 64, v7
+; GFX6-NEXT: v_not_b32_e32 v8, 63
; GFX6-NEXT: v_lshr_b64 v[1:2], s[8:9], v1
; GFX6-NEXT: v_lshl_b64 v[3:4], s[0:1], v7
-; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, 64, v7
+; GFX6-NEXT: v_add_i32_e32 v9, vcc, v7, v8
; GFX6-NEXT: v_lshl_b64 v[5:6], s[8:9], v7
; GFX6-NEXT: v_or_b32_e32 v3, v1, v3
; GFX6-NEXT: v_or_b32_e32 v4, v2, v4
-; GFX6-NEXT: v_lshl_b64 v[1:2], s[8:9], v8
+; GFX6-NEXT: v_lshl_b64 v[1:2], s[8:9], v9
; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7
-; GFX6-NEXT: v_cndmask_b32_e32 v8, 0, v5, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GFX6-NEXT: v_mov_b32_e32 v3, s0
; GFX6-NEXT: v_mov_b32_e32 v4, s1
; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
-; GFX6-NEXT: v_and_b32_e32 v10, 0x7f, v0
+; GFX6-NEXT: v_and_b32_e32 v11, 0x7f, v0
; GFX6-NEXT: v_cndmask_b32_e32 v7, v1, v3, vcc
-; GFX6-NEXT: v_cndmask_b32_e32 v9, v2, v4, vcc
-; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 64, v10
-; GFX6-NEXT: v_lshr_b64 v[0:1], s[4:5], v10
+; GFX6-NEXT: v_cndmask_b32_e32 v10, v2, v4, vcc
+; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 64, v11
+; GFX6-NEXT: v_lshr_b64 v[0:1], s[4:5], v11
; GFX6-NEXT: v_lshl_b64 v[2:3], s[6:7], v2
-; GFX6-NEXT: v_subrev_i32_e32 v11, vcc, 64, v10
+; GFX6-NEXT: v_add_i32_e32 v8, vcc, v11, v8
; GFX6-NEXT: v_or_b32_e32 v2, v0, v2
; GFX6-NEXT: v_or_b32_e32 v3, v1, v3
-; GFX6-NEXT: v_lshr_b64 v[0:1], s[6:7], v11
-; GFX6-NEXT: v_lshr_b64 v[4:5], s[6:7], v10
-; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v10
+; GFX6-NEXT: v_lshr_b64 v[0:1], s[6:7], v8
+; GFX6-NEXT: v_lshr_b64 v[4:5], s[6:7], v11
+; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v11
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX6-NEXT: v_mov_b32_e32 v2, s4
; GFX6-NEXT: v_mov_b32_e32 v3, s5
-; GFX6-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v10
+; GFX6-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v11
; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1]
; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1]
; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
-; GFX6-NEXT: v_or_b32_e32 v0, v8, v0
+; GFX6-NEXT: v_or_b32_e32 v0, v9, v0
; GFX6-NEXT: v_or_b32_e32 v1, v6, v1
; GFX6-NEXT: v_or_b32_e32 v2, v7, v2
-; GFX6-NEXT: v_or_b32_e32 v3, v9, v3
+; GFX6-NEXT: v_or_b32_e32 v3, v10, v3
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_fshr_i128_ssv:
@@ -6387,46 +6387,47 @@ define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
; GFX8-NEXT: v_and_b32_e32 v7, 0x7f, v1
; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
; GFX8-NEXT: v_sub_u32_e32 v1, vcc, 64, v7
+; GFX8-NEXT: v_not_b32_e32 v8, 63
; GFX8-NEXT: v_lshrrev_b64 v[1:2], v1, s[8:9]
; GFX8-NEXT: v_lshlrev_b64 v[3:4], v7, s[0:1]
-; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, 64, v7
+; GFX8-NEXT: v_add_u32_e32 v9, vcc, v7, v8
; GFX8-NEXT: v_lshlrev_b64 v[5:6], v7, s[8:9]
; GFX8-NEXT: v_or_b32_e32 v3, v1, v3
; GFX8-NEXT: v_or_b32_e32 v4, v2, v4
-; GFX8-NEXT: v_lshlrev_b64 v[1:2], v8, s[8:9]
+; GFX8-NEXT: v_lshlrev_b64 v[1:2], v9, s[8:9]
; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7
-; GFX8-NEXT: v_cndmask_b32_e32 v8, 0, v5, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GFX8-NEXT: v_mov_b32_e32 v3, s0
; GFX8-NEXT: v_mov_b32_e32 v4, s1
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
-; GFX8-NEXT: v_and_b32_e32 v10, 0x7f, v0
+; GFX8-NEXT: v_and_b32_e32 v11, 0x7f, v0
; GFX8-NEXT: v_cndmask_b32_e32 v7, v1, v3, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v9, v2, v4, vcc
-; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 64, v10
-; GFX8-NEXT: v_lshrrev_b64 v[0:1], v10, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e32 v10, v2, v4, vcc
+; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 64, v11
+; GFX8-NEXT: v_lshrrev_b64 v[0:1], v11, s[4:5]
; GFX8-NEXT: v_lshlrev_b64 v[2:3], v2, s[6:7]
-; GFX8-NEXT: v_subrev_u32_e32 v11, vcc, 64, v10
+; GFX8-NEXT: v_add_u32_e32 v8, vcc, v11, v8
; GFX8-NEXT: v_or_b32_e32 v2, v0, v2
; GFX8-NEXT: v_or_b32_e32 v3, v1, v3
-; GFX8-NEXT: v_lshrrev_b64 v[0:1], v11, s[6:7]
-; GFX8-NEXT: v_lshrrev_b64 v[4:5], v10, s[6:7]
-; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v10
+; GFX8-NEXT: v_lshrrev_b64 v[0:1], v8, s[6:7]
+; GFX8-NEXT: v_lshrrev_b64 v[4:5], v11, s[6:7]
+; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v11
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX8-NEXT: v_mov_b32_e32 v2, s4
; GFX8-NEXT: v_mov_b32_e32 v3, s5
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v10
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v11
; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1]
; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1]
; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
-; GFX8-NEXT: v_or_b32_e32 v0, v8, v0
+; GFX8-NEXT: v_or_b32_e32 v0, v9, v0
; GFX8-NEXT: v_or_b32_e32 v1, v6, v1
; GFX8-NEXT: v_or_b32_e32 v2, v7, v2
-; GFX8-NEXT: v_or_b32_e32 v3, v9, v3
+; GFX8-NEXT: v_or_b32_e32 v3, v10, v3
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: v_fshr_i128_ssv:
@@ -6441,7 +6442,7 @@ define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
; GFX9-NEXT: v_sub_u32_e32 v1, 64, v7
; GFX9-NEXT: v_lshrrev_b64 v[1:2], v1, s[8:9]
; GFX9-NEXT: v_lshlrev_b64 v[3:4], v7, s[0:1]
-; GFX9-NEXT: v_subrev_u32_e32 v8, 64, v7
+; GFX9-NEXT: v_add_u32_e32 v8, 0xffffffc0, v7
; GFX9-NEXT: v_lshlrev_b64 v[5:6], v7, s[8:9]
; GFX9-NEXT: v_or_b32_e32 v3, v1, v3
; GFX9-NEXT: v_or_b32_e32 v4, v2, v4
@@ -6460,7 +6461,7 @@ define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
; GFX9-NEXT: v_cndmask_b32_e32 v7, v1, v3, vcc
; GFX9-NEXT: v_lshrrev_b64 v[0:1], v10, s[4:5]
; GFX9-NEXT: v_lshlrev_b64 v[2:3], v2, s[6:7]
-; GFX9-NEXT: v_subrev_u32_e32 v11, 64, v10
+; GFX9-NEXT: v_add_u32_e32 v11, 0xffffffc0, v10
; GFX9-NEXT: v_or_b32_e32 v2, v0, v2
; GFX9-NEXT: v_or_b32_e32 v3, v1, v3
; GFX9-NEXT: v_lshrrev_b64 v[0:1], v11, s[6:7]
@@ -6492,10 +6493,10 @@ define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
; GFX10-NEXT: v_sub_nc_u32_e32 v8, 64, v13
; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
; GFX10-NEXT: s_or_b64 s[8:9], s[2:3], s[8:9]
-; GFX10-NEXT: v_subrev_nc_u32_e32 v14, 64, v13
+; GFX10-NEXT: v_add_nc_u32_e32 v14, 0xffffffc0, v13
; GFX10-NEXT: v_sub_nc_u32_e32 v2, 64, v12
; GFX10-NEXT: v_lshlrev_b64 v[0:1], v12, s[8:9]
-; GFX10-NEXT: v_subrev_nc_u32_e32 v10, 64, v12
+; GFX10-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v12
; GFX10-NEXT: v_lshrrev_b64 v[4:5], v13, s[4:5]
; GFX10-NEXT: v_lshlrev_b64 v[8:9], v8, s[6:7]
; GFX10-NEXT: v_lshrrev_b64 v[2:3], v2, s[0:1]
@@ -6544,11 +6545,11 @@ define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
; GFX11-NEXT: v_and_b32_e32 v13, 0x7f, v0
; GFX11-NEXT: v_sub_nc_u32_e32 v2, 64, v12
; GFX11-NEXT: v_lshlrev_b64 v[0:1], v12, s[8:9]
-; GFX11-NEXT: v_subrev_nc_u32_e32 v10, 64, v12
+; GFX11-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v12
; GFX11-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc_lo
; GFX11-NEXT: v_sub_nc_u32_e32 v8, 64, v13
; GFX11-NEXT: v_lshrrev_b64 v[2:3], v2, s[0:1]
-; GFX11-NEXT: v_subrev_nc_u32_e32 v14, 64, v13
+; GFX11-NEXT: v_add_nc_u32_e32 v14, 0xffffffc0, v13
; GFX11-NEXT: v_lshrrev_b64 v[4:5], v13, s[4:5]
; GFX11-NEXT: v_lshlrev_b64 v[10:11], v10, s[0:1]
; GFX11-NEXT: v_lshlrev_b64 v[8:9], v8, s[6:7]
@@ -7718,13 +7719,14 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX6-NEXT: v_not_b32_e32 v0, v16
; GFX6-NEXT: v_and_b32_e32 v19, 0x7f, v0
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, 64, v19
+; GFX6-NEXT: v_not_b32_e32 v25, 63
; GFX6-NEXT: v_lshr_b64 v[0:1], v[17:18], v0
; GFX6-NEXT: v_lshl_b64 v[21:22], v[2:3], v19
-; GFX6-NEXT: v_subrev_i32_e32 v25, vcc, 64, v19
+; GFX6-NEXT: v_add_i32_e32 v26, vcc, v19, v25
; GFX6-NEXT: v_lshl_b64 v[23:24], v[17:18], v19
; GFX6-NEXT: v_or_b32_e32 v21, v0, v21
; GFX6-NEXT: v_or_b32_e32 v22, v1, v22
-; GFX6-NEXT: v_lshl_b64 v[0:1], v[17:18], v25
+; GFX6-NEXT: v_lshl_b64 v[0:1], v[17:18], v26
; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v19
; GFX6-NEXT: v_cndmask_b32_e32 v18, 0, v23, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v23, 0, v24, vcc
@@ -7737,7 +7739,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 64, v22
; GFX6-NEXT: v_lshr_b64 v[0:1], v[8:9], v22
; GFX6-NEXT: v_lshl_b64 v[2:3], v[10:11], v2
-; GFX6-NEXT: v_subrev_i32_e32 v24, vcc, 64, v22
+; GFX6-NEXT: v_add_i32_e32 v24, vcc, v22, v25
; GFX6-NEXT: v_or_b32_e32 v2, v0, v2
; GFX6-NEXT: v_or_b32_e32 v3, v1, v3
; GFX6-NEXT: v_lshr_b64 v[0:1], v[10:11], v24
@@ -7761,7 +7763,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX6-NEXT: v_lshr_b64 v[4:5], v[8:9], v4
; GFX6-NEXT: v_lshl_b64 v[10:11], v[6:7], v18
; GFX6-NEXT: v_or_b32_e32 v2, v19, v2
-; GFX6-NEXT: v_subrev_i32_e32 v19, vcc, 64, v18
+; GFX6-NEXT: v_add_i32_e32 v19, vcc, v18, v25
; GFX6-NEXT: v_lshl_b64 v[16:17], v[8:9], v18
; GFX6-NEXT: v_or_b32_e32 v10, v4, v10
; GFX6-NEXT: v_or_b32_e32 v11, v5, v11
@@ -7778,7 +7780,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 64, v18
; GFX6-NEXT: v_lshr_b64 v[4:5], v[12:13], v18
; GFX6-NEXT: v_lshl_b64 v[6:7], v[14:15], v6
-; GFX6-NEXT: v_subrev_i32_e32 v19, vcc, 64, v18
+; GFX6-NEXT: v_add_i32_e32 v19, vcc, v18, v25
; GFX6-NEXT: v_or_b32_e32 v6, v4, v6
; GFX6-NEXT: v_or_b32_e32 v7, v5, v7
; GFX6-NEXT: v_lshr_b64 v[4:5], v[14:15], v19
@@ -7809,13 +7811,14 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX8-NEXT: v_not_b32_e32 v0, v16
; GFX8-NEXT: v_and_b32_e32 v19, 0x7f, v0
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, 64, v19
+; GFX8-NEXT: v_not_b32_e32 v25, 63
; GFX8-NEXT: v_lshrrev_b64 v[0:1], v0, v[17:18]
; GFX8-NEXT: v_lshlrev_b64 v[21:22], v19, v[2:3]
-; GFX8-NEXT: v_subrev_u32_e32 v25, vcc, 64, v19
+; GFX8-NEXT: v_add_u32_e32 v26, vcc, v19, v25
; GFX8-NEXT: v_lshlrev_b64 v[23:24], v19, v[17:18]
; GFX8-NEXT: v_or_b32_e32 v21, v0, v21
; GFX8-NEXT: v_or_b32_e32 v22, v1, v22
-; GFX8-NEXT: v_lshlrev_b64 v[0:1], v25, v[17:18]
+; GFX8-NEXT: v_lshlrev_b64 v[0:1], v26, v[17:18]
; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v19
; GFX8-NEXT: v_cndmask_b32_e32 v18, 0, v23, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v23, 0, v24, vcc
@@ -7828,7 +7831,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 64, v22
; GFX8-NEXT: v_lshrrev_b64 v[0:1], v22, v[8:9]
; GFX8-NEXT: v_lshlrev_b64 v[2:3], v2, v[10:11]
-; GFX8-NEXT: v_subrev_u32_e32 v24, vcc, 64, v22
+; GFX8-NEXT: v_add_u32_e32 v24, vcc, v22, v25
; GFX8-NEXT: v_or_b32_e32 v2, v0, v2
; GFX8-NEXT: v_or_b32_e32 v3, v1, v3
; GFX8-NEXT: v_lshrrev_b64 v[0:1], v24, v[10:11]
@@ -7852,7 +7855,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX8-NEXT: v_lshrrev_b64 v[4:5], v4, v[8:9]
; GFX8-NEXT: v_lshlrev_b64 v[10:11], v18, v[6:7]
; GFX8-NEXT: v_or_b32_e32 v2, v19, v2
-; GFX8-NEXT: v_subrev_u32_e32 v19, vcc, 64, v18
+; GFX8-NEXT: v_add_u32_e32 v19, vcc, v18, v25
; GFX8-NEXT: v_lshlrev_b64 v[16:17], v18, v[8:9]
; GFX8-NEXT: v_or_b32_e32 v10, v4, v10
; GFX8-NEXT: v_or_b32_e32 v11, v5, v11
@@ -7869,7 +7872,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX8-NEXT: v_sub_u32_e32 v6, vcc, 64, v18
; GFX8-NEXT: v_lshrrev_b64 v[4:5], v18, v[12:13]
; GFX8-NEXT: v_lshlrev_b64 v[6:7], v6, v[14:15]
-; GFX8-NEXT: v_subrev_u32_e32 v19, vcc, 64, v18
+; GFX8-NEXT: v_add_u32_e32 v19, vcc, v18, v25
; GFX8-NEXT: v_or_b32_e32 v6, v4, v6
; GFX8-NEXT: v_or_b32_e32 v7, v5, v7
; GFX8-NEXT: v_lshrrev_b64 v[4:5], v19, v[14:15]
@@ -7902,7 +7905,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX9-NEXT: v_sub_u32_e32 v0, 64, v19
; GFX9-NEXT: v_lshrrev_b64 v[0:1], v0, v[17:18]
; GFX9-NEXT: v_lshlrev_b64 v[21:22], v19, v[2:3]
-; GFX9-NEXT: v_subrev_u32_e32 v25, 64, v19
+; GFX9-NEXT: v_add_u32_e32 v25, 0xffffffc0, v19
; GFX9-NEXT: v_lshlrev_b64 v[23:24], v19, v[17:18]
; GFX9-NEXT: v_or_b32_e32 v21, v0, v21
; GFX9-NEXT: v_or_b32_e32 v22, v1, v22
@@ -7919,7 +7922,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX9-NEXT: v_cndmask_b32_e32 v21, v1, v3, vcc
; GFX9-NEXT: v_lshrrev_b64 v[0:1], v22, v[8:9]
; GFX9-NEXT: v_lshlrev_b64 v[2:3], v2, v[10:11]
-; GFX9-NEXT: v_subrev_u32_e32 v24, 64, v22
+; GFX9-NEXT: v_add_u32_e32 v24, 0xffffffc0, v22
; GFX9-NEXT: v_or_b32_e32 v2, v0, v2
; GFX9-NEXT: v_or_b32_e32 v3, v1, v3
; GFX9-NEXT: v_lshrrev_b64 v[0:1], v24, v[10:11]
@@ -7942,7 +7945,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX9-NEXT: v_lshrrev_b64 v[4:5], v4, v[8:9]
; GFX9-NEXT: v_lshlrev_b64 v[10:11], v18, v[6:7]
; GFX9-NEXT: v_or_b32_e32 v2, v19, v2
-; GFX9-NEXT: v_subrev_u32_e32 v19, 64, v18
+; GFX9-NEXT: v_add_u32_e32 v19, 0xffffffc0, v18
; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v17, vcc
; GFX9-NEXT: v_lshlrev_b64 v[16:17], v18, v[8:9]
; GFX9-NEXT: v_or_b32_e32 v10, v4, v10
@@ -7960,7 +7963,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX9-NEXT: v_cndmask_b32_e32 v11, v5, v7, vcc
; GFX9-NEXT: v_lshrrev_b64 v[4:5], v18, v[12:13]
; GFX9-NEXT: v_lshlrev_b64 v[6:7], v6, v[14:15]
-; GFX9-NEXT: v_subrev_u32_e32 v19, 64, v18
+; GFX9-NEXT: v_add_u32_e32 v19, 0xffffffc0, v18
; GFX9-NEXT: v_or_b32_e32 v6, v4, v6
; GFX9-NEXT: v_or_b32_e32 v7, v5, v7
; GFX9-NEXT: v_lshrrev_b64 v[4:5], v19, v[14:15]
@@ -7991,11 +7994,11 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX10-NEXT: v_and_b32_e32 v25, 0x7f, v17
; GFX10-NEXT: v_lshrrev_b32_e32 v17, 31, v1
; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
-; GFX10-NEXT: v_subrev_nc_u32_e32 v27, 64, v26
+; GFX10-NEXT: v_add_nc_u32_e32 v27, 0xffffffc0, v26
; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v26
; GFX10-NEXT: v_sub_nc_u32_e32 v18, 64, v25
; GFX10-NEXT: v_or_b32_e32 v2, v2, v17
-; GFX10-NEXT: v_subrev_nc_u32_e32 v19, 64, v25
+; GFX10-NEXT: v_add_nc_u32_e32 v19, 0xffffffc0, v25
; GFX10-NEXT: v_lshlrev_b64 v[23:24], v25, v[0:1]
; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v25
; GFX10-NEXT: v_lshrrev_b64 v[17:18], v18, v[0:1]
@@ -8035,12 +8038,12 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX10-NEXT: v_lshrrev_b64 v[8:9], v9, v[4:5]
; GFX10-NEXT: v_lshlrev_b64 v[10:11], v25, v[6:7]
; GFX10-NEXT: v_sub_nc_u32_e32 v20, 64, v23
-; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 64, v25
+; GFX10-NEXT: v_add_nc_u32_e32 v3, 0xffffffc0, v25
; GFX10-NEXT: v_or_b32_e32 v2, v18, v2
; GFX10-NEXT: v_lshlrev_b64 v[16:17], v25, v[4:5]
; GFX10-NEXT: v_lshrrev_b64 v[18:19], v23, v[12:13]
; GFX10-NEXT: v_or_b32_e32 v10, v8, v10
-; GFX10-NEXT: v_subrev_nc_u32_e32 v8, 64, v23
+; GFX10-NEXT: v_add_nc_u32_e32 v8, 0xffffffc0, v23
; GFX10-NEXT: v_lshlrev_b64 v[20:21], v20, v[14:15]
; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v25
; GFX10-NEXT: v_lshlrev_b64 v[3:4], v3, v[4:5]
@@ -8091,41 +8094,41 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX11-NEXT: v_cndmask_b32_e32 v24, 0, v24, vcc_lo
; GFX11-NEXT: v_sub_nc_u32_e32 v18, 64, v25
; GFX11-NEXT: v_lshlrev_b64 v[21:22], v25, v[2:3]
-; GFX11-NEXT: v_subrev_nc_u32_e32 v19, 64, v25
-; GFX11-NEXT: v_subrev_nc_u32_e32 v27, 64, v26
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v26
; GFX11-NEXT: v_lshrrev_b64 v[17:18], v18, v[0:1]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_lshlrev_b64 v[0:1], v19, v[0:1]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_or_b32_e32 v22, v18, v22
-; GFX11-NEXT: v_sub_nc_u32_e32 v18, 64, v26
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT: v_add_nc_u32_e32 v19, 0xffffffc0, v25
; GFX11-NEXT: v_or_b32_e32 v21, v17, v21
+; GFX11-NEXT: v_sub_nc_u32_e32 v18, 64, v26
; GFX11-NEXT: v_lshrrev_b64 v[16:17], v26, v[8:9]
-; GFX11-NEXT: v_cndmask_b32_e32 v22, v1, v22, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_lshlrev_b64 v[0:1], v19, v[0:1]
; GFX11-NEXT: v_lshlrev_b64 v[18:19], v18, v[10:11]
-; GFX11-NEXT: v_cndmask_b32_e32 v21, v0, v21, vcc_lo
-; GFX11-NEXT: v_lshrrev_b64 v[0:1], v27, v[10:11]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
+; GFX11-NEXT: v_dual_cndmask_b32 v22, v1, v22 :: v_dual_cndmask_b32 v21, v0, v21
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v25
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_add_nc_u32_e32 v27, 0xffffffc0, v26
; GFX11-NEXT: v_or_b32_e32 v16, v16, v18
; GFX11-NEXT: v_or_b32_e32 v17, v17, v19
+; GFX11-NEXT: v_cndmask_b32_e32 v22, v22, v3, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_lshrrev_b64 v[0:1], v27, v[10:11]
; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v16, s0
; GFX11-NEXT: v_not_b32_e32 v16, v20
; GFX11-NEXT: v_cndmask_b32_e32 v18, v21, v2, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v17, s0
-; GFX11-NEXT: v_cndmask_b32_e32 v22, v22, v3, vcc_lo
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v26
-; GFX11-NEXT: v_and_b32_e32 v25, 0x7f, v16
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v17, s0
; GFX11-NEXT: v_lshrrev_b64 v[2:3], v26, v[10:11]
-; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v9 :: v_dual_cndmask_b32 v0, v0, v8
+; GFX11-NEXT: v_and_b32_e32 v25, 0x7f, v16
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4)
+; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v8 :: v_dual_cndmask_b32 v1, v1, v9
; GFX11-NEXT: v_lshrrev_b32_e32 v8, 31, v5
; GFX11-NEXT: v_lshlrev_b64 v[4:5], 1, v[4:5]
; GFX11-NEXT: v_sub_nc_u32_e32 v9, 64, v25
; GFX11-NEXT: v_cndmask_b32_e64 v26, 0, v3, s0
-; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 64, v25
+; GFX11-NEXT: v_add_nc_u32_e32 v3, 0xffffffc0, v25
; GFX11-NEXT: v_or_b32_e32 v6, v6, v8
; GFX11-NEXT: v_or_b32_e32 v0, v23, v0
; GFX11-NEXT: v_lshrrev_b64 v[8:9], v9, v[4:5]
@@ -8143,7 +8146,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-NEXT: v_dual_cndmask_b32 v11, 0, v16 :: v_dual_cndmask_b32 v10, v3, v10
; GFX11-NEXT: v_sub_nc_u32_e32 v20, 64, v23
-; GFX11-NEXT: v_subrev_nc_u32_e32 v8, 64, v23
+; GFX11-NEXT: v_add_nc_u32_e32 v8, 0xffffffc0, v23
; GFX11-NEXT: v_lshrrev_b64 v[18:19], v23, v[12:13]
; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v23
; GFX11-NEXT: v_cndmask_b32_e32 v5, v4, v5, vcc_lo
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
index 5dd4fa0809131f..cc185aff9eff22 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
@@ -1577,7 +1577,7 @@ define i65 @v_lshr_i65(i65 %value, i65 %amount) {
; GFX6-NEXT: v_and_b32_e32 v4, 1, v2
; GFX6-NEXT: v_mov_b32_e32 v5, 0
; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 64, v3
-; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 64, v3
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0xffffffc0, v3
; GFX6-NEXT: v_lshr_b64 v[6:7], v[0:1], v3
; GFX6-NEXT: v_lshl_b64 v[8:9], v[4:5], v8
; GFX6-NEXT: v_lshr_b64 v[10:11], v[4:5], v3
@@ -1599,7 +1599,7 @@ define i65 @v_lshr_i65(i65 %value, i65 %amount) {
; GFX8-NEXT: v_and_b32_e32 v4, 1, v2
; GFX8-NEXT: v_mov_b32_e32 v5, 0
; GFX8-NEXT: v_sub_u32_e32 v8, vcc, 64, v3
-; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 64, v3
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0xffffffc0, v3
; GFX8-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX8-NEXT: v_lshlrev_b64 v[8:9], v8, v[4:5]
; GFX8-NEXT: v_lshrrev_b64 v[10:11], v3, v[4:5]
@@ -1621,7 +1621,7 @@ define i65 @v_lshr_i65(i65 %value, i65 %amount) {
; GFX9-NEXT: v_and_b32_e32 v4, 1, v2
; GFX9-NEXT: v_mov_b32_e32 v5, 0
; GFX9-NEXT: v_sub_u32_e32 v8, 64, v3
-; GFX9-NEXT: v_subrev_u32_e32 v2, 64, v3
+; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffc0, v3
; GFX9-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX9-NEXT: v_lshlrev_b64 v[8:9], v8, v[4:5]
; GFX9-NEXT: v_lshrrev_b64 v[10:11], v3, v[4:5]
@@ -1643,7 +1643,7 @@ define i65 @v_lshr_i65(i65 %value, i65 %amount) {
; GFX10-NEXT: v_mov_b32_e32 v5, 0
; GFX10-NEXT: v_and_b32_e32 v4, 1, v2
; GFX10-NEXT: v_sub_nc_u32_e32 v2, 64, v3
-; GFX10-NEXT: v_subrev_nc_u32_e32 v10, 64, v3
+; GFX10-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v3
; GFX10-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v3
; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 0, v3
@@ -1664,20 +1664,20 @@ define i65 @v_lshr_i65(i65 %value, i65 %amount) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_and_b32 v4, 1, v2
; GFX11-NEXT: v_sub_nc_u32_e32 v2, 64, v3
-; GFX11-NEXT: v_subrev_nc_u32_e32 v10, 64, v3
; GFX11-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v3
; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 0, v3
; GFX11-NEXT: v_lshlrev_b64 v[8:9], v2, v[4:5]
-; GFX11-NEXT: v_lshrrev_b64 v[10:11], v10, v[4:5]
-; GFX11-NEXT: v_lshrrev_b64 v[4:5], v3, v[4:5]
; GFX11-NEXT: v_or_b32_e32 v2, v6, v8
; GFX11-NEXT: v_or_b32_e32 v6, v7, v9
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc_lo
+; GFX11-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v3
+; GFX11-NEXT: v_lshrrev_b64 v[10:11], v10, v[4:5]
+; GFX11-NEXT: v_lshrrev_b64 v[4:5], v3, v[4:5]
; GFX11-NEXT: v_cndmask_b32_e32 v5, v11, v6, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, v1, s0
; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, v0, s0
; GFX11-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, v1, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = lshr i65 %value, %amount
ret i65 %result
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
index 2c2f8e914447d1..88eb0e4b848c95 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
@@ -325,7 +325,7 @@ define i32 @v_sdiv_i32_oddk_denom(i32 %num) {
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4
; CHECK-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[4:5]
-; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, 0x12d8fb, v0
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, 0xffed2705, v0
; CHECK-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5]
; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v2
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
@@ -353,29 +353,29 @@ define <2 x i32> @v_sdiv_v2i32_oddk_denom(<2 x i32> %num) {
; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT: v_mul_lo_u32 v5, v4, v5
-; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
-; GISEL-NEXT: v_mul_hi_u32 v5, v0, v4
+; GISEL-NEXT: v_mul_lo_u32 v7, v4, v5
+; GISEL-NEXT: v_mul_hi_u32 v7, v4, v7
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7
+; GISEL-NEXT: v_mul_hi_u32 v7, v0, v4
; GISEL-NEXT: v_mul_hi_u32 v4, v1, v4
-; GISEL-NEXT: v_mul_lo_u32 v7, v5, v3
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v5
-; GISEL-NEXT: v_mul_lo_u32 v9, v4, v3
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, 1, v4
-; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v7
-; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v9
+; GISEL-NEXT: v_mul_lo_u32 v8, v7, v3
+; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v7
+; GISEL-NEXT: v_mul_lo_u32 v10, v4, v3
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, 1, v4
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v8
+; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v10
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v8, s[4:5]
-; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v0, v3
+; GISEL-NEXT: v_cndmask_b32_e64 v7, v7, v9, s[4:5]
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v0, v5
; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v1, v3
-; GISEL-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[6:7]
-; GISEL-NEXT: v_subrev_i32_e32 v8, vcc, 0x12d8fb, v1
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v7, s[4:5]
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v5
+; GISEL-NEXT: v_cndmask_b32_e64 v4, v4, v11, s[6:7]
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, 0xffed2705, v1
+; GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v5, s[4:5]
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v7
; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[6:7]
; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v4
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v7, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v7, v5, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v8, vcc
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
@@ -398,29 +398,29 @@ define <2 x i32> @v_sdiv_v2i32_oddk_denom(<2 x i32> %num) {
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
-; CGP-NEXT: v_mul_lo_u32 v4, v3, v4
-; CGP-NEXT: v_mul_hi_u32 v4, v3, v4
-; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
-; CGP-NEXT: v_mul_hi_u32 v4, v0, v3
+; CGP-NEXT: v_mul_lo_u32 v7, v3, v4
+; CGP-NEXT: v_mul_hi_u32 v7, v3, v7
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v7
+; CGP-NEXT: v_mul_hi_u32 v7, v0, v3
; CGP-NEXT: v_mul_hi_u32 v3, v1, v3
-; CGP-NEXT: v_mul_lo_u32 v7, v4, v5
-; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v4
-; CGP-NEXT: v_mul_lo_u32 v9, v3, v5
-; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v3
-; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v7
-; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v9
+; CGP-NEXT: v_mul_lo_u32 v8, v7, v5
+; CGP-NEXT: v_add_i32_e32 v9, vcc, 1, v7
+; CGP-NEXT: v_mul_lo_u32 v10, v3, v5
+; CGP-NEXT: v_add_i32_e32 v11, vcc, 1, v3
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v8
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v10
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5
-; CGP-NEXT: v_cndmask_b32_e64 v4, v4, v8, s[4:5]
-; CGP-NEXT: v_sub_i32_e32 v7, vcc, v0, v5
+; CGP-NEXT: v_cndmask_b32_e64 v7, v7, v9, s[4:5]
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v0, v4
; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v1, v5
-; CGP-NEXT: v_cndmask_b32_e64 v3, v3, v10, s[6:7]
-; CGP-NEXT: v_subrev_i32_e32 v8, vcc, 0x12d8fb, v1
-; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v7, s[4:5]
-; CGP-NEXT: v_add_i32_e32 v7, vcc, 1, v4
+; CGP-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[6:7]
+; CGP-NEXT: v_add_i32_e32 v8, vcc, 0xffed2705, v1
+; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5]
+; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v7
; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[6:7]
; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v3
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
-; CGP-NEXT: v_cndmask_b32_e32 v0, v4, v7, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v0, v7, v4, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
; CGP-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
index 4cf1c92539c36f..b12e915c7d21b1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
@@ -1583,7 +1583,7 @@ define i65 @v_shl_i65(i65 %value, i65 %amount) {
; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 64, v3
; GFX6-NEXT: v_lshr_b64 v[4:5], v[0:1], v4
; GFX6-NEXT: v_lshl_b64 v[5:6], v[2:3], v3
-; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, 64, v3
+; GFX6-NEXT: v_add_i32_e32 v8, vcc, 0xffffffc0, v3
; GFX6-NEXT: v_lshl_b64 v[6:7], v[0:1], v3
; GFX6-NEXT: v_or_b32_e32 v9, v4, v5
; GFX6-NEXT: v_lshl_b64 v[4:5], v[0:1], v8
@@ -1601,7 +1601,7 @@ define i65 @v_shl_i65(i65 %value, i65 %amount) {
; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 64, v3
; GFX8-NEXT: v_lshrrev_b64 v[4:5], v4, v[0:1]
; GFX8-NEXT: v_lshlrev_b64 v[5:6], v3, v[2:3]
-; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, 64, v3
+; GFX8-NEXT: v_add_u32_e32 v8, vcc, 0xffffffc0, v3
; GFX8-NEXT: v_lshlrev_b64 v[6:7], v3, v[0:1]
; GFX8-NEXT: v_or_b32_e32 v9, v4, v5
; GFX8-NEXT: v_lshlrev_b64 v[4:5], v8, v[0:1]
@@ -1619,7 +1619,7 @@ define i65 @v_shl_i65(i65 %value, i65 %amount) {
; GFX9-NEXT: v_sub_u32_e32 v4, 64, v3
; GFX9-NEXT: v_lshrrev_b64 v[4:5], v4, v[0:1]
; GFX9-NEXT: v_lshlrev_b64 v[5:6], v3, v[2:3]
-; GFX9-NEXT: v_subrev_u32_e32 v8, 64, v3
+; GFX9-NEXT: v_add_u32_e32 v8, 0xffffffc0, v3
; GFX9-NEXT: v_lshlrev_b64 v[6:7], v3, v[0:1]
; GFX9-NEXT: v_or_b32_e32 v9, v4, v5
; GFX9-NEXT: v_lshlrev_b64 v[4:5], v8, v[0:1]
@@ -1636,7 +1636,7 @@ define i65 @v_shl_i65(i65 %value, i65 %amount) {
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_sub_nc_u32_e32 v6, 64, v3
; GFX10-NEXT: v_lshlrev_b64 v[4:5], v3, v[2:3]
-; GFX10-NEXT: v_subrev_nc_u32_e32 v8, 64, v3
+; GFX10-NEXT: v_add_nc_u32_e32 v8, 0xffffffc0, v3
; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v3
; GFX10-NEXT: v_lshrrev_b64 v[5:6], v6, v[0:1]
; GFX10-NEXT: v_lshlrev_b64 v[6:7], v3, v[0:1]
@@ -1654,7 +1654,7 @@ define i65 @v_shl_i65(i65 %value, i65 %amount) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_sub_nc_u32_e32 v6, 64, v3
; GFX11-NEXT: v_lshlrev_b64 v[4:5], v3, v[2:3]
-; GFX11-NEXT: v_subrev_nc_u32_e32 v8, 64, v3
+; GFX11-NEXT: v_add_nc_u32_e32 v8, 0xffffffc0, v3
; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v3
; GFX11-NEXT: v_lshrrev_b64 v[5:6], v6, v[0:1]
; GFX11-NEXT: v_lshlrev_b64 v[6:7], v3, v[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
index 1bb606f36e48d2..2b12e4b973acb2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
@@ -268,10 +268,10 @@ define i32 @v_srem_i32_pow2k_denom(i32 %num) {
; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 12, v2
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
-; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, 0x1000, v0
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, 0xfffff000, v0
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, 0x1000, v0
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, 0xfffff000, v0
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
@@ -297,23 +297,23 @@ define <2 x i32> @v_srem_v2i32_pow2k_denom(<2 x i32> %num) {
; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT: v_mul_lo_u32 v5, v4, v5
-; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
-; GISEL-NEXT: v_mul_hi_u32 v5, v0, v4
+; GISEL-NEXT: v_mul_lo_u32 v7, v4, v5
+; GISEL-NEXT: v_mul_hi_u32 v7, v4, v7
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7
+; GISEL-NEXT: v_mul_hi_u32 v7, v0, v4
; GISEL-NEXT: v_mul_hi_u32 v4, v1, v4
-; GISEL-NEXT: v_lshlrev_b32_e32 v5, 12, v5
+; GISEL-NEXT: v_lshlrev_b32_e32 v7, 12, v7
; GISEL-NEXT: v_lshlrev_b32_e32 v4, 12, v4
-; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v7
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
-; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v3
-; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, 0x1000, v1
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v0, v5
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, 0xfffff000, v1
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v3
-; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, 0x1000, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v0, v5
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, 0xfffff000, v1
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
@@ -338,23 +338,23 @@ define <2 x i32> @v_srem_v2i32_pow2k_denom(<2 x i32> %num) {
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
-; CGP-NEXT: v_mul_lo_u32 v4, v3, v4
-; CGP-NEXT: v_mul_hi_u32 v4, v3, v4
-; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
-; CGP-NEXT: v_mul_hi_u32 v4, v0, v3
+; CGP-NEXT: v_mul_lo_u32 v7, v3, v4
+; CGP-NEXT: v_mul_hi_u32 v7, v3, v7
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v7
+; CGP-NEXT: v_mul_hi_u32 v7, v0, v3
; CGP-NEXT: v_mul_hi_u32 v3, v1, v3
-; CGP-NEXT: v_lshlrev_b32_e32 v4, 12, v4
+; CGP-NEXT: v_lshlrev_b32_e32 v7, 12, v7
; CGP-NEXT: v_lshlrev_b32_e32 v3, 12, v3
-; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v7
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
-; CGP-NEXT: v_sub_i32_e32 v3, vcc, v0, v5
-; CGP-NEXT: v_subrev_i32_e32 v4, vcc, 0x1000, v1
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v0, v4
+; CGP-NEXT: v_add_i32_e32 v7, vcc, 0xfffff000, v1
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
-; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; CGP-NEXT: v_sub_i32_e32 v3, vcc, v0, v5
-; CGP-NEXT: v_subrev_i32_e32 v4, vcc, 0x1000, v1
+; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v0, v4
+; CGP-NEXT: v_add_i32_e32 v4, vcc, 0xfffff000, v1
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
@@ -386,10 +386,10 @@ define i32 @v_srem_i32_oddk_denom(i32 %num) {
; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2
; CHECK-NEXT: v_mul_lo_u32 v2, v2, v4
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
-; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, 0x12d8fb, v0
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, 0xffed2705, v0
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, 0x12d8fb, v0
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, 0xffed2705, v0
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
@@ -415,23 +415,23 @@ define <2 x i32> @v_srem_v2i32_oddk_denom(<2 x i32> %num) {
; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT: v_mul_lo_u32 v5, v4, v5
-; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
-; GISEL-NEXT: v_mul_hi_u32 v5, v0, v4
+; GISEL-NEXT: v_mul_lo_u32 v7, v4, v5
+; GISEL-NEXT: v_mul_hi_u32 v7, v4, v7
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7
+; GISEL-NEXT: v_mul_hi_u32 v7, v0, v4
; GISEL-NEXT: v_mul_hi_u32 v4, v1, v4
-; GISEL-NEXT: v_mul_lo_u32 v5, v5, v3
+; GISEL-NEXT: v_mul_lo_u32 v7, v7, v3
; GISEL-NEXT: v_mul_lo_u32 v4, v4, v3
-; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v7
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
-; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v3
-; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, 0x12d8fb, v1
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v0, v5
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, 0xffed2705, v1
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v3
-; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, 0x12d8fb, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v0, v5
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, 0xffed2705, v1
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
@@ -456,23 +456,23 @@ define <2 x i32> @v_srem_v2i32_oddk_denom(<2 x i32> %num) {
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
-; CGP-NEXT: v_mul_lo_u32 v4, v3, v4
-; CGP-NEXT: v_mul_hi_u32 v4, v3, v4
-; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
-; CGP-NEXT: v_mul_hi_u32 v4, v0, v3
+; CGP-NEXT: v_mul_lo_u32 v7, v3, v4
+; CGP-NEXT: v_mul_hi_u32 v7, v3, v7
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v7
+; CGP-NEXT: v_mul_hi_u32 v7, v0, v3
; CGP-NEXT: v_mul_hi_u32 v3, v1, v3
-; CGP-NEXT: v_mul_lo_u32 v4, v4, v5
+; CGP-NEXT: v_mul_lo_u32 v7, v7, v5
; CGP-NEXT: v_mul_lo_u32 v3, v3, v5
-; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v7
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
-; CGP-NEXT: v_sub_i32_e32 v3, vcc, v0, v5
-; CGP-NEXT: v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v0, v4
+; CGP-NEXT: v_add_i32_e32 v7, vcc, 0xffed2705, v1
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
-; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; CGP-NEXT: v_sub_i32_e32 v3, vcc, v0, v5
-; CGP-NEXT: v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1
+; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v0, v4
+; CGP-NEXT: v_add_i32_e32 v4, vcc, 0xffed2705, v1
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
index 2572f8581f0edf..7214f4ab581d5d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
@@ -12,9 +12,9 @@ define i7 @v_ssubsat_i7(i7 %lhs, i7 %rhs) {
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 25, v0
; GFX6-NEXT: v_max_i32_e32 v2, -1, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 25, v1
-; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0x80000001, v2
; GFX6-NEXT: v_min_i32_e32 v3, -1, v0
-; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x80000000, v3
+; GFX6-NEXT: v_add_i32_e32 v3, vcc, 0x80000000, v3
; GFX6-NEXT: v_max_i32_e32 v1, v2, v1
; GFX6-NEXT: v_min_i32_e32 v1, v1, v3
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
@@ -27,9 +27,9 @@ define i7 @v_ssubsat_i7(i7 %lhs, i7 %rhs) {
; GFX8-NEXT: v_lshlrev_b16_e32 v0, 9, v0
; GFX8-NEXT: v_max_i16_e32 v2, -1, v0
; GFX8-NEXT: v_lshlrev_b16_e32 v1, 9, v1
-; GFX8-NEXT: v_subrev_u16_e32 v2, 0x7fff, v2
+; GFX8-NEXT: v_add_u16_e32 v2, 0x8001, v2
; GFX8-NEXT: v_min_i16_e32 v3, -1, v0
-; GFX8-NEXT: v_subrev_u16_e32 v3, 0x8000, v3
+; GFX8-NEXT: v_add_u16_e32 v3, 0x8000, v3
; GFX8-NEXT: v_max_i16_e32 v1, v2, v1
; GFX8-NEXT: v_min_i16_e32 v1, v1, v3
; GFX8-NEXT: v_sub_u16_e32 v0, v0, v1
@@ -63,9 +63,9 @@ define amdgpu_ps i7 @s_ssubsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
; GFX6-NEXT: s_lshl_b32 s0, s0, 25
; GFX6-NEXT: s_max_i32 s2, s0, -1
; GFX6-NEXT: s_lshl_b32 s1, s1, 25
-; GFX6-NEXT: s_sub_i32 s2, s2, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s2, s2, 0x80000001
; GFX6-NEXT: s_min_i32 s3, s0, -1
-; GFX6-NEXT: s_sub_i32 s3, s3, 0x80000000
+; GFX6-NEXT: s_add_i32 s3, s3, 0x80000000
; GFX6-NEXT: s_max_i32 s1, s2, s1
; GFX6-NEXT: s_min_i32 s1, s1, s3
; GFX6-NEXT: s_sub_i32 s0, s0, s1
@@ -79,11 +79,11 @@ define amdgpu_ps i7 @s_ssubsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
; GFX8-NEXT: s_sext_i32_i16 s3, -1
; GFX8-NEXT: s_max_i32 s4, s2, s3
; GFX8-NEXT: s_lshl_b32 s1, s1, 9
-; GFX8-NEXT: s_sub_i32 s4, s4, 0x7fff
+; GFX8-NEXT: s_addk_i32 s4, 0x8001
; GFX8-NEXT: s_min_i32 s2, s2, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s4
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sub_i32 s2, s2, 0xffff8000
+; GFX8-NEXT: s_addk_i32 s2, 0x8000
; GFX8-NEXT: s_max_i32 s1, s3, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s2, s2
@@ -122,9 +122,9 @@ define i8 @v_ssubsat_i8(i8 %lhs, i8 %rhs) {
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 24, v0
; GFX6-NEXT: v_max_i32_e32 v2, -1, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1
-; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0x80000001, v2
; GFX6-NEXT: v_min_i32_e32 v3, -1, v0
-; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x80000000, v3
+; GFX6-NEXT: v_add_i32_e32 v3, vcc, 0x80000000, v3
; GFX6-NEXT: v_max_i32_e32 v1, v2, v1
; GFX6-NEXT: v_min_i32_e32 v1, v1, v3
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
@@ -137,9 +137,9 @@ define i8 @v_ssubsat_i8(i8 %lhs, i8 %rhs) {
; GFX8-NEXT: v_lshlrev_b16_e32 v0, 8, v0
; GFX8-NEXT: v_max_i16_e32 v2, -1, v0
; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1
-; GFX8-NEXT: v_subrev_u16_e32 v2, 0x7fff, v2
+; GFX8-NEXT: v_add_u16_e32 v2, 0x8001, v2
; GFX8-NEXT: v_min_i16_e32 v3, -1, v0
-; GFX8-NEXT: v_subrev_u16_e32 v3, 0x8000, v3
+; GFX8-NEXT: v_add_u16_e32 v3, 0x8000, v3
; GFX8-NEXT: v_max_i16_e32 v1, v2, v1
; GFX8-NEXT: v_min_i16_e32 v1, v1, v3
; GFX8-NEXT: v_sub_u16_e32 v0, v0, v1
@@ -173,9 +173,9 @@ define amdgpu_ps i8 @s_ssubsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
; GFX6-NEXT: s_lshl_b32 s0, s0, 24
; GFX6-NEXT: s_max_i32 s2, s0, -1
; GFX6-NEXT: s_lshl_b32 s1, s1, 24
-; GFX6-NEXT: s_sub_i32 s2, s2, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s2, s2, 0x80000001
; GFX6-NEXT: s_min_i32 s3, s0, -1
-; GFX6-NEXT: s_sub_i32 s3, s3, 0x80000000
+; GFX6-NEXT: s_add_i32 s3, s3, 0x80000000
; GFX6-NEXT: s_max_i32 s1, s2, s1
; GFX6-NEXT: s_min_i32 s1, s1, s3
; GFX6-NEXT: s_sub_i32 s0, s0, s1
@@ -189,11 +189,11 @@ define amdgpu_ps i8 @s_ssubsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
; GFX8-NEXT: s_sext_i32_i16 s3, -1
; GFX8-NEXT: s_max_i32 s4, s2, s3
; GFX8-NEXT: s_lshl_b32 s1, s1, 8
-; GFX8-NEXT: s_sub_i32 s4, s4, 0x7fff
+; GFX8-NEXT: s_addk_i32 s4, 0x8001
; GFX8-NEXT: s_min_i32 s2, s2, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s4
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sub_i32 s2, s2, 0xffff8000
+; GFX8-NEXT: s_addk_i32 s2, 0x8000
; GFX8-NEXT: s_max_i32 s1, s3, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s2, s2
@@ -234,18 +234,19 @@ define i16 @v_ssubsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
; GFX6-NEXT: v_max_i32_e32 v4, -1, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1
-; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 0x7fffffff, v4
+; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0x80000001, v4
; GFX6-NEXT: v_min_i32_e32 v5, -1, v0
-; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 0x80000000, v5
+; GFX6-NEXT: v_bfrev_b32_e32 v6, 1
+; GFX6-NEXT: v_add_i32_e32 v5, vcc, v5, v6
; GFX6-NEXT: v_max_i32_e32 v1, v4, v1
; GFX6-NEXT: v_min_i32_e32 v1, v1, v5
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v3
; GFX6-NEXT: v_max_i32_e32 v3, -1, v1
-; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x7fffffff, v3
+; GFX6-NEXT: v_add_i32_e32 v3, vcc, 0x80000001, v3
; GFX6-NEXT: v_min_i32_e32 v4, -1, v1
-; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 0x80000000, v4
+; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0x80000000, v4
; GFX6-NEXT: v_max_i32_e32 v2, v3, v2
; GFX6-NEXT: v_min_i32_e32 v2, v2, v4
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v2
@@ -266,16 +267,16 @@ define i16 @v_ssubsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
; GFX8-NEXT: v_max_i16_e32 v4, -1, v0
; GFX8-NEXT: v_lshrrev_b32_sdwa v2, v2, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1
-; GFX8-NEXT: v_subrev_u16_e32 v4, 0x7fff, v4
+; GFX8-NEXT: v_add_u16_e32 v4, 0x8001, v4
; GFX8-NEXT: v_min_i16_e32 v5, -1, v0
-; GFX8-NEXT: v_subrev_u16_e32 v5, 0x8000, v5
+; GFX8-NEXT: v_add_u16_e32 v5, 0x8000, v5
; GFX8-NEXT: v_max_i16_e32 v1, v4, v1
; GFX8-NEXT: v_min_i16_e32 v1, v1, v5
; GFX8-NEXT: v_sub_u16_e32 v0, v0, v1
; GFX8-NEXT: v_max_i16_e32 v1, -1, v3
-; GFX8-NEXT: v_subrev_u16_e32 v1, 0x7fff, v1
+; GFX8-NEXT: v_add_u16_e32 v1, 0x8001, v1
; GFX8-NEXT: v_min_i16_e32 v4, -1, v3
-; GFX8-NEXT: v_subrev_u16_e32 v4, 0x8000, v4
+; GFX8-NEXT: v_add_u16_e32 v4, 0x8000, v4
; GFX8-NEXT: v_max_i16_e32 v1, v1, v2
; GFX8-NEXT: v_min_i16_e32 v1, v1, v4
; GFX8-NEXT: v_sub_u16_e32 v1, v3, v1
@@ -355,18 +356,18 @@ define amdgpu_ps i16 @s_ssubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
; GFX6-NEXT: s_max_i32 s4, s0, -1
; GFX6-NEXT: s_lshr_b32 s3, s1, 8
; GFX6-NEXT: s_lshl_b32 s1, s1, 24
-; GFX6-NEXT: s_sub_i32 s4, s4, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s4, s4, 0x80000001
; GFX6-NEXT: s_min_i32 s5, s0, -1
-; GFX6-NEXT: s_sub_i32 s5, s5, 0x80000000
+; GFX6-NEXT: s_add_i32 s5, s5, 0x80000000
; GFX6-NEXT: s_max_i32 s1, s4, s1
; GFX6-NEXT: s_min_i32 s1, s1, s5
; GFX6-NEXT: s_sub_i32 s0, s0, s1
; GFX6-NEXT: s_lshl_b32 s1, s2, 24
; GFX6-NEXT: s_lshl_b32 s2, s3, 24
; GFX6-NEXT: s_max_i32 s3, s1, -1
-; GFX6-NEXT: s_sub_i32 s3, s3, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s3, s3, 0x80000001
; GFX6-NEXT: s_min_i32 s4, s1, -1
-; GFX6-NEXT: s_sub_i32 s4, s4, 0x80000000
+; GFX6-NEXT: s_add_i32 s4, s4, 0x80000000
; GFX6-NEXT: s_max_i32 s2, s3, s2
; GFX6-NEXT: s_min_i32 s2, s2, s4
; GFX6-NEXT: s_sub_i32 s1, s1, s2
@@ -387,11 +388,11 @@ define amdgpu_ps i16 @s_ssubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
; GFX8-NEXT: s_max_i32 s6, s4, s5
; GFX8-NEXT: s_lshr_b32 s3, s1, 8
; GFX8-NEXT: s_lshl_b32 s1, s1, 8
-; GFX8-NEXT: s_sub_i32 s6, s6, 0x7fff
+; GFX8-NEXT: s_addk_i32 s6, 0x8001
; GFX8-NEXT: s_min_i32 s4, s4, s5
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sub_i32 s4, s4, 0xffff8000
+; GFX8-NEXT: s_addk_i32 s4, 0x8000
; GFX8-NEXT: s_max_i32 s1, s6, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s4, s4
@@ -401,11 +402,11 @@ define amdgpu_ps i16 @s_ssubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
; GFX8-NEXT: s_lshl_b32 s2, s3, 8
; GFX8-NEXT: s_sext_i32_i16 s3, s1
; GFX8-NEXT: s_max_i32 s4, s3, s5
-; GFX8-NEXT: s_sub_i32 s4, s4, 0x7fff
+; GFX8-NEXT: s_addk_i32 s4, 0x8001
; GFX8-NEXT: s_min_i32 s3, s3, s5
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s2, s2
-; GFX8-NEXT: s_sub_i32 s3, s3, 0xffff8000
+; GFX8-NEXT: s_addk_i32 s3, 0x8000
; GFX8-NEXT: s_max_i32 s2, s4, s2
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s3, s3
@@ -509,29 +510,29 @@ define i32 @v_ssubsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
; GFX6-NEXT: v_lshrrev_b32_e32 v6, 16, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v7, 24, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1
-; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, 0x7fffffff, v8
+; GFX6-NEXT: v_add_i32_e32 v8, vcc, 0x80000001, v8
; GFX6-NEXT: v_min_i32_e32 v10, -1, v0
; GFX6-NEXT: v_bfrev_b32_e32 v11, 1
-; GFX6-NEXT: v_sub_i32_e32 v10, vcc, v10, v11
+; GFX6-NEXT: v_add_i32_e32 v10, vcc, v10, v11
; GFX6-NEXT: v_max_i32_e32 v1, v8, v1
; GFX6-NEXT: v_min_i32_e32 v1, v1, v10
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v2
-; GFX6-NEXT: v_bfrev_b32_e32 v9, -2
+; GFX6-NEXT: v_mov_b32_e32 v9, 0x80000001
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v5
; GFX6-NEXT: v_max_i32_e32 v5, -1, v1
-; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v9
+; GFX6-NEXT: v_add_i32_e32 v5, vcc, v5, v9
; GFX6-NEXT: v_min_i32_e32 v8, -1, v1
-; GFX6-NEXT: v_sub_i32_e32 v8, vcc, v8, v11
+; GFX6-NEXT: v_add_i32_e32 v8, vcc, v8, v11
; GFX6-NEXT: v_max_i32_e32 v2, v5, v2
; GFX6-NEXT: v_min_i32_e32 v2, v2, v8
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v3
; GFX6-NEXT: v_max_i32_e32 v5, -1, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 24, v6
-; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v9
+; GFX6-NEXT: v_add_i32_e32 v5, vcc, v5, v9
; GFX6-NEXT: v_min_i32_e32 v6, -1, v2
-; GFX6-NEXT: v_sub_i32_e32 v6, vcc, v6, v11
+; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v11
; GFX6-NEXT: v_max_i32_e32 v3, v5, v3
; GFX6-NEXT: v_min_i32_e32 v3, v3, v6
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3
@@ -539,10 +540,10 @@ define i32 @v_ssubsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
; GFX6-NEXT: v_max_i32_e32 v5, -1, v3
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 24, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 24, v7
-; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v9
+; GFX6-NEXT: v_add_i32_e32 v5, vcc, v5, v9
; GFX6-NEXT: v_min_i32_e32 v6, -1, v3
; GFX6-NEXT: v_ashrrev_i32_e32 v0, 24, v0
-; GFX6-NEXT: v_sub_i32_e32 v6, vcc, v6, v11
+; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v11
; GFX6-NEXT: v_max_i32_e32 v4, v5, v4
; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX6-NEXT: v_ashrrev_i32_e32 v2, 24, v2
@@ -573,34 +574,34 @@ define i32 @v_ssubsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v1
; GFX8-NEXT: v_lshrrev_b32_e32 v7, 24, v1
; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1
-; GFX8-NEXT: v_subrev_u16_e32 v8, 0x7fff, v8
+; GFX8-NEXT: v_add_u16_e32 v8, 0x8001, v8
; GFX8-NEXT: v_min_i16_e32 v9, -1, v0
-; GFX8-NEXT: v_subrev_u16_e32 v9, 0x8000, v9
+; GFX8-NEXT: v_add_u16_e32 v9, 0x8000, v9
; GFX8-NEXT: v_max_i16_e32 v1, v8, v1
; GFX8-NEXT: v_min_i16_e32 v1, v1, v9
; GFX8-NEXT: v_sub_u16_e32 v0, v0, v1
; GFX8-NEXT: v_max_i16_e32 v1, -1, v3
-; GFX8-NEXT: v_subrev_u16_e32 v1, 0x7fff, v1
+; GFX8-NEXT: v_add_u16_e32 v1, 0x8001, v1
; GFX8-NEXT: v_min_i16_e32 v8, -1, v3
-; GFX8-NEXT: v_subrev_u16_e32 v8, 0x8000, v8
+; GFX8-NEXT: v_add_u16_e32 v8, 0x8000, v8
; GFX8-NEXT: v_max_i16_e32 v1, v1, v2
; GFX8-NEXT: v_lshlrev_b16_e32 v2, 8, v4
; GFX8-NEXT: v_min_i16_e32 v1, v1, v8
; GFX8-NEXT: v_max_i16_e32 v4, -1, v2
; GFX8-NEXT: v_sub_u16_e32 v1, v3, v1
; GFX8-NEXT: v_lshlrev_b16_e32 v3, 8, v6
-; GFX8-NEXT: v_subrev_u16_e32 v4, 0x7fff, v4
+; GFX8-NEXT: v_add_u16_e32 v4, 0x8001, v4
; GFX8-NEXT: v_min_i16_e32 v6, -1, v2
-; GFX8-NEXT: v_subrev_u16_e32 v6, 0x8000, v6
+; GFX8-NEXT: v_add_u16_e32 v6, 0x8000, v6
; GFX8-NEXT: v_max_i16_e32 v3, v4, v3
; GFX8-NEXT: v_min_i16_e32 v3, v3, v6
; GFX8-NEXT: v_sub_u16_e32 v2, v2, v3
; GFX8-NEXT: v_lshlrev_b16_e32 v3, 8, v5
; GFX8-NEXT: v_max_i16_e32 v5, -1, v3
; GFX8-NEXT: v_lshlrev_b16_e32 v4, 8, v7
-; GFX8-NEXT: v_subrev_u16_e32 v5, 0x7fff, v5
+; GFX8-NEXT: v_add_u16_e32 v5, 0x8001, v5
; GFX8-NEXT: v_min_i16_e32 v6, -1, v3
-; GFX8-NEXT: v_subrev_u16_e32 v6, 0x8000, v6
+; GFX8-NEXT: v_add_u16_e32 v6, 0x8000, v6
; GFX8-NEXT: v_max_i16_e32 v4, v5, v4
; GFX8-NEXT: v_min_i16_e32 v4, v4, v6
; GFX8-NEXT: v_sub_u16_e32 v3, v3, v4
@@ -727,27 +728,27 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX6-NEXT: s_lshr_b32 s6, s1, 16
; GFX6-NEXT: s_lshr_b32 s7, s1, 24
; GFX6-NEXT: s_lshl_b32 s1, s1, 24
-; GFX6-NEXT: s_sub_i32 s8, s8, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s8, s8, 0x80000001
; GFX6-NEXT: s_min_i32 s9, s0, -1
-; GFX6-NEXT: s_sub_i32 s9, s9, 0x80000000
+; GFX6-NEXT: s_add_i32 s9, s9, 0x80000000
; GFX6-NEXT: s_max_i32 s1, s8, s1
; GFX6-NEXT: s_min_i32 s1, s1, s9
; GFX6-NEXT: s_sub_i32 s0, s0, s1
; GFX6-NEXT: s_lshl_b32 s1, s2, 24
; GFX6-NEXT: s_lshl_b32 s2, s5, 24
; GFX6-NEXT: s_max_i32 s5, s1, -1
-; GFX6-NEXT: s_sub_i32 s5, s5, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s5, s5, 0x80000001
; GFX6-NEXT: s_min_i32 s8, s1, -1
-; GFX6-NEXT: s_sub_i32 s8, s8, 0x80000000
+; GFX6-NEXT: s_add_i32 s8, s8, 0x80000000
; GFX6-NEXT: s_max_i32 s2, s5, s2
; GFX6-NEXT: s_min_i32 s2, s2, s8
; GFX6-NEXT: s_sub_i32 s1, s1, s2
; GFX6-NEXT: s_lshl_b32 s2, s3, 24
; GFX6-NEXT: s_max_i32 s5, s2, -1
; GFX6-NEXT: s_lshl_b32 s3, s6, 24
-; GFX6-NEXT: s_sub_i32 s5, s5, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s5, s5, 0x80000001
; GFX6-NEXT: s_min_i32 s6, s2, -1
-; GFX6-NEXT: s_sub_i32 s6, s6, 0x80000000
+; GFX6-NEXT: s_add_i32 s6, s6, 0x80000000
; GFX6-NEXT: s_max_i32 s3, s5, s3
; GFX6-NEXT: s_min_i32 s3, s3, s6
; GFX6-NEXT: s_sub_i32 s2, s2, s3
@@ -755,10 +756,10 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX6-NEXT: s_max_i32 s5, s3, -1
; GFX6-NEXT: s_ashr_i32 s1, s1, 24
; GFX6-NEXT: s_lshl_b32 s4, s7, 24
-; GFX6-NEXT: s_sub_i32 s5, s5, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s5, s5, 0x80000001
; GFX6-NEXT: s_min_i32 s6, s3, -1
; GFX6-NEXT: s_ashr_i32 s0, s0, 24
-; GFX6-NEXT: s_sub_i32 s6, s6, 0x80000000
+; GFX6-NEXT: s_add_i32 s6, s6, 0x80000000
; GFX6-NEXT: s_max_i32 s4, s5, s4
; GFX6-NEXT: s_and_b32 s1, s1, 0xff
; GFX6-NEXT: s_ashr_i32 s2, s2, 24
@@ -789,11 +790,11 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX8-NEXT: s_lshr_b32 s6, s1, 16
; GFX8-NEXT: s_lshr_b32 s7, s1, 24
; GFX8-NEXT: s_lshl_b32 s1, s1, 8
-; GFX8-NEXT: s_sub_i32 s10, s10, 0x7fff
+; GFX8-NEXT: s_addk_i32 s10, 0x8001
; GFX8-NEXT: s_min_i32 s8, s8, s9
; GFX8-NEXT: s_sext_i32_i16 s10, s10
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sub_i32 s8, s8, 0xffff8000
+; GFX8-NEXT: s_addk_i32 s8, 0x8000
; GFX8-NEXT: s_max_i32 s1, s10, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s8, s8
@@ -803,11 +804,11 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX8-NEXT: s_lshl_b32 s2, s5, 8
; GFX8-NEXT: s_sext_i32_i16 s5, s1
; GFX8-NEXT: s_max_i32 s8, s5, s9
-; GFX8-NEXT: s_sub_i32 s8, s8, 0x7fff
+; GFX8-NEXT: s_addk_i32 s8, 0x8001
; GFX8-NEXT: s_min_i32 s5, s5, s9
; GFX8-NEXT: s_sext_i32_i16 s8, s8
; GFX8-NEXT: s_sext_i32_i16 s2, s2
-; GFX8-NEXT: s_sub_i32 s5, s5, 0xffff8000
+; GFX8-NEXT: s_addk_i32 s5, 0x8000
; GFX8-NEXT: s_max_i32 s2, s8, s2
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s5, s5
@@ -817,11 +818,11 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX8-NEXT: s_sext_i32_i16 s5, s2
; GFX8-NEXT: s_lshl_b32 s3, s6, 8
; GFX8-NEXT: s_max_i32 s6, s5, s9
-; GFX8-NEXT: s_sub_i32 s6, s6, 0x7fff
+; GFX8-NEXT: s_addk_i32 s6, 0x8001
; GFX8-NEXT: s_min_i32 s5, s5, s9
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s3, s3
-; GFX8-NEXT: s_sub_i32 s5, s5, 0xffff8000
+; GFX8-NEXT: s_addk_i32 s5, 0x8000
; GFX8-NEXT: s_max_i32 s3, s6, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s5, s5
@@ -831,12 +832,12 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX8-NEXT: s_sext_i32_i16 s5, s3
; GFX8-NEXT: s_max_i32 s6, s5, s9
; GFX8-NEXT: s_lshl_b32 s4, s7, 8
-; GFX8-NEXT: s_sub_i32 s6, s6, 0x7fff
+; GFX8-NEXT: s_addk_i32 s6, 0x8001
; GFX8-NEXT: s_min_i32 s5, s5, s9
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sub_i32 s5, s5, 0xffff8000
+; GFX8-NEXT: s_addk_i32 s5, 0x8000
; GFX8-NEXT: s_max_i32 s4, s6, s4
; GFX8-NEXT: s_sext_i32_i16 s0, s0
; GFX8-NEXT: s_ashr_i32 s1, s1, 8
@@ -1004,9 +1005,9 @@ define i24 @v_ssubsat_i24(i24 %lhs, i24 %rhs) {
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 8, v0
; GFX6-NEXT: v_max_i32_e32 v2, -1, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1
-; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0x80000001, v2
; GFX6-NEXT: v_min_i32_e32 v3, -1, v0
-; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x80000000, v3
+; GFX6-NEXT: v_add_i32_e32 v3, vcc, 0x80000000, v3
; GFX6-NEXT: v_max_i32_e32 v1, v2, v1
; GFX6-NEXT: v_min_i32_e32 v1, v1, v3
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
@@ -1055,9 +1056,9 @@ define amdgpu_ps i24 @s_ssubsat_i24(i24 inreg %lhs, i24 inreg %rhs) {
; GFX6-NEXT: s_lshl_b32 s0, s0, 8
; GFX6-NEXT: s_max_i32 s2, s0, -1
; GFX6-NEXT: s_lshl_b32 s1, s1, 8
-; GFX6-NEXT: s_sub_i32 s2, s2, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s2, s2, 0x80000001
; GFX6-NEXT: s_min_i32 s3, s0, -1
-; GFX6-NEXT: s_sub_i32 s3, s3, 0x80000000
+; GFX6-NEXT: s_add_i32 s3, s3, 0x80000000
; GFX6-NEXT: s_max_i32 s1, s2, s1
; GFX6-NEXT: s_min_i32 s1, s1, s3
; GFX6-NEXT: s_sub_i32 s0, s0, s1
@@ -1109,9 +1110,9 @@ define i32 @v_ssubsat_i32(i32 %lhs, i32 %rhs) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_max_i32_e32 v2, -1, v0
-; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0x80000001, v2
; GFX6-NEXT: v_min_i32_e32 v3, -1, v0
-; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x80000000, v3
+; GFX6-NEXT: v_add_i32_e32 v3, vcc, 0x80000000, v3
; GFX6-NEXT: v_max_i32_e32 v1, v2, v1
; GFX6-NEXT: v_min_i32_e32 v1, v1, v3
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
@@ -1121,9 +1122,9 @@ define i32 @v_ssubsat_i32(i32 %lhs, i32 %rhs) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_i32_e32 v2, -1, v0
-; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 0x7fffffff, v2
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0x80000001, v2
; GFX8-NEXT: v_min_i32_e32 v3, -1, v0
-; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 0x80000000, v3
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0x80000000, v3
; GFX8-NEXT: v_max_i32_e32 v1, v2, v1
; GFX8-NEXT: v_min_i32_e32 v1, v1, v3
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v1
@@ -1148,9 +1149,9 @@ define amdgpu_ps i32 @s_ssubsat_i32(i32 inreg %lhs, i32 inreg %rhs) {
; GFX6-LABEL: s_ssubsat_i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_max_i32 s2, s0, -1
-; GFX6-NEXT: s_sub_i32 s2, s2, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s2, s2, 0x80000001
; GFX6-NEXT: s_min_i32 s3, s0, -1
-; GFX6-NEXT: s_sub_i32 s3, s3, 0x80000000
+; GFX6-NEXT: s_add_i32 s3, s3, 0x80000000
; GFX6-NEXT: s_max_i32 s1, s2, s1
; GFX6-NEXT: s_min_i32 s1, s1, s3
; GFX6-NEXT: s_sub_i32 s0, s0, s1
@@ -1159,9 +1160,9 @@ define amdgpu_ps i32 @s_ssubsat_i32(i32 inreg %lhs, i32 inreg %rhs) {
; GFX8-LABEL: s_ssubsat_i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_max_i32 s2, s0, -1
-; GFX8-NEXT: s_sub_i32 s2, s2, 0x7fffffff
+; GFX8-NEXT: s_add_i32 s2, s2, 0x80000001
; GFX8-NEXT: s_min_i32 s3, s0, -1
-; GFX8-NEXT: s_sub_i32 s3, s3, 0x80000000
+; GFX8-NEXT: s_add_i32 s3, s3, 0x80000000
; GFX8-NEXT: s_max_i32 s1, s2, s1
; GFX8-NEXT: s_min_i32 s1, s1, s3
; GFX8-NEXT: s_sub_i32 s0, s0, s1
@@ -1187,9 +1188,9 @@ define amdgpu_ps float @ssubsat_i32_sv(i32 inreg %lhs, i32 %rhs) {
; GFX6-LABEL: ssubsat_i32_sv:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_max_i32 s1, s0, -1
-; GFX6-NEXT: s_sub_i32 s1, s1, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s1, s1, 0x80000001
; GFX6-NEXT: s_min_i32 s2, s0, -1
-; GFX6-NEXT: s_sub_i32 s2, s2, 0x80000000
+; GFX6-NEXT: s_add_i32 s2, s2, 0x80000000
; GFX6-NEXT: v_max_i32_e32 v0, s1, v0
; GFX6-NEXT: v_min_i32_e32 v0, s2, v0
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s0, v0
@@ -1198,9 +1199,9 @@ define amdgpu_ps float @ssubsat_i32_sv(i32 inreg %lhs, i32 %rhs) {
; GFX8-LABEL: ssubsat_i32_sv:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_max_i32 s1, s0, -1
-; GFX8-NEXT: s_sub_i32 s1, s1, 0x7fffffff
+; GFX8-NEXT: s_add_i32 s1, s1, 0x80000001
; GFX8-NEXT: s_min_i32 s2, s0, -1
-; GFX8-NEXT: s_sub_i32 s2, s2, 0x80000000
+; GFX8-NEXT: s_add_i32 s2, s2, 0x80000000
; GFX8-NEXT: v_max_i32_e32 v0, s1, v0
; GFX8-NEXT: v_min_i32_e32 v0, s2, v0
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s0, v0
@@ -1224,9 +1225,9 @@ define amdgpu_ps float @ssubsat_i32_vs(i32 %lhs, i32 inreg %rhs) {
; GFX6-LABEL: ssubsat_i32_vs:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_max_i32_e32 v1, -1, v0
-; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 0x7fffffff, v1
+; GFX6-NEXT: v_add_i32_e32 v1, vcc, 0x80000001, v1
; GFX6-NEXT: v_min_i32_e32 v2, -1, v0
-; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 0x80000000, v2
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0x80000000, v2
; GFX6-NEXT: v_max_i32_e32 v1, s0, v1
; GFX6-NEXT: v_min_i32_e32 v1, v1, v2
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
@@ -1235,9 +1236,9 @@ define amdgpu_ps float @ssubsat_i32_vs(i32 %lhs, i32 inreg %rhs) {
; GFX8-LABEL: ssubsat_i32_vs:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_max_i32_e32 v1, -1, v0
-; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 0x7fffffff, v1
+; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x80000001, v1
; GFX8-NEXT: v_min_i32_e32 v2, -1, v0
-; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 0x80000000, v2
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0x80000000, v2
; GFX8-NEXT: v_max_i32_e32 v1, s0, v1
; GFX8-NEXT: v_min_i32_e32 v1, v1, v2
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v1
@@ -1262,16 +1263,16 @@ define <2 x i32> @v_ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_max_i32_e32 v4, -1, v0
-; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 0x7fffffff, v4
+; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0x80000001, v4
; GFX6-NEXT: v_min_i32_e32 v5, -1, v0
-; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 0x80000000, v5
+; GFX6-NEXT: v_add_i32_e32 v5, vcc, 0x80000000, v5
; GFX6-NEXT: v_max_i32_e32 v2, v4, v2
; GFX6-NEXT: v_min_i32_e32 v2, v2, v5
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; GFX6-NEXT: v_max_i32_e32 v2, -1, v1
-; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0x80000001, v2
; GFX6-NEXT: v_min_i32_e32 v4, -1, v1
-; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 0x80000000, v4
+; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0x80000000, v4
; GFX6-NEXT: v_max_i32_e32 v2, v2, v3
; GFX6-NEXT: v_min_i32_e32 v2, v2, v4
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v2
@@ -1281,16 +1282,16 @@ define <2 x i32> @v_ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_i32_e32 v4, -1, v0
-; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 0x7fffffff, v4
+; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0x80000001, v4
; GFX8-NEXT: v_min_i32_e32 v5, -1, v0
-; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, 0x80000000, v5
+; GFX8-NEXT: v_add_u32_e32 v5, vcc, 0x80000000, v5
; GFX8-NEXT: v_max_i32_e32 v2, v4, v2
; GFX8-NEXT: v_min_i32_e32 v2, v2, v5
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_max_i32_e32 v2, -1, v1
-; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 0x7fffffff, v2
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0x80000001, v2
; GFX8-NEXT: v_min_i32_e32 v4, -1, v1
-; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 0x80000000, v4
+; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0x80000000, v4
; GFX8-NEXT: v_max_i32_e32 v2, v2, v3
; GFX8-NEXT: v_min_i32_e32 v2, v2, v4
; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v1, v2
@@ -1317,16 +1318,16 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v2i32(<2 x i32> inreg %lhs, <2 x i32> inre
; GFX6-LABEL: s_ssubsat_v2i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_max_i32 s4, s0, -1
-; GFX6-NEXT: s_sub_i32 s4, s4, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s4, s4, 0x80000001
; GFX6-NEXT: s_min_i32 s5, s0, -1
-; GFX6-NEXT: s_sub_i32 s5, s5, 0x80000000
+; GFX6-NEXT: s_add_i32 s5, s5, 0x80000000
; GFX6-NEXT: s_max_i32 s2, s4, s2
; GFX6-NEXT: s_min_i32 s2, s2, s5
; GFX6-NEXT: s_sub_i32 s0, s0, s2
; GFX6-NEXT: s_max_i32 s2, s1, -1
-; GFX6-NEXT: s_sub_i32 s2, s2, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s2, s2, 0x80000001
; GFX6-NEXT: s_min_i32 s4, s1, -1
-; GFX6-NEXT: s_sub_i32 s4, s4, 0x80000000
+; GFX6-NEXT: s_add_i32 s4, s4, 0x80000000
; GFX6-NEXT: s_max_i32 s2, s2, s3
; GFX6-NEXT: s_min_i32 s2, s2, s4
; GFX6-NEXT: s_sub_i32 s1, s1, s2
@@ -1335,16 +1336,16 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v2i32(<2 x i32> inreg %lhs, <2 x i32> inre
; GFX8-LABEL: s_ssubsat_v2i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_max_i32 s4, s0, -1
-; GFX8-NEXT: s_sub_i32 s4, s4, 0x7fffffff
+; GFX8-NEXT: s_add_i32 s4, s4, 0x80000001
; GFX8-NEXT: s_min_i32 s5, s0, -1
-; GFX8-NEXT: s_sub_i32 s5, s5, 0x80000000
+; GFX8-NEXT: s_add_i32 s5, s5, 0x80000000
; GFX8-NEXT: s_max_i32 s2, s4, s2
; GFX8-NEXT: s_min_i32 s2, s2, s5
; GFX8-NEXT: s_sub_i32 s0, s0, s2
; GFX8-NEXT: s_max_i32 s2, s1, -1
-; GFX8-NEXT: s_sub_i32 s2, s2, 0x7fffffff
+; GFX8-NEXT: s_add_i32 s2, s2, 0x80000001
; GFX8-NEXT: s_min_i32 s4, s1, -1
-; GFX8-NEXT: s_sub_i32 s4, s4, 0x80000000
+; GFX8-NEXT: s_add_i32 s4, s4, 0x80000000
; GFX8-NEXT: s_max_i32 s2, s2, s3
; GFX8-NEXT: s_min_i32 s2, s2, s4
; GFX8-NEXT: s_sub_i32 s1, s1, s2
@@ -1376,24 +1377,25 @@ define <3 x i32> @v_ssubsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_max_i32_e32 v6, -1, v0
-; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 0x7fffffff, v6
+; GFX6-NEXT: v_add_i32_e32 v6, vcc, 0x80000001, v6
; GFX6-NEXT: v_min_i32_e32 v8, -1, v0
-; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, 0x80000000, v8
+; GFX6-NEXT: v_bfrev_b32_e32 v9, 1
+; GFX6-NEXT: v_add_i32_e32 v8, vcc, v8, v9
; GFX6-NEXT: v_max_i32_e32 v3, v6, v3
; GFX6-NEXT: v_min_i32_e32 v3, v3, v8
-; GFX6-NEXT: v_bfrev_b32_e32 v7, -2
+; GFX6-NEXT: v_mov_b32_e32 v7, 0x80000001
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
; GFX6-NEXT: v_max_i32_e32 v3, -1, v1
-; GFX6-NEXT: v_sub_i32_e32 v3, vcc, v3, v7
+; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v7
; GFX6-NEXT: v_min_i32_e32 v6, -1, v1
-; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 0x80000000, v6
+; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v9
; GFX6-NEXT: v_max_i32_e32 v3, v3, v4
; GFX6-NEXT: v_min_i32_e32 v3, v3, v6
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
; GFX6-NEXT: v_max_i32_e32 v3, -1, v2
-; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x7fffffff, v3
+; GFX6-NEXT: v_add_i32_e32 v3, vcc, 0x80000001, v3
; GFX6-NEXT: v_min_i32_e32 v4, -1, v2
-; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 0x80000000, v4
+; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0x80000000, v4
; GFX6-NEXT: v_max_i32_e32 v3, v3, v5
; GFX6-NEXT: v_min_i32_e32 v3, v3, v4
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3
@@ -1403,24 +1405,25 @@ define <3 x i32> @v_ssubsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_i32_e32 v6, -1, v0
-; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 0x7fffffff, v6
+; GFX8-NEXT: v_add_u32_e32 v6, vcc, 0x80000001, v6
; GFX8-NEXT: v_min_i32_e32 v8, -1, v0
-; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, 0x80000000, v8
+; GFX8-NEXT: v_bfrev_b32_e32 v9, 1
+; GFX8-NEXT: v_add_u32_e32 v8, vcc, v8, v9
; GFX8-NEXT: v_max_i32_e32 v3, v6, v3
; GFX8-NEXT: v_min_i32_e32 v3, v3, v8
-; GFX8-NEXT: v_bfrev_b32_e32 v7, -2
+; GFX8-NEXT: v_mov_b32_e32 v7, 0x80000001
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v3
; GFX8-NEXT: v_max_i32_e32 v3, -1, v1
-; GFX8-NEXT: v_sub_u32_e32 v3, vcc, v3, v7
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v7
; GFX8-NEXT: v_min_i32_e32 v6, -1, v1
-; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 0x80000000, v6
+; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v9
; GFX8-NEXT: v_max_i32_e32 v3, v3, v4
; GFX8-NEXT: v_min_i32_e32 v3, v3, v6
; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v1, v3
; GFX8-NEXT: v_max_i32_e32 v3, -1, v2
-; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 0x7fffffff, v3
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0x80000001, v3
; GFX8-NEXT: v_min_i32_e32 v4, -1, v2
-; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 0x80000000, v4
+; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0x80000000, v4
; GFX8-NEXT: v_max_i32_e32 v3, v3, v5
; GFX8-NEXT: v_min_i32_e32 v3, v3, v4
; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3
@@ -1449,23 +1452,23 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v3i32(<3 x i32> inreg %lhs, <3 x i32> inre
; GFX6-LABEL: s_ssubsat_v3i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_max_i32 s6, s0, -1
-; GFX6-NEXT: s_sub_i32 s6, s6, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s6, s6, 0x80000001
; GFX6-NEXT: s_min_i32 s7, s0, -1
-; GFX6-NEXT: s_sub_i32 s7, s7, 0x80000000
+; GFX6-NEXT: s_add_i32 s7, s7, 0x80000000
; GFX6-NEXT: s_max_i32 s3, s6, s3
; GFX6-NEXT: s_min_i32 s3, s3, s7
; GFX6-NEXT: s_sub_i32 s0, s0, s3
; GFX6-NEXT: s_max_i32 s3, s1, -1
-; GFX6-NEXT: s_sub_i32 s3, s3, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s3, s3, 0x80000001
; GFX6-NEXT: s_min_i32 s6, s1, -1
-; GFX6-NEXT: s_sub_i32 s6, s6, 0x80000000
+; GFX6-NEXT: s_add_i32 s6, s6, 0x80000000
; GFX6-NEXT: s_max_i32 s3, s3, s4
; GFX6-NEXT: s_min_i32 s3, s3, s6
; GFX6-NEXT: s_sub_i32 s1, s1, s3
; GFX6-NEXT: s_max_i32 s3, s2, -1
-; GFX6-NEXT: s_sub_i32 s3, s3, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s3, s3, 0x80000001
; GFX6-NEXT: s_min_i32 s4, s2, -1
-; GFX6-NEXT: s_sub_i32 s4, s4, 0x80000000
+; GFX6-NEXT: s_add_i32 s4, s4, 0x80000000
; GFX6-NEXT: s_max_i32 s3, s3, s5
; GFX6-NEXT: s_min_i32 s3, s3, s4
; GFX6-NEXT: s_sub_i32 s2, s2, s3
@@ -1474,23 +1477,23 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v3i32(<3 x i32> inreg %lhs, <3 x i32> inre
; GFX8-LABEL: s_ssubsat_v3i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_max_i32 s6, s0, -1
-; GFX8-NEXT: s_sub_i32 s6, s6, 0x7fffffff
+; GFX8-NEXT: s_add_i32 s6, s6, 0x80000001
; GFX8-NEXT: s_min_i32 s7, s0, -1
-; GFX8-NEXT: s_sub_i32 s7, s7, 0x80000000
+; GFX8-NEXT: s_add_i32 s7, s7, 0x80000000
; GFX8-NEXT: s_max_i32 s3, s6, s3
; GFX8-NEXT: s_min_i32 s3, s3, s7
; GFX8-NEXT: s_sub_i32 s0, s0, s3
; GFX8-NEXT: s_max_i32 s3, s1, -1
-; GFX8-NEXT: s_sub_i32 s3, s3, 0x7fffffff
+; GFX8-NEXT: s_add_i32 s3, s3, 0x80000001
; GFX8-NEXT: s_min_i32 s6, s1, -1
-; GFX8-NEXT: s_sub_i32 s6, s6, 0x80000000
+; GFX8-NEXT: s_add_i32 s6, s6, 0x80000000
; GFX8-NEXT: s_max_i32 s3, s3, s4
; GFX8-NEXT: s_min_i32 s3, s3, s6
; GFX8-NEXT: s_sub_i32 s1, s1, s3
; GFX8-NEXT: s_max_i32 s3, s2, -1
-; GFX8-NEXT: s_sub_i32 s3, s3, 0x7fffffff
+; GFX8-NEXT: s_add_i32 s3, s3, 0x80000001
; GFX8-NEXT: s_min_i32 s4, s2, -1
-; GFX8-NEXT: s_sub_i32 s4, s4, 0x80000000
+; GFX8-NEXT: s_add_i32 s4, s4, 0x80000000
; GFX8-NEXT: s_max_i32 s3, s3, s5
; GFX8-NEXT: s_min_i32 s3, s3, s4
; GFX8-NEXT: s_sub_i32 s2, s2, s3
@@ -1527,32 +1530,32 @@ define <4 x i32> @v_ssubsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_max_i32_e32 v8, -1, v0
-; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, 0x7fffffff, v8
+; GFX6-NEXT: v_add_i32_e32 v8, vcc, 0x80000001, v8
; GFX6-NEXT: v_min_i32_e32 v10, -1, v0
; GFX6-NEXT: v_bfrev_b32_e32 v11, 1
-; GFX6-NEXT: v_sub_i32_e32 v10, vcc, v10, v11
+; GFX6-NEXT: v_add_i32_e32 v10, vcc, v10, v11
; GFX6-NEXT: v_max_i32_e32 v4, v8, v4
; GFX6-NEXT: v_min_i32_e32 v4, v4, v10
-; GFX6-NEXT: v_bfrev_b32_e32 v9, -2
+; GFX6-NEXT: v_mov_b32_e32 v9, 0x80000001
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
; GFX6-NEXT: v_max_i32_e32 v4, -1, v1
-; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v9
+; GFX6-NEXT: v_add_i32_e32 v4, vcc, v4, v9
; GFX6-NEXT: v_min_i32_e32 v8, -1, v1
-; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, 0x80000000, v8
+; GFX6-NEXT: v_add_i32_e32 v8, vcc, v8, v11
; GFX6-NEXT: v_max_i32_e32 v4, v4, v5
; GFX6-NEXT: v_min_i32_e32 v4, v4, v8
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
; GFX6-NEXT: v_max_i32_e32 v4, -1, v2
-; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v9
+; GFX6-NEXT: v_add_i32_e32 v4, vcc, v4, v9
; GFX6-NEXT: v_min_i32_e32 v5, -1, v2
-; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 0x80000000, v5
+; GFX6-NEXT: v_add_i32_e32 v5, vcc, v5, v11
; GFX6-NEXT: v_max_i32_e32 v4, v4, v6
; GFX6-NEXT: v_min_i32_e32 v4, v4, v5
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v4
; GFX6-NEXT: v_max_i32_e32 v4, -1, v3
-; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 0x7fffffff, v4
+; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0x80000001, v4
; GFX6-NEXT: v_min_i32_e32 v5, -1, v3
-; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 0x80000000, v5
+; GFX6-NEXT: v_add_i32_e32 v5, vcc, 0x80000000, v5
; GFX6-NEXT: v_max_i32_e32 v4, v4, v7
; GFX6-NEXT: v_min_i32_e32 v4, v4, v5
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, v3, v4
@@ -1562,32 +1565,32 @@ define <4 x i32> @v_ssubsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_i32_e32 v8, -1, v0
-; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, 0x7fffffff, v8
+; GFX8-NEXT: v_add_u32_e32 v8, vcc, 0x80000001, v8
; GFX8-NEXT: v_min_i32_e32 v10, -1, v0
; GFX8-NEXT: v_bfrev_b32_e32 v11, 1
-; GFX8-NEXT: v_sub_u32_e32 v10, vcc, v10, v11
+; GFX8-NEXT: v_add_u32_e32 v10, vcc, v10, v11
; GFX8-NEXT: v_max_i32_e32 v4, v8, v4
; GFX8-NEXT: v_min_i32_e32 v4, v4, v10
-; GFX8-NEXT: v_bfrev_b32_e32 v9, -2
+; GFX8-NEXT: v_mov_b32_e32 v9, 0x80000001
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v4
; GFX8-NEXT: v_max_i32_e32 v4, -1, v1
-; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v9
+; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v9
; GFX8-NEXT: v_min_i32_e32 v8, -1, v1
-; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, 0x80000000, v8
+; GFX8-NEXT: v_add_u32_e32 v8, vcc, v8, v11
; GFX8-NEXT: v_max_i32_e32 v4, v4, v5
; GFX8-NEXT: v_min_i32_e32 v4, v4, v8
; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v1, v4
; GFX8-NEXT: v_max_i32_e32 v4, -1, v2
-; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v9
+; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v9
; GFX8-NEXT: v_min_i32_e32 v5, -1, v2
-; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, 0x80000000, v5
+; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v11
; GFX8-NEXT: v_max_i32_e32 v4, v4, v6
; GFX8-NEXT: v_min_i32_e32 v4, v4, v5
; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v4
; GFX8-NEXT: v_max_i32_e32 v4, -1, v3
-; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 0x7fffffff, v4
+; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0x80000001, v4
; GFX8-NEXT: v_min_i32_e32 v5, -1, v3
-; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, 0x80000000, v5
+; GFX8-NEXT: v_add_u32_e32 v5, vcc, 0x80000000, v5
; GFX8-NEXT: v_max_i32_e32 v4, v4, v7
; GFX8-NEXT: v_min_i32_e32 v4, v4, v5
; GFX8-NEXT: v_sub_u32_e32 v3, vcc, v3, v4
@@ -1618,30 +1621,30 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v4i32(<4 x i32> inreg %lhs, <4 x i32> inre
; GFX6-LABEL: s_ssubsat_v4i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_max_i32 s8, s0, -1
-; GFX6-NEXT: s_sub_i32 s8, s8, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s8, s8, 0x80000001
; GFX6-NEXT: s_min_i32 s9, s0, -1
-; GFX6-NEXT: s_sub_i32 s9, s9, 0x80000000
+; GFX6-NEXT: s_add_i32 s9, s9, 0x80000000
; GFX6-NEXT: s_max_i32 s4, s8, s4
; GFX6-NEXT: s_min_i32 s4, s4, s9
; GFX6-NEXT: s_sub_i32 s0, s0, s4
; GFX6-NEXT: s_max_i32 s4, s1, -1
-; GFX6-NEXT: s_sub_i32 s4, s4, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s4, s4, 0x80000001
; GFX6-NEXT: s_min_i32 s8, s1, -1
-; GFX6-NEXT: s_sub_i32 s8, s8, 0x80000000
+; GFX6-NEXT: s_add_i32 s8, s8, 0x80000000
; GFX6-NEXT: s_max_i32 s4, s4, s5
; GFX6-NEXT: s_min_i32 s4, s4, s8
; GFX6-NEXT: s_sub_i32 s1, s1, s4
; GFX6-NEXT: s_max_i32 s4, s2, -1
-; GFX6-NEXT: s_sub_i32 s4, s4, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s4, s4, 0x80000001
; GFX6-NEXT: s_min_i32 s5, s2, -1
-; GFX6-NEXT: s_sub_i32 s5, s5, 0x80000000
+; GFX6-NEXT: s_add_i32 s5, s5, 0x80000000
; GFX6-NEXT: s_max_i32 s4, s4, s6
; GFX6-NEXT: s_min_i32 s4, s4, s5
; GFX6-NEXT: s_sub_i32 s2, s2, s4
; GFX6-NEXT: s_max_i32 s4, s3, -1
-; GFX6-NEXT: s_sub_i32 s4, s4, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s4, s4, 0x80000001
; GFX6-NEXT: s_min_i32 s5, s3, -1
-; GFX6-NEXT: s_sub_i32 s5, s5, 0x80000000
+; GFX6-NEXT: s_add_i32 s5, s5, 0x80000000
; GFX6-NEXT: s_max_i32 s4, s4, s7
; GFX6-NEXT: s_min_i32 s4, s4, s5
; GFX6-NEXT: s_sub_i32 s3, s3, s4
@@ -1650,30 +1653,30 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v4i32(<4 x i32> inreg %lhs, <4 x i32> inre
; GFX8-LABEL: s_ssubsat_v4i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_max_i32 s8, s0, -1
-; GFX8-NEXT: s_sub_i32 s8, s8, 0x7fffffff
+; GFX8-NEXT: s_add_i32 s8, s8, 0x80000001
; GFX8-NEXT: s_min_i32 s9, s0, -1
-; GFX8-NEXT: s_sub_i32 s9, s9, 0x80000000
+; GFX8-NEXT: s_add_i32 s9, s9, 0x80000000
; GFX8-NEXT: s_max_i32 s4, s8, s4
; GFX8-NEXT: s_min_i32 s4, s4, s9
; GFX8-NEXT: s_sub_i32 s0, s0, s4
; GFX8-NEXT: s_max_i32 s4, s1, -1
-; GFX8-NEXT: s_sub_i32 s4, s4, 0x7fffffff
+; GFX8-NEXT: s_add_i32 s4, s4, 0x80000001
; GFX8-NEXT: s_min_i32 s8, s1, -1
-; GFX8-NEXT: s_sub_i32 s8, s8, 0x80000000
+; GFX8-NEXT: s_add_i32 s8, s8, 0x80000000
; GFX8-NEXT: s_max_i32 s4, s4, s5
; GFX8-NEXT: s_min_i32 s4, s4, s8
; GFX8-NEXT: s_sub_i32 s1, s1, s4
; GFX8-NEXT: s_max_i32 s4, s2, -1
-; GFX8-NEXT: s_sub_i32 s4, s4, 0x7fffffff
+; GFX8-NEXT: s_add_i32 s4, s4, 0x80000001
; GFX8-NEXT: s_min_i32 s5, s2, -1
-; GFX8-NEXT: s_sub_i32 s5, s5, 0x80000000
+; GFX8-NEXT: s_add_i32 s5, s5, 0x80000000
; GFX8-NEXT: s_max_i32 s4, s4, s6
; GFX8-NEXT: s_min_i32 s4, s4, s5
; GFX8-NEXT: s_sub_i32 s2, s2, s4
; GFX8-NEXT: s_max_i32 s4, s3, -1
-; GFX8-NEXT: s_sub_i32 s4, s4, 0x7fffffff
+; GFX8-NEXT: s_add_i32 s4, s4, 0x80000001
; GFX8-NEXT: s_min_i32 s5, s3, -1
-; GFX8-NEXT: s_sub_i32 s5, s5, 0x80000000
+; GFX8-NEXT: s_add_i32 s5, s5, 0x80000000
; GFX8-NEXT: s_max_i32 s4, s4, s7
; GFX8-NEXT: s_min_i32 s4, s4, s5
; GFX8-NEXT: s_sub_i32 s3, s3, s4
@@ -1715,39 +1718,39 @@ define <5 x i32> @v_ssubsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_max_i32_e32 v10, -1, v0
-; GFX6-NEXT: v_subrev_i32_e32 v10, vcc, 0x7fffffff, v10
+; GFX6-NEXT: v_add_i32_e32 v10, vcc, 0x80000001, v10
; GFX6-NEXT: v_min_i32_e32 v12, -1, v0
; GFX6-NEXT: v_bfrev_b32_e32 v13, 1
-; GFX6-NEXT: v_sub_i32_e32 v12, vcc, v12, v13
+; GFX6-NEXT: v_add_i32_e32 v12, vcc, v12, v13
; GFX6-NEXT: v_max_i32_e32 v5, v10, v5
; GFX6-NEXT: v_min_i32_e32 v5, v5, v12
-; GFX6-NEXT: v_bfrev_b32_e32 v11, -2
+; GFX6-NEXT: v_mov_b32_e32 v11, 0x80000001
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
; GFX6-NEXT: v_max_i32_e32 v5, -1, v1
-; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v11
+; GFX6-NEXT: v_add_i32_e32 v5, vcc, v5, v11
; GFX6-NEXT: v_min_i32_e32 v10, -1, v1
-; GFX6-NEXT: v_sub_i32_e32 v10, vcc, v10, v13
+; GFX6-NEXT: v_add_i32_e32 v10, vcc, v10, v13
; GFX6-NEXT: v_max_i32_e32 v5, v5, v6
; GFX6-NEXT: v_min_i32_e32 v5, v5, v10
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
; GFX6-NEXT: v_max_i32_e32 v5, -1, v2
-; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v11
+; GFX6-NEXT: v_add_i32_e32 v5, vcc, v5, v11
; GFX6-NEXT: v_min_i32_e32 v6, -1, v2
-; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 0x80000000, v6
+; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v13
; GFX6-NEXT: v_max_i32_e32 v5, v5, v7
; GFX6-NEXT: v_min_i32_e32 v5, v5, v6
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v5
; GFX6-NEXT: v_max_i32_e32 v5, -1, v3
-; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v11
+; GFX6-NEXT: v_add_i32_e32 v5, vcc, v5, v11
; GFX6-NEXT: v_min_i32_e32 v6, -1, v3
-; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 0x80000000, v6
+; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v13
; GFX6-NEXT: v_max_i32_e32 v5, v5, v8
; GFX6-NEXT: v_min_i32_e32 v5, v5, v6
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, v3, v5
; GFX6-NEXT: v_max_i32_e32 v5, -1, v4
-; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 0x7fffffff, v5
+; GFX6-NEXT: v_add_i32_e32 v5, vcc, 0x80000001, v5
; GFX6-NEXT: v_min_i32_e32 v6, -1, v4
-; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 0x80000000, v6
+; GFX6-NEXT: v_add_i32_e32 v6, vcc, 0x80000000, v6
; GFX6-NEXT: v_max_i32_e32 v5, v5, v9
; GFX6-NEXT: v_min_i32_e32 v5, v5, v6
; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v5
@@ -1757,39 +1760,39 @@ define <5 x i32> @v_ssubsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_i32_e32 v10, -1, v0
-; GFX8-NEXT: v_subrev_u32_e32 v10, vcc, 0x7fffffff, v10
+; GFX8-NEXT: v_add_u32_e32 v10, vcc, 0x80000001, v10
; GFX8-NEXT: v_min_i32_e32 v12, -1, v0
; GFX8-NEXT: v_bfrev_b32_e32 v13, 1
-; GFX8-NEXT: v_sub_u32_e32 v12, vcc, v12, v13
+; GFX8-NEXT: v_add_u32_e32 v12, vcc, v12, v13
; GFX8-NEXT: v_max_i32_e32 v5, v10, v5
; GFX8-NEXT: v_min_i32_e32 v5, v5, v12
-; GFX8-NEXT: v_bfrev_b32_e32 v11, -2
+; GFX8-NEXT: v_mov_b32_e32 v11, 0x80000001
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v5
; GFX8-NEXT: v_max_i32_e32 v5, -1, v1
-; GFX8-NEXT: v_sub_u32_e32 v5, vcc, v5, v11
+; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v11
; GFX8-NEXT: v_min_i32_e32 v10, -1, v1
-; GFX8-NEXT: v_sub_u32_e32 v10, vcc, v10, v13
+; GFX8-NEXT: v_add_u32_e32 v10, vcc, v10, v13
; GFX8-NEXT: v_max_i32_e32 v5, v5, v6
; GFX8-NEXT: v_min_i32_e32 v5, v5, v10
; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v1, v5
; GFX8-NEXT: v_max_i32_e32 v5, -1, v2
-; GFX8-NEXT: v_sub_u32_e32 v5, vcc, v5, v11
+; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v11
; GFX8-NEXT: v_min_i32_e32 v6, -1, v2
-; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 0x80000000, v6
+; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v13
; GFX8-NEXT: v_max_i32_e32 v5, v5, v7
; GFX8-NEXT: v_min_i32_e32 v5, v5, v6
; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v5
; GFX8-NEXT: v_max_i32_e32 v5, -1, v3
-; GFX8-NEXT: v_sub_u32_e32 v5, vcc, v5, v11
+; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v11
; GFX8-NEXT: v_min_i32_e32 v6, -1, v3
-; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 0x80000000, v6
+; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v13
; GFX8-NEXT: v_max_i32_e32 v5, v5, v8
; GFX8-NEXT: v_min_i32_e32 v5, v5, v6
; GFX8-NEXT: v_sub_u32_e32 v3, vcc, v3, v5
; GFX8-NEXT: v_max_i32_e32 v5, -1, v4
-; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, 0x7fffffff, v5
+; GFX8-NEXT: v_add_u32_e32 v5, vcc, 0x80000001, v5
; GFX8-NEXT: v_min_i32_e32 v6, -1, v4
-; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 0x80000000, v6
+; GFX8-NEXT: v_add_u32_e32 v6, vcc, 0x80000000, v6
; GFX8-NEXT: v_max_i32_e32 v5, v5, v9
; GFX8-NEXT: v_min_i32_e32 v5, v5, v6
; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v5
@@ -1822,37 +1825,37 @@ define amdgpu_ps <5 x i32> @s_ssubsat_v5i32(<5 x i32> inreg %lhs, <5 x i32> inre
; GFX6-LABEL: s_ssubsat_v5i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_max_i32 s10, s0, -1
-; GFX6-NEXT: s_sub_i32 s10, s10, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s10, s10, 0x80000001
; GFX6-NEXT: s_min_i32 s11, s0, -1
-; GFX6-NEXT: s_sub_i32 s11, s11, 0x80000000
+; GFX6-NEXT: s_add_i32 s11, s11, 0x80000000
; GFX6-NEXT: s_max_i32 s5, s10, s5
; GFX6-NEXT: s_min_i32 s5, s5, s11
; GFX6-NEXT: s_sub_i32 s0, s0, s5
; GFX6-NEXT: s_max_i32 s5, s1, -1
-; GFX6-NEXT: s_sub_i32 s5, s5, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s5, s5, 0x80000001
; GFX6-NEXT: s_min_i32 s10, s1, -1
-; GFX6-NEXT: s_sub_i32 s10, s10, 0x80000000
+; GFX6-NEXT: s_add_i32 s10, s10, 0x80000000
; GFX6-NEXT: s_max_i32 s5, s5, s6
; GFX6-NEXT: s_min_i32 s5, s5, s10
; GFX6-NEXT: s_sub_i32 s1, s1, s5
; GFX6-NEXT: s_max_i32 s5, s2, -1
-; GFX6-NEXT: s_sub_i32 s5, s5, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s5, s5, 0x80000001
; GFX6-NEXT: s_min_i32 s6, s2, -1
-; GFX6-NEXT: s_sub_i32 s6, s6, 0x80000000
+; GFX6-NEXT: s_add_i32 s6, s6, 0x80000000
; GFX6-NEXT: s_max_i32 s5, s5, s7
; GFX6-NEXT: s_min_i32 s5, s5, s6
; GFX6-NEXT: s_sub_i32 s2, s2, s5
; GFX6-NEXT: s_max_i32 s5, s3, -1
-; GFX6-NEXT: s_sub_i32 s5, s5, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s5, s5, 0x80000001
; GFX6-NEXT: s_min_i32 s6, s3, -1
-; GFX6-NEXT: s_sub_i32 s6, s6, 0x80000000
+; GFX6-NEXT: s_add_i32 s6, s6, 0x80000000
; GFX6-NEXT: s_max_i32 s5, s5, s8
; GFX6-NEXT: s_min_i32 s5, s5, s6
; GFX6-NEXT: s_sub_i32 s3, s3, s5
; GFX6-NEXT: s_max_i32 s5, s4, -1
-; GFX6-NEXT: s_sub_i32 s5, s5, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s5, s5, 0x80000001
; GFX6-NEXT: s_min_i32 s6, s4, -1
-; GFX6-NEXT: s_sub_i32 s6, s6, 0x80000000
+; GFX6-NEXT: s_add_i32 s6, s6, 0x80000000
; GFX6-NEXT: s_max_i32 s5, s5, s9
; GFX6-NEXT: s_min_i32 s5, s5, s6
; GFX6-NEXT: s_sub_i32 s4, s4, s5
@@ -1861,37 +1864,37 @@ define amdgpu_ps <5 x i32> @s_ssubsat_v5i32(<5 x i32> inreg %lhs, <5 x i32> inre
; GFX8-LABEL: s_ssubsat_v5i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_max_i32 s10, s0, -1
-; GFX8-NEXT: s_sub_i32 s10, s10, 0x7fffffff
+; GFX8-NEXT: s_add_i32 s10, s10, 0x80000001
; GFX8-NEXT: s_min_i32 s11, s0, -1
-; GFX8-NEXT: s_sub_i32 s11, s11, 0x80000000
+; GFX8-NEXT: s_add_i32 s11, s11, 0x80000000
; GFX8-NEXT: s_max_i32 s5, s10, s5
; GFX8-NEXT: s_min_i32 s5, s5, s11
; GFX8-NEXT: s_sub_i32 s0, s0, s5
; GFX8-NEXT: s_max_i32 s5, s1, -1
-; GFX8-NEXT: s_sub_i32 s5, s5, 0x7fffffff
+; GFX8-NEXT: s_add_i32 s5, s5, 0x80000001
; GFX8-NEXT: s_min_i32 s10, s1, -1
-; GFX8-NEXT: s_sub_i32 s10, s10, 0x80000000
+; GFX8-NEXT: s_add_i32 s10, s10, 0x80000000
; GFX8-NEXT: s_max_i32 s5, s5, s6
; GFX8-NEXT: s_min_i32 s5, s5, s10
; GFX8-NEXT: s_sub_i32 s1, s1, s5
; GFX8-NEXT: s_max_i32 s5, s2, -1
-; GFX8-NEXT: s_sub_i32 s5, s5, 0x7fffffff
+; GFX8-NEXT: s_add_i32 s5, s5, 0x80000001
; GFX8-NEXT: s_min_i32 s6, s2, -1
-; GFX8-NEXT: s_sub_i32 s6, s6, 0x80000000
+; GFX8-NEXT: s_add_i32 s6, s6, 0x80000000
; GFX8-NEXT: s_max_i32 s5, s5, s7
; GFX8-NEXT: s_min_i32 s5, s5, s6
; GFX8-NEXT: s_sub_i32 s2, s2, s5
; GFX8-NEXT: s_max_i32 s5, s3, -1
-; GFX8-NEXT: s_sub_i32 s5, s5, 0x7fffffff
+; GFX8-NEXT: s_add_i32 s5, s5, 0x80000001
; GFX8-NEXT: s_min_i32 s6, s3, -1
-; GFX8-NEXT: s_sub_i32 s6, s6, 0x80000000
+; GFX8-NEXT: s_add_i32 s6, s6, 0x80000000
; GFX8-NEXT: s_max_i32 s5, s5, s8
; GFX8-NEXT: s_min_i32 s5, s5, s6
; GFX8-NEXT: s_sub_i32 s3, s3, s5
; GFX8-NEXT: s_max_i32 s5, s4, -1
-; GFX8-NEXT: s_sub_i32 s5, s5, 0x7fffffff
+; GFX8-NEXT: s_add_i32 s5, s5, 0x80000001
; GFX8-NEXT: s_min_i32 s6, s4, -1
-; GFX8-NEXT: s_sub_i32 s6, s6, 0x80000000
+; GFX8-NEXT: s_add_i32 s6, s6, 0x80000000
; GFX8-NEXT: s_max_i32 s5, s5, s9
; GFX8-NEXT: s_min_i32 s5, s5, s6
; GFX8-NEXT: s_sub_i32 s4, s4, s5
@@ -1938,117 +1941,117 @@ define <16 x i32> @v_ssubsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_max_i32_e32 v32, -1, v0
-; GFX6-NEXT: v_bfrev_b32_e32 v31, -2
-; GFX6-NEXT: v_sub_i32_e32 v32, vcc, v32, v31
+; GFX6-NEXT: v_mov_b32_e32 v31, 0x80000001
+; GFX6-NEXT: v_add_i32_e32 v32, vcc, v32, v31
; GFX6-NEXT: v_max_i32_e32 v32, v32, v16
; GFX6-NEXT: v_min_i32_e32 v33, -1, v0
; GFX6-NEXT: v_bfrev_b32_e32 v16, 1
-; GFX6-NEXT: v_sub_i32_e32 v33, vcc, v33, v16
+; GFX6-NEXT: v_add_i32_e32 v33, vcc, v33, v16
; GFX6-NEXT: v_min_i32_e32 v32, v32, v33
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v32
; GFX6-NEXT: v_max_i32_e32 v32, -1, v1
-; GFX6-NEXT: v_sub_i32_e32 v32, vcc, v32, v31
+; GFX6-NEXT: v_add_i32_e32 v32, vcc, v32, v31
; GFX6-NEXT: v_max_i32_e32 v17, v32, v17
; GFX6-NEXT: v_min_i32_e32 v32, -1, v1
-; GFX6-NEXT: v_sub_i32_e32 v32, vcc, v32, v16
+; GFX6-NEXT: v_add_i32_e32 v32, vcc, v32, v16
; GFX6-NEXT: v_min_i32_e32 v17, v17, v32
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v17
; GFX6-NEXT: v_max_i32_e32 v17, -1, v2
-; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT: v_add_i32_e32 v17, vcc, v17, v31
; GFX6-NEXT: v_max_i32_e32 v17, v17, v18
; GFX6-NEXT: v_min_i32_e32 v18, -1, v2
-; GFX6-NEXT: v_sub_i32_e32 v18, vcc, v18, v16
+; GFX6-NEXT: v_add_i32_e32 v18, vcc, v18, v16
; GFX6-NEXT: v_min_i32_e32 v17, v17, v18
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v17
; GFX6-NEXT: v_max_i32_e32 v17, -1, v3
-; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT: v_add_i32_e32 v17, vcc, v17, v31
; GFX6-NEXT: v_min_i32_e32 v18, -1, v3
; GFX6-NEXT: v_max_i32_e32 v17, v17, v19
-; GFX6-NEXT: v_sub_i32_e32 v18, vcc, v18, v16
+; GFX6-NEXT: v_add_i32_e32 v18, vcc, v18, v16
; GFX6-NEXT: v_min_i32_e32 v17, v17, v18
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, v3, v17
; GFX6-NEXT: v_max_i32_e32 v17, -1, v4
-; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT: v_add_i32_e32 v17, vcc, v17, v31
; GFX6-NEXT: v_min_i32_e32 v18, -1, v4
; GFX6-NEXT: v_max_i32_e32 v17, v17, v20
-; GFX6-NEXT: v_sub_i32_e32 v18, vcc, v18, v16
+; GFX6-NEXT: v_add_i32_e32 v18, vcc, v18, v16
; GFX6-NEXT: v_min_i32_e32 v17, v17, v18
; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v17
; GFX6-NEXT: v_max_i32_e32 v17, -1, v5
-; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT: v_add_i32_e32 v17, vcc, v17, v31
; GFX6-NEXT: v_min_i32_e32 v18, -1, v5
; GFX6-NEXT: v_max_i32_e32 v17, v17, v21
-; GFX6-NEXT: v_sub_i32_e32 v18, vcc, v18, v16
+; GFX6-NEXT: v_add_i32_e32 v18, vcc, v18, v16
; GFX6-NEXT: v_min_i32_e32 v17, v17, v18
; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v17
; GFX6-NEXT: v_max_i32_e32 v17, -1, v6
-; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT: v_add_i32_e32 v17, vcc, v17, v31
; GFX6-NEXT: v_min_i32_e32 v18, -1, v6
; GFX6-NEXT: v_max_i32_e32 v17, v17, v22
-; GFX6-NEXT: v_sub_i32_e32 v18, vcc, v18, v16
+; GFX6-NEXT: v_add_i32_e32 v18, vcc, v18, v16
; GFX6-NEXT: v_min_i32_e32 v17, v17, v18
; GFX6-NEXT: buffer_load_dword v18, off, s[0:3], s32
; GFX6-NEXT: v_sub_i32_e32 v6, vcc, v6, v17
; GFX6-NEXT: v_max_i32_e32 v17, -1, v7
-; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT: v_add_i32_e32 v17, vcc, v17, v31
; GFX6-NEXT: v_min_i32_e32 v19, -1, v7
; GFX6-NEXT: v_max_i32_e32 v17, v17, v23
-; GFX6-NEXT: v_sub_i32_e32 v19, vcc, v19, v16
+; GFX6-NEXT: v_add_i32_e32 v19, vcc, v19, v16
; GFX6-NEXT: v_min_i32_e32 v17, v17, v19
; GFX6-NEXT: v_sub_i32_e32 v7, vcc, v7, v17
; GFX6-NEXT: v_max_i32_e32 v17, -1, v8
-; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT: v_add_i32_e32 v17, vcc, v17, v31
; GFX6-NEXT: v_min_i32_e32 v19, -1, v8
-; GFX6-NEXT: v_sub_i32_e32 v19, vcc, v19, v16
+; GFX6-NEXT: v_add_i32_e32 v19, vcc, v19, v16
; GFX6-NEXT: v_max_i32_e32 v17, v17, v24
; GFX6-NEXT: v_min_i32_e32 v17, v17, v19
; GFX6-NEXT: v_sub_i32_e32 v8, vcc, v8, v17
; GFX6-NEXT: v_max_i32_e32 v17, -1, v9
-; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT: v_add_i32_e32 v17, vcc, v17, v31
; GFX6-NEXT: v_min_i32_e32 v19, -1, v9
-; GFX6-NEXT: v_sub_i32_e32 v19, vcc, v19, v16
+; GFX6-NEXT: v_add_i32_e32 v19, vcc, v19, v16
; GFX6-NEXT: v_max_i32_e32 v17, v17, v25
; GFX6-NEXT: v_min_i32_e32 v17, v17, v19
; GFX6-NEXT: v_sub_i32_e32 v9, vcc, v9, v17
; GFX6-NEXT: v_max_i32_e32 v17, -1, v10
-; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT: v_add_i32_e32 v17, vcc, v17, v31
; GFX6-NEXT: v_min_i32_e32 v19, -1, v10
-; GFX6-NEXT: v_sub_i32_e32 v19, vcc, v19, v16
+; GFX6-NEXT: v_add_i32_e32 v19, vcc, v19, v16
; GFX6-NEXT: v_max_i32_e32 v17, v17, v26
; GFX6-NEXT: v_min_i32_e32 v17, v17, v19
; GFX6-NEXT: v_sub_i32_e32 v10, vcc, v10, v17
; GFX6-NEXT: v_max_i32_e32 v17, -1, v11
-; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT: v_add_i32_e32 v17, vcc, v17, v31
; GFX6-NEXT: v_min_i32_e32 v19, -1, v11
-; GFX6-NEXT: v_sub_i32_e32 v19, vcc, v19, v16
+; GFX6-NEXT: v_add_i32_e32 v19, vcc, v19, v16
; GFX6-NEXT: v_max_i32_e32 v17, v17, v27
; GFX6-NEXT: v_min_i32_e32 v17, v17, v19
; GFX6-NEXT: v_sub_i32_e32 v11, vcc, v11, v17
; GFX6-NEXT: v_max_i32_e32 v17, -1, v12
-; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT: v_add_i32_e32 v17, vcc, v17, v31
; GFX6-NEXT: v_min_i32_e32 v19, -1, v12
-; GFX6-NEXT: v_sub_i32_e32 v19, vcc, v19, v16
+; GFX6-NEXT: v_add_i32_e32 v19, vcc, v19, v16
; GFX6-NEXT: v_max_i32_e32 v17, v17, v28
; GFX6-NEXT: v_min_i32_e32 v17, v17, v19
; GFX6-NEXT: v_sub_i32_e32 v12, vcc, v12, v17
; GFX6-NEXT: v_max_i32_e32 v17, -1, v13
-; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT: v_add_i32_e32 v17, vcc, v17, v31
; GFX6-NEXT: v_min_i32_e32 v19, -1, v13
-; GFX6-NEXT: v_sub_i32_e32 v19, vcc, v19, v16
+; GFX6-NEXT: v_add_i32_e32 v19, vcc, v19, v16
; GFX6-NEXT: v_max_i32_e32 v17, v17, v29
; GFX6-NEXT: v_min_i32_e32 v17, v17, v19
; GFX6-NEXT: v_sub_i32_e32 v13, vcc, v13, v17
; GFX6-NEXT: v_max_i32_e32 v17, -1, v14
-; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT: v_add_i32_e32 v17, vcc, v17, v31
; GFX6-NEXT: v_min_i32_e32 v19, -1, v14
-; GFX6-NEXT: v_sub_i32_e32 v19, vcc, v19, v16
+; GFX6-NEXT: v_add_i32_e32 v19, vcc, v19, v16
; GFX6-NEXT: v_max_i32_e32 v17, v17, v30
; GFX6-NEXT: v_min_i32_e32 v17, v17, v19
; GFX6-NEXT: v_sub_i32_e32 v14, vcc, v14, v17
; GFX6-NEXT: v_max_i32_e32 v17, -1, v15
-; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT: v_add_i32_e32 v17, vcc, v17, v31
; GFX6-NEXT: v_min_i32_e32 v19, -1, v15
-; GFX6-NEXT: v_sub_i32_e32 v16, vcc, v19, v16
+; GFX6-NEXT: v_add_i32_e32 v16, vcc, v19, v16
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_max_i32_e32 v17, v17, v18
; GFX6-NEXT: v_min_i32_e32 v16, v17, v16
@@ -2059,117 +2062,117 @@ define <16 x i32> @v_ssubsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_i32_e32 v32, -1, v0
-; GFX8-NEXT: v_bfrev_b32_e32 v31, -2
-; GFX8-NEXT: v_sub_u32_e32 v32, vcc, v32, v31
+; GFX8-NEXT: v_mov_b32_e32 v31, 0x80000001
+; GFX8-NEXT: v_add_u32_e32 v32, vcc, v32, v31
; GFX8-NEXT: v_max_i32_e32 v32, v32, v16
; GFX8-NEXT: v_min_i32_e32 v33, -1, v0
; GFX8-NEXT: v_bfrev_b32_e32 v16, 1
-; GFX8-NEXT: v_sub_u32_e32 v33, vcc, v33, v16
+; GFX8-NEXT: v_add_u32_e32 v33, vcc, v33, v16
; GFX8-NEXT: v_min_i32_e32 v32, v32, v33
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v32
; GFX8-NEXT: v_max_i32_e32 v32, -1, v1
-; GFX8-NEXT: v_sub_u32_e32 v32, vcc, v32, v31
+; GFX8-NEXT: v_add_u32_e32 v32, vcc, v32, v31
; GFX8-NEXT: v_max_i32_e32 v17, v32, v17
; GFX8-NEXT: v_min_i32_e32 v32, -1, v1
-; GFX8-NEXT: v_sub_u32_e32 v32, vcc, v32, v16
+; GFX8-NEXT: v_add_u32_e32 v32, vcc, v32, v16
; GFX8-NEXT: v_min_i32_e32 v17, v17, v32
; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v1, v17
; GFX8-NEXT: v_max_i32_e32 v17, -1, v2
-; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT: v_add_u32_e32 v17, vcc, v17, v31
; GFX8-NEXT: v_max_i32_e32 v17, v17, v18
; GFX8-NEXT: v_min_i32_e32 v18, -1, v2
-; GFX8-NEXT: v_sub_u32_e32 v18, vcc, v18, v16
+; GFX8-NEXT: v_add_u32_e32 v18, vcc, v18, v16
; GFX8-NEXT: v_min_i32_e32 v17, v17, v18
; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v17
; GFX8-NEXT: v_max_i32_e32 v17, -1, v3
-; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT: v_add_u32_e32 v17, vcc, v17, v31
; GFX8-NEXT: v_min_i32_e32 v18, -1, v3
; GFX8-NEXT: v_max_i32_e32 v17, v17, v19
-; GFX8-NEXT: v_sub_u32_e32 v18, vcc, v18, v16
+; GFX8-NEXT: v_add_u32_e32 v18, vcc, v18, v16
; GFX8-NEXT: v_min_i32_e32 v17, v17, v18
; GFX8-NEXT: v_sub_u32_e32 v3, vcc, v3, v17
; GFX8-NEXT: v_max_i32_e32 v17, -1, v4
-; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT: v_add_u32_e32 v17, vcc, v17, v31
; GFX8-NEXT: v_min_i32_e32 v18, -1, v4
; GFX8-NEXT: v_max_i32_e32 v17, v17, v20
-; GFX8-NEXT: v_sub_u32_e32 v18, vcc, v18, v16
+; GFX8-NEXT: v_add_u32_e32 v18, vcc, v18, v16
; GFX8-NEXT: v_min_i32_e32 v17, v17, v18
; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v17
; GFX8-NEXT: v_max_i32_e32 v17, -1, v5
-; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT: v_add_u32_e32 v17, vcc, v17, v31
; GFX8-NEXT: v_min_i32_e32 v18, -1, v5
; GFX8-NEXT: v_max_i32_e32 v17, v17, v21
-; GFX8-NEXT: v_sub_u32_e32 v18, vcc, v18, v16
+; GFX8-NEXT: v_add_u32_e32 v18, vcc, v18, v16
; GFX8-NEXT: v_min_i32_e32 v17, v17, v18
; GFX8-NEXT: v_sub_u32_e32 v5, vcc, v5, v17
; GFX8-NEXT: v_max_i32_e32 v17, -1, v6
-; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT: v_add_u32_e32 v17, vcc, v17, v31
; GFX8-NEXT: v_min_i32_e32 v18, -1, v6
; GFX8-NEXT: v_max_i32_e32 v17, v17, v22
-; GFX8-NEXT: v_sub_u32_e32 v18, vcc, v18, v16
+; GFX8-NEXT: v_add_u32_e32 v18, vcc, v18, v16
; GFX8-NEXT: v_min_i32_e32 v17, v17, v18
; GFX8-NEXT: buffer_load_dword v18, off, s[0:3], s32
; GFX8-NEXT: v_sub_u32_e32 v6, vcc, v6, v17
; GFX8-NEXT: v_max_i32_e32 v17, -1, v7
-; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT: v_add_u32_e32 v17, vcc, v17, v31
; GFX8-NEXT: v_min_i32_e32 v19, -1, v7
; GFX8-NEXT: v_max_i32_e32 v17, v17, v23
-; GFX8-NEXT: v_sub_u32_e32 v19, vcc, v19, v16
+; GFX8-NEXT: v_add_u32_e32 v19, vcc, v19, v16
; GFX8-NEXT: v_min_i32_e32 v17, v17, v19
; GFX8-NEXT: v_sub_u32_e32 v7, vcc, v7, v17
; GFX8-NEXT: v_max_i32_e32 v17, -1, v8
-; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT: v_add_u32_e32 v17, vcc, v17, v31
; GFX8-NEXT: v_min_i32_e32 v19, -1, v8
-; GFX8-NEXT: v_sub_u32_e32 v19, vcc, v19, v16
+; GFX8-NEXT: v_add_u32_e32 v19, vcc, v19, v16
; GFX8-NEXT: v_max_i32_e32 v17, v17, v24
; GFX8-NEXT: v_min_i32_e32 v17, v17, v19
; GFX8-NEXT: v_sub_u32_e32 v8, vcc, v8, v17
; GFX8-NEXT: v_max_i32_e32 v17, -1, v9
-; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT: v_add_u32_e32 v17, vcc, v17, v31
; GFX8-NEXT: v_min_i32_e32 v19, -1, v9
-; GFX8-NEXT: v_sub_u32_e32 v19, vcc, v19, v16
+; GFX8-NEXT: v_add_u32_e32 v19, vcc, v19, v16
; GFX8-NEXT: v_max_i32_e32 v17, v17, v25
; GFX8-NEXT: v_min_i32_e32 v17, v17, v19
; GFX8-NEXT: v_sub_u32_e32 v9, vcc, v9, v17
; GFX8-NEXT: v_max_i32_e32 v17, -1, v10
-; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT: v_add_u32_e32 v17, vcc, v17, v31
; GFX8-NEXT: v_min_i32_e32 v19, -1, v10
-; GFX8-NEXT: v_sub_u32_e32 v19, vcc, v19, v16
+; GFX8-NEXT: v_add_u32_e32 v19, vcc, v19, v16
; GFX8-NEXT: v_max_i32_e32 v17, v17, v26
; GFX8-NEXT: v_min_i32_e32 v17, v17, v19
; GFX8-NEXT: v_sub_u32_e32 v10, vcc, v10, v17
; GFX8-NEXT: v_max_i32_e32 v17, -1, v11
-; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT: v_add_u32_e32 v17, vcc, v17, v31
; GFX8-NEXT: v_min_i32_e32 v19, -1, v11
-; GFX8-NEXT: v_sub_u32_e32 v19, vcc, v19, v16
+; GFX8-NEXT: v_add_u32_e32 v19, vcc, v19, v16
; GFX8-NEXT: v_max_i32_e32 v17, v17, v27
; GFX8-NEXT: v_min_i32_e32 v17, v17, v19
; GFX8-NEXT: v_sub_u32_e32 v11, vcc, v11, v17
; GFX8-NEXT: v_max_i32_e32 v17, -1, v12
-; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT: v_add_u32_e32 v17, vcc, v17, v31
; GFX8-NEXT: v_min_i32_e32 v19, -1, v12
-; GFX8-NEXT: v_sub_u32_e32 v19, vcc, v19, v16
+; GFX8-NEXT: v_add_u32_e32 v19, vcc, v19, v16
; GFX8-NEXT: v_max_i32_e32 v17, v17, v28
; GFX8-NEXT: v_min_i32_e32 v17, v17, v19
; GFX8-NEXT: v_sub_u32_e32 v12, vcc, v12, v17
; GFX8-NEXT: v_max_i32_e32 v17, -1, v13
-; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT: v_add_u32_e32 v17, vcc, v17, v31
; GFX8-NEXT: v_min_i32_e32 v19, -1, v13
-; GFX8-NEXT: v_sub_u32_e32 v19, vcc, v19, v16
+; GFX8-NEXT: v_add_u32_e32 v19, vcc, v19, v16
; GFX8-NEXT: v_max_i32_e32 v17, v17, v29
; GFX8-NEXT: v_min_i32_e32 v17, v17, v19
; GFX8-NEXT: v_sub_u32_e32 v13, vcc, v13, v17
; GFX8-NEXT: v_max_i32_e32 v17, -1, v14
-; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT: v_add_u32_e32 v17, vcc, v17, v31
; GFX8-NEXT: v_min_i32_e32 v19, -1, v14
-; GFX8-NEXT: v_sub_u32_e32 v19, vcc, v19, v16
+; GFX8-NEXT: v_add_u32_e32 v19, vcc, v19, v16
; GFX8-NEXT: v_max_i32_e32 v17, v17, v30
; GFX8-NEXT: v_min_i32_e32 v17, v17, v19
; GFX8-NEXT: v_sub_u32_e32 v14, vcc, v14, v17
; GFX8-NEXT: v_max_i32_e32 v17, -1, v15
-; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT: v_add_u32_e32 v17, vcc, v17, v31
; GFX8-NEXT: v_min_i32_e32 v19, -1, v15
-; GFX8-NEXT: v_sub_u32_e32 v16, vcc, v19, v16
+; GFX8-NEXT: v_add_u32_e32 v16, vcc, v19, v16
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_max_i32_e32 v17, v17, v18
; GFX8-NEXT: v_min_i32_e32 v16, v17, v16
@@ -2252,114 +2255,114 @@ define amdgpu_ps <16 x i32> @s_ssubsat_v16i32(<16 x i32> inreg %lhs, <16 x i32>
; GFX6-LABEL: s_ssubsat_v16i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_max_i32 s32, s0, -1
-; GFX6-NEXT: s_sub_i32 s32, s32, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s32, s32, 0x80000001
; GFX6-NEXT: s_min_i32 s33, s0, -1
-; GFX6-NEXT: s_sub_i32 s33, s33, 0x80000000
+; GFX6-NEXT: s_add_i32 s33, s33, 0x80000000
; GFX6-NEXT: s_max_i32 s16, s32, s16
; GFX6-NEXT: s_min_i32 s16, s16, s33
; GFX6-NEXT: s_sub_i32 s0, s0, s16
; GFX6-NEXT: s_max_i32 s16, s1, -1
-; GFX6-NEXT: s_sub_i32 s16, s16, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s16, s16, 0x80000001
; GFX6-NEXT: s_min_i32 s32, s1, -1
-; GFX6-NEXT: s_sub_i32 s32, s32, 0x80000000
+; GFX6-NEXT: s_add_i32 s32, s32, 0x80000000
; GFX6-NEXT: s_max_i32 s16, s16, s17
; GFX6-NEXT: s_min_i32 s16, s16, s32
; GFX6-NEXT: s_sub_i32 s1, s1, s16
; GFX6-NEXT: s_max_i32 s16, s2, -1
-; GFX6-NEXT: s_sub_i32 s16, s16, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s16, s16, 0x80000001
; GFX6-NEXT: s_min_i32 s17, s2, -1
-; GFX6-NEXT: s_sub_i32 s17, s17, 0x80000000
+; GFX6-NEXT: s_add_i32 s17, s17, 0x80000000
; GFX6-NEXT: s_max_i32 s16, s16, s18
; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s2, s2, s16
; GFX6-NEXT: s_max_i32 s16, s3, -1
-; GFX6-NEXT: s_sub_i32 s16, s16, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s16, s16, 0x80000001
; GFX6-NEXT: s_min_i32 s17, s3, -1
-; GFX6-NEXT: s_sub_i32 s17, s17, 0x80000000
+; GFX6-NEXT: s_add_i32 s17, s17, 0x80000000
; GFX6-NEXT: s_max_i32 s16, s16, s19
; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s3, s3, s16
; GFX6-NEXT: s_max_i32 s16, s4, -1
-; GFX6-NEXT: s_sub_i32 s16, s16, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s16, s16, 0x80000001
; GFX6-NEXT: s_min_i32 s17, s4, -1
-; GFX6-NEXT: s_sub_i32 s17, s17, 0x80000000
+; GFX6-NEXT: s_add_i32 s17, s17, 0x80000000
; GFX6-NEXT: s_max_i32 s16, s16, s20
; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s4, s4, s16
; GFX6-NEXT: s_max_i32 s16, s5, -1
-; GFX6-NEXT: s_sub_i32 s16, s16, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s16, s16, 0x80000001
; GFX6-NEXT: s_min_i32 s17, s5, -1
-; GFX6-NEXT: s_sub_i32 s17, s17, 0x80000000
+; GFX6-NEXT: s_add_i32 s17, s17, 0x80000000
; GFX6-NEXT: s_max_i32 s16, s16, s21
; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s5, s5, s16
; GFX6-NEXT: s_max_i32 s16, s6, -1
-; GFX6-NEXT: s_sub_i32 s16, s16, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s16, s16, 0x80000001
; GFX6-NEXT: s_min_i32 s17, s6, -1
-; GFX6-NEXT: s_sub_i32 s17, s17, 0x80000000
+; GFX6-NEXT: s_add_i32 s17, s17, 0x80000000
; GFX6-NEXT: s_max_i32 s16, s16, s22
; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s6, s6, s16
; GFX6-NEXT: s_max_i32 s16, s7, -1
-; GFX6-NEXT: s_sub_i32 s16, s16, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s16, s16, 0x80000001
; GFX6-NEXT: s_min_i32 s17, s7, -1
-; GFX6-NEXT: s_sub_i32 s17, s17, 0x80000000
+; GFX6-NEXT: s_add_i32 s17, s17, 0x80000000
; GFX6-NEXT: s_max_i32 s16, s16, s23
; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s7, s7, s16
; GFX6-NEXT: s_max_i32 s16, s8, -1
-; GFX6-NEXT: s_sub_i32 s16, s16, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s16, s16, 0x80000001
; GFX6-NEXT: s_min_i32 s17, s8, -1
-; GFX6-NEXT: s_sub_i32 s17, s17, 0x80000000
+; GFX6-NEXT: s_add_i32 s17, s17, 0x80000000
; GFX6-NEXT: s_max_i32 s16, s16, s24
; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s8, s8, s16
; GFX6-NEXT: s_max_i32 s16, s9, -1
-; GFX6-NEXT: s_sub_i32 s16, s16, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s16, s16, 0x80000001
; GFX6-NEXT: s_min_i32 s17, s9, -1
-; GFX6-NEXT: s_sub_i32 s17, s17, 0x80000000
+; GFX6-NEXT: s_add_i32 s17, s17, 0x80000000
; GFX6-NEXT: s_max_i32 s16, s16, s25
; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s9, s9, s16
; GFX6-NEXT: s_max_i32 s16, s10, -1
-; GFX6-NEXT: s_sub_i32 s16, s16, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s16, s16, 0x80000001
; GFX6-NEXT: s_min_i32 s17, s10, -1
-; GFX6-NEXT: s_sub_i32 s17, s17, 0x80000000
+; GFX6-NEXT: s_add_i32 s17, s17, 0x80000000
; GFX6-NEXT: s_max_i32 s16, s16, s26
; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s10, s10, s16
; GFX6-NEXT: s_max_i32 s16, s11, -1
-; GFX6-NEXT: s_sub_i32 s16, s16, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s16, s16, 0x80000001
; GFX6-NEXT: s_min_i32 s17, s11, -1
-; GFX6-NEXT: s_sub_i32 s17, s17, 0x80000000
+; GFX6-NEXT: s_add_i32 s17, s17, 0x80000000
; GFX6-NEXT: s_max_i32 s16, s16, s27
; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s11, s11, s16
; GFX6-NEXT: s_max_i32 s16, s12, -1
-; GFX6-NEXT: s_sub_i32 s16, s16, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s16, s16, 0x80000001
; GFX6-NEXT: s_min_i32 s17, s12, -1
-; GFX6-NEXT: s_sub_i32 s17, s17, 0x80000000
+; GFX6-NEXT: s_add_i32 s17, s17, 0x80000000
; GFX6-NEXT: s_max_i32 s16, s16, s28
; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s12, s12, s16
; GFX6-NEXT: s_max_i32 s16, s13, -1
-; GFX6-NEXT: s_sub_i32 s16, s16, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s16, s16, 0x80000001
; GFX6-NEXT: s_min_i32 s17, s13, -1
-; GFX6-NEXT: s_sub_i32 s17, s17, 0x80000000
+; GFX6-NEXT: s_add_i32 s17, s17, 0x80000000
; GFX6-NEXT: s_max_i32 s16, s16, s29
; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s13, s13, s16
; GFX6-NEXT: s_max_i32 s16, s14, -1
-; GFX6-NEXT: s_sub_i32 s16, s16, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s16, s16, 0x80000001
; GFX6-NEXT: s_min_i32 s17, s14, -1
-; GFX6-NEXT: s_sub_i32 s17, s17, 0x80000000
+; GFX6-NEXT: s_add_i32 s17, s17, 0x80000000
; GFX6-NEXT: s_max_i32 s16, s16, s30
; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s14, s14, s16
; GFX6-NEXT: s_max_i32 s16, s15, -1
-; GFX6-NEXT: s_sub_i32 s16, s16, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s16, s16, 0x80000001
; GFX6-NEXT: s_min_i32 s17, s15, -1
-; GFX6-NEXT: s_sub_i32 s17, s17, 0x80000000
+; GFX6-NEXT: s_add_i32 s17, s17, 0x80000000
; GFX6-NEXT: s_max_i32 s16, s16, s31
; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s15, s15, s16
@@ -2368,114 +2371,114 @@ define amdgpu_ps <16 x i32> @s_ssubsat_v16i32(<16 x i32> inreg %lhs, <16 x i32>
; GFX8-LABEL: s_ssubsat_v16i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_max_i32 s32, s0, -1
-; GFX8-NEXT: s_sub_i32 s32, s32, 0x7fffffff
+; GFX8-NEXT: s_add_i32 s32, s32, 0x80000001
; GFX8-NEXT: s_min_i32 s33, s0, -1
-; GFX8-NEXT: s_sub_i32 s33, s33, 0x80000000
+; GFX8-NEXT: s_add_i32 s33, s33, 0x80000000
; GFX8-NEXT: s_max_i32 s16, s32, s16
; GFX8-NEXT: s_min_i32 s16, s16, s33
; GFX8-NEXT: s_sub_i32 s0, s0, s16
; GFX8-NEXT: s_max_i32 s16, s1, -1
-; GFX8-NEXT: s_sub_i32 s16, s16, 0x7fffffff
+; GFX8-NEXT: s_add_i32 s16, s16, 0x80000001
; GFX8-NEXT: s_min_i32 s32, s1, -1
-; GFX8-NEXT: s_sub_i32 s32, s32, 0x80000000
+; GFX8-NEXT: s_add_i32 s32, s32, 0x80000000
; GFX8-NEXT: s_max_i32 s16, s16, s17
; GFX8-NEXT: s_min_i32 s16, s16, s32
; GFX8-NEXT: s_sub_i32 s1, s1, s16
; GFX8-NEXT: s_max_i32 s16, s2, -1
-; GFX8-NEXT: s_sub_i32 s16, s16, 0x7fffffff
+; GFX8-NEXT: s_add_i32 s16, s16, 0x80000001
; GFX8-NEXT: s_min_i32 s17, s2, -1
-; GFX8-NEXT: s_sub_i32 s17, s17, 0x80000000
+; GFX8-NEXT: s_add_i32 s17, s17, 0x80000000
; GFX8-NEXT: s_max_i32 s16, s16, s18
; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s2, s2, s16
; GFX8-NEXT: s_max_i32 s16, s3, -1
-; GFX8-NEXT: s_sub_i32 s16, s16, 0x7fffffff
+; GFX8-NEXT: s_add_i32 s16, s16, 0x80000001
; GFX8-NEXT: s_min_i32 s17, s3, -1
-; GFX8-NEXT: s_sub_i32 s17, s17, 0x80000000
+; GFX8-NEXT: s_add_i32 s17, s17, 0x80000000
; GFX8-NEXT: s_max_i32 s16, s16, s19
; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s3, s3, s16
; GFX8-NEXT: s_max_i32 s16, s4, -1
-; GFX8-NEXT: s_sub_i32 s16, s16, 0x7fffffff
+; GFX8-NEXT: s_add_i32 s16, s16, 0x80000001
; GFX8-NEXT: s_min_i32 s17, s4, -1
-; GFX8-NEXT: s_sub_i32 s17, s17, 0x80000000
+; GFX8-NEXT: s_add_i32 s17, s17, 0x80000000
; GFX8-NEXT: s_max_i32 s16, s16, s20
; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s4, s4, s16
; GFX8-NEXT: s_max_i32 s16, s5, -1
-; GFX8-NEXT: s_sub_i32 s16, s16, 0x7fffffff
+; GFX8-NEXT: s_add_i32 s16, s16, 0x80000001
; GFX8-NEXT: s_min_i32 s17, s5, -1
-; GFX8-NEXT: s_sub_i32 s17, s17, 0x80000000
+; GFX8-NEXT: s_add_i32 s17, s17, 0x80000000
; GFX8-NEXT: s_max_i32 s16, s16, s21
; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s5, s5, s16
; GFX8-NEXT: s_max_i32 s16, s6, -1
-; GFX8-NEXT: s_sub_i32 s16, s16, 0x7fffffff
+; GFX8-NEXT: s_add_i32 s16, s16, 0x80000001
; GFX8-NEXT: s_min_i32 s17, s6, -1
-; GFX8-NEXT: s_sub_i32 s17, s17, 0x80000000
+; GFX8-NEXT: s_add_i32 s17, s17, 0x80000000
; GFX8-NEXT: s_max_i32 s16, s16, s22
; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s6, s6, s16
; GFX8-NEXT: s_max_i32 s16, s7, -1
-; GFX8-NEXT: s_sub_i32 s16, s16, 0x7fffffff
+; GFX8-NEXT: s_add_i32 s16, s16, 0x80000001
; GFX8-NEXT: s_min_i32 s17, s7, -1
-; GFX8-NEXT: s_sub_i32 s17, s17, 0x80000000
+; GFX8-NEXT: s_add_i32 s17, s17, 0x80000000
; GFX8-NEXT: s_max_i32 s16, s16, s23
; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s7, s7, s16
; GFX8-NEXT: s_max_i32 s16, s8, -1
-; GFX8-NEXT: s_sub_i32 s16, s16, 0x7fffffff
+; GFX8-NEXT: s_add_i32 s16, s16, 0x80000001
; GFX8-NEXT: s_min_i32 s17, s8, -1
-; GFX8-NEXT: s_sub_i32 s17, s17, 0x80000000
+; GFX8-NEXT: s_add_i32 s17, s17, 0x80000000
; GFX8-NEXT: s_max_i32 s16, s16, s24
; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s8, s8, s16
; GFX8-NEXT: s_max_i32 s16, s9, -1
-; GFX8-NEXT: s_sub_i32 s16, s16, 0x7fffffff
+; GFX8-NEXT: s_add_i32 s16, s16, 0x80000001
; GFX8-NEXT: s_min_i32 s17, s9, -1
-; GFX8-NEXT: s_sub_i32 s17, s17, 0x80000000
+; GFX8-NEXT: s_add_i32 s17, s17, 0x80000000
; GFX8-NEXT: s_max_i32 s16, s16, s25
; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s9, s9, s16
; GFX8-NEXT: s_max_i32 s16, s10, -1
-; GFX8-NEXT: s_sub_i32 s16, s16, 0x7fffffff
+; GFX8-NEXT: s_add_i32 s16, s16, 0x80000001
; GFX8-NEXT: s_min_i32 s17, s10, -1
-; GFX8-NEXT: s_sub_i32 s17, s17, 0x80000000
+; GFX8-NEXT: s_add_i32 s17, s17, 0x80000000
; GFX8-NEXT: s_max_i32 s16, s16, s26
; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s10, s10, s16
; GFX8-NEXT: s_max_i32 s16, s11, -1
-; GFX8-NEXT: s_sub_i32 s16, s16, 0x7fffffff
+; GFX8-NEXT: s_add_i32 s16, s16, 0x80000001
; GFX8-NEXT: s_min_i32 s17, s11, -1
-; GFX8-NEXT: s_sub_i32 s17, s17, 0x80000000
+; GFX8-NEXT: s_add_i32 s17, s17, 0x80000000
; GFX8-NEXT: s_max_i32 s16, s16, s27
; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s11, s11, s16
; GFX8-NEXT: s_max_i32 s16, s12, -1
-; GFX8-NEXT: s_sub_i32 s16, s16, 0x7fffffff
+; GFX8-NEXT: s_add_i32 s16, s16, 0x80000001
; GFX8-NEXT: s_min_i32 s17, s12, -1
-; GFX8-NEXT: s_sub_i32 s17, s17, 0x80000000
+; GFX8-NEXT: s_add_i32 s17, s17, 0x80000000
; GFX8-NEXT: s_max_i32 s16, s16, s28
; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s12, s12, s16
; GFX8-NEXT: s_max_i32 s16, s13, -1
-; GFX8-NEXT: s_sub_i32 s16, s16, 0x7fffffff
+; GFX8-NEXT: s_add_i32 s16, s16, 0x80000001
; GFX8-NEXT: s_min_i32 s17, s13, -1
-; GFX8-NEXT: s_sub_i32 s17, s17, 0x80000000
+; GFX8-NEXT: s_add_i32 s17, s17, 0x80000000
; GFX8-NEXT: s_max_i32 s16, s16, s29
; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s13, s13, s16
; GFX8-NEXT: s_max_i32 s16, s14, -1
-; GFX8-NEXT: s_sub_i32 s16, s16, 0x7fffffff
+; GFX8-NEXT: s_add_i32 s16, s16, 0x80000001
; GFX8-NEXT: s_min_i32 s17, s14, -1
-; GFX8-NEXT: s_sub_i32 s17, s17, 0x80000000
+; GFX8-NEXT: s_add_i32 s17, s17, 0x80000000
; GFX8-NEXT: s_max_i32 s16, s16, s30
; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s14, s14, s16
; GFX8-NEXT: s_max_i32 s16, s15, -1
-; GFX8-NEXT: s_sub_i32 s16, s16, 0x7fffffff
+; GFX8-NEXT: s_add_i32 s16, s16, 0x80000001
; GFX8-NEXT: s_min_i32 s17, s15, -1
-; GFX8-NEXT: s_sub_i32 s17, s17, 0x80000000
+; GFX8-NEXT: s_add_i32 s17, s17, 0x80000000
; GFX8-NEXT: s_max_i32 s16, s16, s31
; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s15, s15, s16
@@ -2579,9 +2582,9 @@ define i16 @v_ssubsat_i16(i16 %lhs, i16 %rhs) {
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_max_i32_e32 v2, -1, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0x80000001, v2
; GFX6-NEXT: v_min_i32_e32 v3, -1, v0
-; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x80000000, v3
+; GFX6-NEXT: v_add_i32_e32 v3, vcc, 0x80000000, v3
; GFX6-NEXT: v_max_i32_e32 v1, v2, v1
; GFX6-NEXT: v_min_i32_e32 v1, v1, v3
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
@@ -2592,9 +2595,9 @@ define i16 @v_ssubsat_i16(i16 %lhs, i16 %rhs) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_i16_e32 v2, -1, v0
-; GFX8-NEXT: v_subrev_u16_e32 v2, 0x7fff, v2
+; GFX8-NEXT: v_add_u16_e32 v2, 0x8001, v2
; GFX8-NEXT: v_min_i16_e32 v3, -1, v0
-; GFX8-NEXT: v_subrev_u16_e32 v3, 0x8000, v3
+; GFX8-NEXT: v_add_u16_e32 v3, 0x8000, v3
; GFX8-NEXT: v_max_i16_e32 v1, v2, v1
; GFX8-NEXT: v_min_i16_e32 v1, v1, v3
; GFX8-NEXT: v_sub_u16_e32 v0, v0, v1
@@ -2621,9 +2624,9 @@ define amdgpu_ps i16 @s_ssubsat_i16(i16 inreg %lhs, i16 inreg %rhs) {
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: s_max_i32 s2, s0, -1
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
-; GFX6-NEXT: s_sub_i32 s2, s2, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s2, s2, 0x80000001
; GFX6-NEXT: s_min_i32 s3, s0, -1
-; GFX6-NEXT: s_sub_i32 s3, s3, 0x80000000
+; GFX6-NEXT: s_add_i32 s3, s3, 0x80000000
; GFX6-NEXT: s_max_i32 s1, s2, s1
; GFX6-NEXT: s_min_i32 s1, s1, s3
; GFX6-NEXT: s_sub_i32 s0, s0, s1
@@ -2635,11 +2638,11 @@ define amdgpu_ps i16 @s_ssubsat_i16(i16 inreg %lhs, i16 inreg %rhs) {
; GFX8-NEXT: s_sext_i32_i16 s2, s0
; GFX8-NEXT: s_sext_i32_i16 s3, -1
; GFX8-NEXT: s_max_i32 s4, s2, s3
-; GFX8-NEXT: s_sub_i32 s4, s4, 0x7fff
+; GFX8-NEXT: s_addk_i32 s4, 0x8001
; GFX8-NEXT: s_min_i32 s2, s2, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s4
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sub_i32 s2, s2, 0xffff8000
+; GFX8-NEXT: s_addk_i32 s2, 0x8000
; GFX8-NEXT: s_max_i32 s1, s3, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s2, s2
@@ -2669,9 +2672,9 @@ define amdgpu_ps half @ssubsat_i16_sv(i16 inreg %lhs, i16 %rhs) {
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: s_max_i32 s1, s0, -1
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX6-NEXT: s_sub_i32 s1, s1, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s1, s1, 0x80000001
; GFX6-NEXT: s_min_i32 s2, s0, -1
-; GFX6-NEXT: s_sub_i32 s2, s2, 0x80000000
+; GFX6-NEXT: s_add_i32 s2, s2, 0x80000000
; GFX6-NEXT: v_max_i32_e32 v0, s1, v0
; GFX6-NEXT: v_min_i32_e32 v0, s2, v0
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s0, v0
@@ -2683,9 +2686,9 @@ define amdgpu_ps half @ssubsat_i16_sv(i16 inreg %lhs, i16 %rhs) {
; GFX8-NEXT: s_sext_i32_i16 s1, s0
; GFX8-NEXT: s_sext_i32_i16 s2, -1
; GFX8-NEXT: s_max_i32 s3, s1, s2
-; GFX8-NEXT: s_sub_i32 s3, s3, 0x7fff
+; GFX8-NEXT: s_addk_i32 s3, 0x8001
; GFX8-NEXT: s_min_i32 s1, s1, s2
-; GFX8-NEXT: s_sub_i32 s1, s1, 0xffff8000
+; GFX8-NEXT: s_addk_i32 s1, 0x8000
; GFX8-NEXT: v_max_i16_e32 v0, s3, v0
; GFX8-NEXT: v_min_i16_e32 v0, s1, v0
; GFX8-NEXT: v_sub_u16_e32 v0, s0, v0
@@ -2711,9 +2714,9 @@ define amdgpu_ps half @ssubsat_i16_vs(i16 %lhs, i16 inreg %rhs) {
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_max_i32_e32 v1, -1, v0
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
-; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 0x7fffffff, v1
+; GFX6-NEXT: v_add_i32_e32 v1, vcc, 0x80000001, v1
; GFX6-NEXT: v_min_i32_e32 v2, -1, v0
-; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 0x80000000, v2
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0x80000000, v2
; GFX6-NEXT: v_max_i32_e32 v1, s0, v1
; GFX6-NEXT: v_min_i32_e32 v1, v1, v2
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
@@ -2723,9 +2726,9 @@ define amdgpu_ps half @ssubsat_i16_vs(i16 %lhs, i16 inreg %rhs) {
; GFX8-LABEL: ssubsat_i16_vs:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_max_i16_e32 v1, -1, v0
-; GFX8-NEXT: v_subrev_u16_e32 v1, 0x7fff, v1
+; GFX8-NEXT: v_add_u16_e32 v1, 0x8001, v1
; GFX8-NEXT: v_min_i16_e32 v2, -1, v0
-; GFX8-NEXT: v_subrev_u16_e32 v2, 0x8000, v2
+; GFX8-NEXT: v_add_u16_e32 v2, 0x8000, v2
; GFX8-NEXT: v_max_i16_e32 v1, s0, v1
; GFX8-NEXT: v_min_i16_e32 v1, v1, v2
; GFX8-NEXT: v_sub_u16_e32 v0, v0, v1
@@ -2752,18 +2755,19 @@ define <2 x i16> @v_ssubsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_max_i32_e32 v4, -1, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 0x7fffffff, v4
+; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0x80000001, v4
; GFX6-NEXT: v_min_i32_e32 v5, -1, v0
-; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 0x80000000, v5
+; GFX6-NEXT: v_bfrev_b32_e32 v6, 1
+; GFX6-NEXT: v_add_i32_e32 v5, vcc, v5, v6
; GFX6-NEXT: v_max_i32_e32 v2, v4, v2
; GFX6-NEXT: v_min_i32_e32 v2, v2, v5
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v3
; GFX6-NEXT: v_max_i32_e32 v3, -1, v1
-; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x7fffffff, v3
+; GFX6-NEXT: v_add_i32_e32 v3, vcc, 0x80000001, v3
; GFX6-NEXT: v_min_i32_e32 v4, -1, v1
-; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 0x80000000, v4
+; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0x80000000, v4
; GFX6-NEXT: v_max_i32_e32 v2, v3, v2
; GFX6-NEXT: v_min_i32_e32 v2, v2, v4
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v2
@@ -2775,16 +2779,16 @@ define <2 x i16> @v_ssubsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_i16_e32 v2, -1, v0
-; GFX8-NEXT: v_subrev_u16_e32 v2, 0x7fff, v2
+; GFX8-NEXT: v_add_u16_e32 v2, 0x8001, v2
; GFX8-NEXT: v_min_i16_e32 v3, -1, v0
-; GFX8-NEXT: v_subrev_u16_e32 v3, 0x8000, v3
+; GFX8-NEXT: v_add_u16_e32 v3, 0x8000, v3
; GFX8-NEXT: v_max_i16_e32 v2, v2, v1
; GFX8-NEXT: v_min_i16_e32 v2, v2, v3
; GFX8-NEXT: v_mov_b32_e32 v3, -1
; GFX8-NEXT: v_max_i16_sdwa v4, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_subrev_u16_e32 v4, 0x7fff, v4
+; GFX8-NEXT: v_add_u16_e32 v4, 0x8001, v4
; GFX8-NEXT: v_min_i16_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_subrev_u16_e32 v3, 0x8000, v3
+; GFX8-NEXT: v_add_u16_e32 v3, 0x8000, v3
; GFX8-NEXT: v_max_i16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_min_i16_e32 v1, v1, v3
; GFX8-NEXT: v_sub_u16_e32 v2, v0, v2
@@ -2813,18 +2817,18 @@ define amdgpu_ps i32 @s_ssubsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: s_max_i32 s4, s0, -1
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
-; GFX6-NEXT: s_sub_i32 s4, s4, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s4, s4, 0x80000001
; GFX6-NEXT: s_min_i32 s5, s0, -1
-; GFX6-NEXT: s_sub_i32 s5, s5, 0x80000000
+; GFX6-NEXT: s_add_i32 s5, s5, 0x80000000
; GFX6-NEXT: s_max_i32 s2, s4, s2
; GFX6-NEXT: s_min_i32 s2, s2, s5
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_sub_i32 s0, s0, s2
; GFX6-NEXT: s_lshl_b32 s2, s3, 16
; GFX6-NEXT: s_max_i32 s3, s1, -1
-; GFX6-NEXT: s_sub_i32 s3, s3, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s3, s3, 0x80000001
; GFX6-NEXT: s_min_i32 s4, s1, -1
-; GFX6-NEXT: s_sub_i32 s4, s4, 0x80000000
+; GFX6-NEXT: s_add_i32 s4, s4, 0x80000000
; GFX6-NEXT: s_max_i32 s2, s3, s2
; GFX6-NEXT: s_min_i32 s2, s2, s4
; GFX6-NEXT: s_sub_i32 s1, s1, s2
@@ -2841,12 +2845,12 @@ define amdgpu_ps i32 @s_ssubsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
; GFX8-NEXT: s_sext_i32_i16 s4, s0
; GFX8-NEXT: s_sext_i32_i16 s5, -1
; GFX8-NEXT: s_max_i32 s6, s4, s5
-; GFX8-NEXT: s_sub_i32 s6, s6, 0x7fff
+; GFX8-NEXT: s_addk_i32 s6, 0x8001
; GFX8-NEXT: s_lshr_b32 s3, s1, 16
; GFX8-NEXT: s_min_i32 s4, s4, s5
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sub_i32 s4, s4, 0xffff8000
+; GFX8-NEXT: s_addk_i32 s4, 0x8000
; GFX8-NEXT: s_max_i32 s1, s6, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s4, s4
@@ -2855,11 +2859,11 @@ define amdgpu_ps i32 @s_ssubsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
; GFX8-NEXT: s_sub_i32 s0, s0, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s2
; GFX8-NEXT: s_max_i32 s4, s1, s5
-; GFX8-NEXT: s_sub_i32 s4, s4, 0x7fff
+; GFX8-NEXT: s_addk_i32 s4, 0x8001
; GFX8-NEXT: s_min_i32 s1, s1, s5
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s3, s3
-; GFX8-NEXT: s_sub_i32 s1, s1, 0xffff8000
+; GFX8-NEXT: s_addk_i32 s1, 0x8000
; GFX8-NEXT: s_max_i32 s3, s4, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s1, s1
@@ -2894,18 +2898,18 @@ define amdgpu_ps float @ssubsat_v2i16_sv(<2 x i16> inreg %lhs, <2 x i16> %rhs) {
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: s_max_i32 s2, s0, -1
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX6-NEXT: s_sub_i32 s2, s2, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s2, s2, 0x80000001
; GFX6-NEXT: s_min_i32 s3, s0, -1
-; GFX6-NEXT: s_sub_i32 s3, s3, 0x80000000
+; GFX6-NEXT: s_add_i32 s3, s3, 0x80000000
; GFX6-NEXT: v_max_i32_e32 v0, s2, v0
; GFX6-NEXT: v_min_i32_e32 v0, s3, v0
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s0, v0
; GFX6-NEXT: s_lshl_b32 s0, s1, 16
; GFX6-NEXT: s_max_i32 s1, s0, -1
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT: s_sub_i32 s1, s1, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s1, s1, 0x80000001
; GFX6-NEXT: s_min_i32 s2, s0, -1
-; GFX6-NEXT: s_sub_i32 s2, s2, 0x80000000
+; GFX6-NEXT: s_add_i32 s2, s2, 0x80000000
; GFX6-NEXT: v_max_i32_e32 v1, s1, v1
; GFX6-NEXT: v_min_i32_e32 v1, s2, v1
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s0, v1
@@ -2922,18 +2926,18 @@ define amdgpu_ps float @ssubsat_v2i16_sv(<2 x i16> inreg %lhs, <2 x i16> %rhs) {
; GFX8-NEXT: s_sext_i32_i16 s2, s0
; GFX8-NEXT: s_sext_i32_i16 s3, -1
; GFX8-NEXT: s_max_i32 s4, s2, s3
-; GFX8-NEXT: s_sub_i32 s4, s4, 0x7fff
+; GFX8-NEXT: s_addk_i32 s4, 0x8001
; GFX8-NEXT: s_min_i32 s2, s2, s3
; GFX8-NEXT: s_lshr_b32 s1, s0, 16
-; GFX8-NEXT: s_sub_i32 s2, s2, 0xffff8000
+; GFX8-NEXT: s_addk_i32 s2, 0x8000
; GFX8-NEXT: v_max_i16_e32 v1, s4, v0
; GFX8-NEXT: v_min_i16_e32 v1, s2, v1
; GFX8-NEXT: s_sext_i32_i16 s2, s1
; GFX8-NEXT: s_max_i32 s4, s2, s3
-; GFX8-NEXT: s_sub_i32 s4, s4, 0x7fff
+; GFX8-NEXT: s_addk_i32 s4, 0x8001
; GFX8-NEXT: s_min_i32 s2, s2, s3
; GFX8-NEXT: v_mov_b32_e32 v2, s4
-; GFX8-NEXT: s_sub_i32 s2, s2, 0xffff8000
+; GFX8-NEXT: s_addk_i32 s2, 0x8000
; GFX8-NEXT: v_max_i16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_min_i16_e32 v0, s2, v0
; GFX8-NEXT: v_mov_b32_e32 v2, s1
@@ -2962,18 +2966,20 @@ define amdgpu_ps float @ssubsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) {
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_max_i32_e32 v2, -1, v0
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
-; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2
-; GFX6-NEXT: v_min_i32_e32 v3, -1, v0
-; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x80000000, v3
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0x80000001, v2
+; GFX6-NEXT: v_min_i32_e32 v4, -1, v0
+; GFX6-NEXT: v_bfrev_b32_e32 v5, 1
+; GFX6-NEXT: v_add_i32_e32 v4, vcc, v4, v5
; GFX6-NEXT: v_max_i32_e32 v2, s0, v2
-; GFX6-NEXT: v_min_i32_e32 v2, v2, v3
+; GFX6-NEXT: v_min_i32_e32 v2, v2, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX6-NEXT: v_mov_b32_e32 v3, 0x80000001
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; GFX6-NEXT: v_max_i32_e32 v2, -1, v1
; GFX6-NEXT: s_lshl_b32 s0, s1, 16
-; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; GFX6-NEXT: v_min_i32_e32 v3, -1, v1
-; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x80000000, v3
+; GFX6-NEXT: v_add_i32_e32 v3, vcc, 0x80000000, v3
; GFX6-NEXT: v_max_i32_e32 v2, s0, v2
; GFX6-NEXT: v_min_i32_e32 v2, v2, v3
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v2
@@ -2988,17 +2994,17 @@ define amdgpu_ps float @ssubsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) {
; GFX8-LABEL: ssubsat_v2i16_vs:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_max_i16_e32 v1, -1, v0
-; GFX8-NEXT: v_subrev_u16_e32 v1, 0x7fff, v1
+; GFX8-NEXT: v_add_u16_e32 v1, 0x8001, v1
; GFX8-NEXT: v_min_i16_e32 v2, -1, v0
-; GFX8-NEXT: v_subrev_u16_e32 v2, 0x8000, v2
+; GFX8-NEXT: v_add_u16_e32 v2, 0x8000, v2
; GFX8-NEXT: v_max_i16_e32 v1, s0, v1
; GFX8-NEXT: v_min_i16_e32 v1, v1, v2
; GFX8-NEXT: v_mov_b32_e32 v2, -1
; GFX8-NEXT: v_max_i16_sdwa v3, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: s_lshr_b32 s1, s0, 16
-; GFX8-NEXT: v_subrev_u16_e32 v3, 0x7fff, v3
+; GFX8-NEXT: v_add_u16_e32 v3, 0x8001, v3
; GFX8-NEXT: v_min_i16_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_subrev_u16_e32 v2, 0x8000, v2
+; GFX8-NEXT: v_add_u16_e32 v2, 0x8000, v2
; GFX8-NEXT: v_max_i16_e32 v3, s1, v3
; GFX8-NEXT: v_min_i16_e32 v2, v3, v2
; GFX8-NEXT: v_sub_u16_e32 v1, v0, v1
@@ -3038,38 +3044,38 @@ define <2 x float> @v_ssubsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_max_i32_e32 v8, -1, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4
-; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, 0x7fffffff, v8
+; GFX6-NEXT: v_add_i32_e32 v8, vcc, 0x80000001, v8
; GFX6-NEXT: v_min_i32_e32 v10, -1, v0
; GFX6-NEXT: v_bfrev_b32_e32 v11, 1
-; GFX6-NEXT: v_sub_i32_e32 v10, vcc, v10, v11
+; GFX6-NEXT: v_add_i32_e32 v10, vcc, v10, v11
; GFX6-NEXT: v_max_i32_e32 v4, v8, v4
; GFX6-NEXT: v_min_i32_e32 v4, v4, v10
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT: v_bfrev_b32_e32 v9, -2
+; GFX6-NEXT: v_mov_b32_e32 v9, 0x80000001
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v5
; GFX6-NEXT: v_max_i32_e32 v5, -1, v1
-; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v9
+; GFX6-NEXT: v_add_i32_e32 v5, vcc, v5, v9
; GFX6-NEXT: v_min_i32_e32 v8, -1, v1
-; GFX6-NEXT: v_sub_i32_e32 v8, vcc, v8, v11
+; GFX6-NEXT: v_add_i32_e32 v8, vcc, v8, v11
; GFX6-NEXT: v_max_i32_e32 v4, v5, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_min_i32_e32 v4, v4, v8
; GFX6-NEXT: v_max_i32_e32 v5, -1, v2
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v6
-; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v9
+; GFX6-NEXT: v_add_i32_e32 v5, vcc, v5, v9
; GFX6-NEXT: v_min_i32_e32 v6, -1, v2
-; GFX6-NEXT: v_sub_i32_e32 v6, vcc, v6, v11
+; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v11
; GFX6-NEXT: v_max_i32_e32 v4, v5, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_min_i32_e32 v4, v4, v6
; GFX6-NEXT: v_max_i32_e32 v5, -1, v3
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v7
-; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v9
+; GFX6-NEXT: v_add_i32_e32 v5, vcc, v5, v9
; GFX6-NEXT: v_min_i32_e32 v6, -1, v3
-; GFX6-NEXT: v_sub_i32_e32 v6, vcc, v6, v11
+; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v11
; GFX6-NEXT: v_max_i32_e32 v4, v5, v4
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 16, v1
; GFX6-NEXT: v_min_i32_e32 v4, v4, v6
@@ -3091,28 +3097,28 @@ define <2 x float> @v_ssubsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_i16_e32 v4, -1, v0
-; GFX8-NEXT: v_subrev_u16_e32 v4, 0x7fff, v4
+; GFX8-NEXT: v_add_u16_e32 v4, 0x8001, v4
; GFX8-NEXT: v_min_i16_e32 v5, -1, v0
-; GFX8-NEXT: v_subrev_u16_e32 v5, 0x8000, v5
+; GFX8-NEXT: v_add_u16_e32 v5, 0x8000, v5
; GFX8-NEXT: v_max_i16_e32 v4, v4, v2
; GFX8-NEXT: v_min_i16_e32 v4, v4, v5
; GFX8-NEXT: v_mov_b32_e32 v5, -1
; GFX8-NEXT: v_max_i16_sdwa v6, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_subrev_u16_e32 v6, 0x7fff, v6
+; GFX8-NEXT: v_add_u16_e32 v6, 0x8001, v6
; GFX8-NEXT: v_min_i16_sdwa v7, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_subrev_u16_e32 v7, 0x8000, v7
+; GFX8-NEXT: v_add_u16_e32 v7, 0x8000, v7
; GFX8-NEXT: v_max_i16_sdwa v2, v6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_max_i16_e32 v6, -1, v1
; GFX8-NEXT: v_min_i16_e32 v2, v2, v7
-; GFX8-NEXT: v_subrev_u16_e32 v6, 0x7fff, v6
+; GFX8-NEXT: v_add_u16_e32 v6, 0x8001, v6
; GFX8-NEXT: v_min_i16_e32 v7, -1, v1
-; GFX8-NEXT: v_subrev_u16_e32 v7, 0x8000, v7
+; GFX8-NEXT: v_add_u16_e32 v7, 0x8000, v7
; GFX8-NEXT: v_max_i16_e32 v6, v6, v3
; GFX8-NEXT: v_min_i16_e32 v6, v6, v7
; GFX8-NEXT: v_max_i16_sdwa v7, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_subrev_u16_e32 v7, 0x7fff, v7
+; GFX8-NEXT: v_add_u16_e32 v7, 0x8001, v7
; GFX8-NEXT: v_min_i16_sdwa v5, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_subrev_u16_e32 v5, 0x8000, v5
+; GFX8-NEXT: v_add_u16_e32 v5, 0x8000, v5
; GFX8-NEXT: v_max_i16_sdwa v3, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_min_i16_e32 v3, v3, v5
; GFX8-NEXT: v_sub_u16_e32 v4, v0, v4
@@ -3147,36 +3153,36 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: s_max_i32 s8, s0, -1
; GFX6-NEXT: s_lshl_b32 s4, s4, 16
-; GFX6-NEXT: s_sub_i32 s8, s8, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s8, s8, 0x80000001
; GFX6-NEXT: s_min_i32 s9, s0, -1
-; GFX6-NEXT: s_sub_i32 s9, s9, 0x80000000
+; GFX6-NEXT: s_add_i32 s9, s9, 0x80000000
; GFX6-NEXT: s_max_i32 s4, s8, s4
; GFX6-NEXT: s_min_i32 s4, s4, s9
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_sub_i32 s0, s0, s4
; GFX6-NEXT: s_lshl_b32 s4, s5, 16
; GFX6-NEXT: s_max_i32 s5, s1, -1
-; GFX6-NEXT: s_sub_i32 s5, s5, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s5, s5, 0x80000001
; GFX6-NEXT: s_min_i32 s8, s1, -1
-; GFX6-NEXT: s_sub_i32 s8, s8, 0x80000000
+; GFX6-NEXT: s_add_i32 s8, s8, 0x80000000
; GFX6-NEXT: s_max_i32 s4, s5, s4
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_min_i32 s4, s4, s8
; GFX6-NEXT: s_max_i32 s5, s2, -1
; GFX6-NEXT: s_sub_i32 s1, s1, s4
; GFX6-NEXT: s_lshl_b32 s4, s6, 16
-; GFX6-NEXT: s_sub_i32 s5, s5, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s5, s5, 0x80000001
; GFX6-NEXT: s_min_i32 s6, s2, -1
-; GFX6-NEXT: s_sub_i32 s6, s6, 0x80000000
+; GFX6-NEXT: s_add_i32 s6, s6, 0x80000000
; GFX6-NEXT: s_max_i32 s4, s5, s4
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
; GFX6-NEXT: s_min_i32 s4, s4, s6
; GFX6-NEXT: s_max_i32 s5, s3, -1
; GFX6-NEXT: s_sub_i32 s2, s2, s4
; GFX6-NEXT: s_lshl_b32 s4, s7, 16
-; GFX6-NEXT: s_sub_i32 s5, s5, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s5, s5, 0x80000001
; GFX6-NEXT: s_min_i32 s6, s3, -1
-; GFX6-NEXT: s_sub_i32 s6, s6, 0x80000000
+; GFX6-NEXT: s_add_i32 s6, s6, 0x80000000
; GFX6-NEXT: s_max_i32 s4, s5, s4
; GFX6-NEXT: s_ashr_i32 s1, s1, 16
; GFX6-NEXT: s_min_i32 s4, s4, s6
@@ -3199,12 +3205,12 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
; GFX8-NEXT: s_sext_i32_i16 s8, s0
; GFX8-NEXT: s_sext_i32_i16 s9, -1
; GFX8-NEXT: s_max_i32 s10, s8, s9
-; GFX8-NEXT: s_sub_i32 s10, s10, 0x7fff
+; GFX8-NEXT: s_addk_i32 s10, 0x8001
; GFX8-NEXT: s_lshr_b32 s6, s2, 16
; GFX8-NEXT: s_min_i32 s8, s8, s9
; GFX8-NEXT: s_sext_i32_i16 s10, s10
; GFX8-NEXT: s_sext_i32_i16 s2, s2
-; GFX8-NEXT: s_sub_i32 s8, s8, 0xffff8000
+; GFX8-NEXT: s_addk_i32 s8, 0x8000
; GFX8-NEXT: s_max_i32 s2, s10, s2
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s8, s8
@@ -3213,11 +3219,11 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
; GFX8-NEXT: s_sub_i32 s0, s0, s2
; GFX8-NEXT: s_sext_i32_i16 s2, s4
; GFX8-NEXT: s_max_i32 s8, s2, s9
-; GFX8-NEXT: s_sub_i32 s8, s8, 0x7fff
+; GFX8-NEXT: s_addk_i32 s8, 0x8001
; GFX8-NEXT: s_min_i32 s2, s2, s9
; GFX8-NEXT: s_sext_i32_i16 s8, s8
; GFX8-NEXT: s_sext_i32_i16 s6, s6
-; GFX8-NEXT: s_sub_i32 s2, s2, 0xffff8000
+; GFX8-NEXT: s_addk_i32 s2, 0x8000
; GFX8-NEXT: s_max_i32 s6, s8, s6
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s2, s2
@@ -3225,12 +3231,12 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
; GFX8-NEXT: s_sub_i32 s2, s4, s2
; GFX8-NEXT: s_sext_i32_i16 s4, s1
; GFX8-NEXT: s_max_i32 s6, s4, s9
-; GFX8-NEXT: s_sub_i32 s6, s6, 0x7fff
+; GFX8-NEXT: s_addk_i32 s6, 0x8001
; GFX8-NEXT: s_lshr_b32 s7, s3, 16
; GFX8-NEXT: s_min_i32 s4, s4, s9
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s3, s3
-; GFX8-NEXT: s_sub_i32 s4, s4, 0xffff8000
+; GFX8-NEXT: s_addk_i32 s4, 0x8000
; GFX8-NEXT: s_max_i32 s3, s6, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s4, s4
@@ -3239,11 +3245,11 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
; GFX8-NEXT: s_sub_i32 s1, s1, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s5
; GFX8-NEXT: s_max_i32 s4, s3, s9
-; GFX8-NEXT: s_sub_i32 s4, s4, 0x7fff
+; GFX8-NEXT: s_addk_i32 s4, 0x8001
; GFX8-NEXT: s_min_i32 s3, s3, s9
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s6, s7
-; GFX8-NEXT: s_sub_i32 s3, s3, 0xffff8000
+; GFX8-NEXT: s_addk_i32 s3, 0x8000
; GFX8-NEXT: s_max_i32 s4, s4, s6
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s3, s3
@@ -3299,57 +3305,57 @@ define <3 x float> @v_ssubsat_v6i16(<6 x i16> %lhs, <6 x i16> %rhs) {
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_max_i32_e32 v12, -1, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v6
-; GFX6-NEXT: v_subrev_i32_e32 v12, vcc, 0x7fffffff, v12
+; GFX6-NEXT: v_add_i32_e32 v12, vcc, 0x80000001, v12
; GFX6-NEXT: v_min_i32_e32 v14, -1, v0
; GFX6-NEXT: v_bfrev_b32_e32 v15, 1
-; GFX6-NEXT: v_sub_i32_e32 v14, vcc, v14, v15
+; GFX6-NEXT: v_add_i32_e32 v14, vcc, v14, v15
; GFX6-NEXT: v_max_i32_e32 v6, v12, v6
; GFX6-NEXT: v_min_i32_e32 v6, v6, v14
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT: v_bfrev_b32_e32 v13, -2
+; GFX6-NEXT: v_mov_b32_e32 v13, 0x80000001
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v7
; GFX6-NEXT: v_max_i32_e32 v7, -1, v1
-; GFX6-NEXT: v_sub_i32_e32 v7, vcc, v7, v13
+; GFX6-NEXT: v_add_i32_e32 v7, vcc, v7, v13
; GFX6-NEXT: v_min_i32_e32 v12, -1, v1
-; GFX6-NEXT: v_sub_i32_e32 v12, vcc, v12, v15
+; GFX6-NEXT: v_add_i32_e32 v12, vcc, v12, v15
; GFX6-NEXT: v_max_i32_e32 v6, v7, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_min_i32_e32 v6, v6, v12
; GFX6-NEXT: v_max_i32_e32 v7, -1, v2
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v8
-; GFX6-NEXT: v_sub_i32_e32 v7, vcc, v7, v13
+; GFX6-NEXT: v_add_i32_e32 v7, vcc, v7, v13
; GFX6-NEXT: v_min_i32_e32 v8, -1, v2
-; GFX6-NEXT: v_sub_i32_e32 v8, vcc, v8, v15
+; GFX6-NEXT: v_add_i32_e32 v8, vcc, v8, v15
; GFX6-NEXT: v_max_i32_e32 v6, v7, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_min_i32_e32 v6, v6, v8
; GFX6-NEXT: v_max_i32_e32 v7, -1, v3
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v9
-; GFX6-NEXT: v_sub_i32_e32 v7, vcc, v7, v13
+; GFX6-NEXT: v_add_i32_e32 v7, vcc, v7, v13
; GFX6-NEXT: v_min_i32_e32 v8, -1, v3
-; GFX6-NEXT: v_sub_i32_e32 v8, vcc, v8, v15
+; GFX6-NEXT: v_add_i32_e32 v8, vcc, v8, v15
; GFX6-NEXT: v_max_i32_e32 v6, v7, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX6-NEXT: v_min_i32_e32 v6, v6, v8
; GFX6-NEXT: v_max_i32_e32 v7, -1, v4
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, v3, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v10
-; GFX6-NEXT: v_sub_i32_e32 v7, vcc, v7, v13
+; GFX6-NEXT: v_add_i32_e32 v7, vcc, v7, v13
; GFX6-NEXT: v_min_i32_e32 v8, -1, v4
-; GFX6-NEXT: v_sub_i32_e32 v8, vcc, v8, v15
+; GFX6-NEXT: v_add_i32_e32 v8, vcc, v8, v15
; GFX6-NEXT: v_max_i32_e32 v6, v7, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX6-NEXT: v_min_i32_e32 v6, v6, v8
; GFX6-NEXT: v_max_i32_e32 v7, -1, v5
; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v11
-; GFX6-NEXT: v_sub_i32_e32 v7, vcc, v7, v13
+; GFX6-NEXT: v_add_i32_e32 v7, vcc, v7, v13
; GFX6-NEXT: v_min_i32_e32 v8, -1, v5
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 16, v1
-; GFX6-NEXT: v_sub_i32_e32 v8, vcc, v8, v15
+; GFX6-NEXT: v_add_i32_e32 v8, vcc, v8, v15
; GFX6-NEXT: v_max_i32_e32 v6, v7, v6
; GFX6-NEXT: v_ashrrev_i32_e32 v0, 16, v0
; GFX6-NEXT: v_min_i32_e32 v6, v6, v8
@@ -3376,40 +3382,40 @@ define <3 x float> @v_ssubsat_v6i16(<6 x i16> %lhs, <6 x i16> %rhs) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_i16_e32 v6, -1, v0
-; GFX8-NEXT: v_subrev_u16_e32 v6, 0x7fff, v6
+; GFX8-NEXT: v_add_u16_e32 v6, 0x8001, v6
; GFX8-NEXT: v_min_i16_e32 v7, -1, v0
-; GFX8-NEXT: v_subrev_u16_e32 v7, 0x8000, v7
+; GFX8-NEXT: v_add_u16_e32 v7, 0x8000, v7
; GFX8-NEXT: v_max_i16_e32 v6, v6, v3
; GFX8-NEXT: v_min_i16_e32 v6, v6, v7
; GFX8-NEXT: v_mov_b32_e32 v7, -1
; GFX8-NEXT: v_max_i16_sdwa v8, v0, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_subrev_u16_e32 v8, 0x7fff, v8
+; GFX8-NEXT: v_add_u16_e32 v8, 0x8001, v8
; GFX8-NEXT: v_min_i16_sdwa v9, v0, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_subrev_u16_e32 v9, 0x8000, v9
+; GFX8-NEXT: v_add_u16_e32 v9, 0x8000, v9
; GFX8-NEXT: v_max_i16_sdwa v3, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_max_i16_e32 v8, -1, v1
; GFX8-NEXT: v_min_i16_e32 v3, v3, v9
-; GFX8-NEXT: v_subrev_u16_e32 v8, 0x7fff, v8
+; GFX8-NEXT: v_add_u16_e32 v8, 0x8001, v8
; GFX8-NEXT: v_min_i16_e32 v9, -1, v1
-; GFX8-NEXT: v_subrev_u16_e32 v9, 0x8000, v9
+; GFX8-NEXT: v_add_u16_e32 v9, 0x8000, v9
; GFX8-NEXT: v_max_i16_e32 v8, v8, v4
; GFX8-NEXT: v_min_i16_e32 v8, v8, v9
; GFX8-NEXT: v_max_i16_sdwa v9, v1, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_subrev_u16_e32 v9, 0x7fff, v9
+; GFX8-NEXT: v_add_u16_e32 v9, 0x8001, v9
; GFX8-NEXT: v_min_i16_sdwa v10, v1, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_subrev_u16_e32 v10, 0x8000, v10
+; GFX8-NEXT: v_add_u16_e32 v10, 0x8000, v10
; GFX8-NEXT: v_max_i16_sdwa v4, v9, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_max_i16_e32 v9, -1, v2
; GFX8-NEXT: v_min_i16_e32 v4, v4, v10
-; GFX8-NEXT: v_subrev_u16_e32 v9, 0x7fff, v9
+; GFX8-NEXT: v_add_u16_e32 v9, 0x8001, v9
; GFX8-NEXT: v_min_i16_e32 v10, -1, v2
-; GFX8-NEXT: v_subrev_u16_e32 v10, 0x8000, v10
+; GFX8-NEXT: v_add_u16_e32 v10, 0x8000, v10
; GFX8-NEXT: v_max_i16_e32 v9, v9, v5
; GFX8-NEXT: v_min_i16_e32 v9, v9, v10
; GFX8-NEXT: v_max_i16_sdwa v10, v2, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_subrev_u16_e32 v10, 0x7fff, v10
+; GFX8-NEXT: v_add_u16_e32 v10, 0x8001, v10
; GFX8-NEXT: v_min_i16_sdwa v7, v2, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_subrev_u16_e32 v7, 0x8000, v7
+; GFX8-NEXT: v_add_u16_e32 v7, 0x8000, v7
; GFX8-NEXT: v_max_i16_sdwa v5, v10, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_min_i16_e32 v5, v5, v7
; GFX8-NEXT: v_sub_u16_e32 v6, v0, v6
@@ -3449,55 +3455,55 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: s_max_i32 s12, s0, -1
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
-; GFX6-NEXT: s_sub_i32 s12, s12, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s12, s12, 0x80000001
; GFX6-NEXT: s_min_i32 s13, s0, -1
-; GFX6-NEXT: s_sub_i32 s13, s13, 0x80000000
+; GFX6-NEXT: s_add_i32 s13, s13, 0x80000000
; GFX6-NEXT: s_max_i32 s6, s12, s6
; GFX6-NEXT: s_min_i32 s6, s6, s13
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_sub_i32 s0, s0, s6
; GFX6-NEXT: s_lshl_b32 s6, s7, 16
; GFX6-NEXT: s_max_i32 s7, s1, -1
-; GFX6-NEXT: s_sub_i32 s7, s7, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s7, s7, 0x80000001
; GFX6-NEXT: s_min_i32 s12, s1, -1
-; GFX6-NEXT: s_sub_i32 s12, s12, 0x80000000
+; GFX6-NEXT: s_add_i32 s12, s12, 0x80000000
; GFX6-NEXT: s_max_i32 s6, s7, s6
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_min_i32 s6, s6, s12
; GFX6-NEXT: s_max_i32 s7, s2, -1
; GFX6-NEXT: s_sub_i32 s1, s1, s6
; GFX6-NEXT: s_lshl_b32 s6, s8, 16
-; GFX6-NEXT: s_sub_i32 s7, s7, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s7, s7, 0x80000001
; GFX6-NEXT: s_min_i32 s8, s2, -1
-; GFX6-NEXT: s_sub_i32 s8, s8, 0x80000000
+; GFX6-NEXT: s_add_i32 s8, s8, 0x80000000
; GFX6-NEXT: s_max_i32 s6, s7, s6
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
; GFX6-NEXT: s_min_i32 s6, s6, s8
; GFX6-NEXT: s_max_i32 s7, s3, -1
; GFX6-NEXT: s_sub_i32 s2, s2, s6
; GFX6-NEXT: s_lshl_b32 s6, s9, 16
-; GFX6-NEXT: s_sub_i32 s7, s7, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s7, s7, 0x80000001
; GFX6-NEXT: s_min_i32 s8, s3, -1
-; GFX6-NEXT: s_sub_i32 s8, s8, 0x80000000
+; GFX6-NEXT: s_add_i32 s8, s8, 0x80000000
; GFX6-NEXT: s_max_i32 s6, s7, s6
; GFX6-NEXT: s_lshl_b32 s4, s4, 16
; GFX6-NEXT: s_min_i32 s6, s6, s8
; GFX6-NEXT: s_max_i32 s7, s4, -1
; GFX6-NEXT: s_sub_i32 s3, s3, s6
; GFX6-NEXT: s_lshl_b32 s6, s10, 16
-; GFX6-NEXT: s_sub_i32 s7, s7, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s7, s7, 0x80000001
; GFX6-NEXT: s_min_i32 s8, s4, -1
-; GFX6-NEXT: s_sub_i32 s8, s8, 0x80000000
+; GFX6-NEXT: s_add_i32 s8, s8, 0x80000000
; GFX6-NEXT: s_max_i32 s6, s7, s6
; GFX6-NEXT: s_lshl_b32 s5, s5, 16
; GFX6-NEXT: s_min_i32 s6, s6, s8
; GFX6-NEXT: s_max_i32 s7, s5, -1
; GFX6-NEXT: s_sub_i32 s4, s4, s6
; GFX6-NEXT: s_lshl_b32 s6, s11, 16
-; GFX6-NEXT: s_sub_i32 s7, s7, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s7, s7, 0x80000001
; GFX6-NEXT: s_min_i32 s8, s5, -1
; GFX6-NEXT: s_ashr_i32 s1, s1, 16
-; GFX6-NEXT: s_sub_i32 s8, s8, 0x80000000
+; GFX6-NEXT: s_add_i32 s8, s8, 0x80000000
; GFX6-NEXT: s_max_i32 s6, s7, s6
; GFX6-NEXT: s_ashr_i32 s0, s0, 16
; GFX6-NEXT: s_min_i32 s6, s6, s8
@@ -3525,12 +3531,12 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
; GFX8-NEXT: s_sext_i32_i16 s12, s0
; GFX8-NEXT: s_sext_i32_i16 s13, -1
; GFX8-NEXT: s_max_i32 s14, s12, s13
-; GFX8-NEXT: s_sub_i32 s14, s14, 0x7fff
+; GFX8-NEXT: s_addk_i32 s14, 0x8001
; GFX8-NEXT: s_lshr_b32 s9, s3, 16
; GFX8-NEXT: s_min_i32 s12, s12, s13
; GFX8-NEXT: s_sext_i32_i16 s14, s14
; GFX8-NEXT: s_sext_i32_i16 s3, s3
-; GFX8-NEXT: s_sub_i32 s12, s12, 0xffff8000
+; GFX8-NEXT: s_addk_i32 s12, 0x8000
; GFX8-NEXT: s_max_i32 s3, s14, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s12, s12
@@ -3539,11 +3545,11 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
; GFX8-NEXT: s_sub_i32 s0, s0, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s6
; GFX8-NEXT: s_max_i32 s12, s3, s13
-; GFX8-NEXT: s_sub_i32 s12, s12, 0x7fff
+; GFX8-NEXT: s_addk_i32 s12, 0x8001
; GFX8-NEXT: s_min_i32 s3, s3, s13
; GFX8-NEXT: s_sext_i32_i16 s12, s12
; GFX8-NEXT: s_sext_i32_i16 s9, s9
-; GFX8-NEXT: s_sub_i32 s3, s3, 0xffff8000
+; GFX8-NEXT: s_addk_i32 s3, 0x8000
; GFX8-NEXT: s_max_i32 s9, s12, s9
; GFX8-NEXT: s_sext_i32_i16 s9, s9
; GFX8-NEXT: s_sext_i32_i16 s3, s3
@@ -3551,12 +3557,12 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
; GFX8-NEXT: s_sub_i32 s3, s6, s3
; GFX8-NEXT: s_sext_i32_i16 s6, s1
; GFX8-NEXT: s_max_i32 s9, s6, s13
-; GFX8-NEXT: s_sub_i32 s9, s9, 0x7fff
+; GFX8-NEXT: s_addk_i32 s9, 0x8001
; GFX8-NEXT: s_lshr_b32 s10, s4, 16
; GFX8-NEXT: s_min_i32 s6, s6, s13
; GFX8-NEXT: s_sext_i32_i16 s9, s9
; GFX8-NEXT: s_sext_i32_i16 s4, s4
-; GFX8-NEXT: s_sub_i32 s6, s6, 0xffff8000
+; GFX8-NEXT: s_addk_i32 s6, 0x8000
; GFX8-NEXT: s_max_i32 s4, s9, s4
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s6, s6
@@ -3565,11 +3571,11 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
; GFX8-NEXT: s_sub_i32 s1, s1, s4
; GFX8-NEXT: s_sext_i32_i16 s4, s7
; GFX8-NEXT: s_max_i32 s6, s4, s13
-; GFX8-NEXT: s_sub_i32 s6, s6, 0x7fff
+; GFX8-NEXT: s_addk_i32 s6, 0x8001
; GFX8-NEXT: s_min_i32 s4, s4, s13
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s9, s10
-; GFX8-NEXT: s_sub_i32 s4, s4, 0xffff8000
+; GFX8-NEXT: s_addk_i32 s4, 0x8000
; GFX8-NEXT: s_max_i32 s6, s6, s9
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s4, s4
@@ -3577,12 +3583,12 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
; GFX8-NEXT: s_sext_i32_i16 s6, s2
; GFX8-NEXT: s_sub_i32 s4, s7, s4
; GFX8-NEXT: s_max_i32 s7, s6, s13
-; GFX8-NEXT: s_sub_i32 s7, s7, 0x7fff
+; GFX8-NEXT: s_addk_i32 s7, 0x8001
; GFX8-NEXT: s_lshr_b32 s11, s5, 16
; GFX8-NEXT: s_min_i32 s6, s6, s13
; GFX8-NEXT: s_sext_i32_i16 s7, s7
; GFX8-NEXT: s_sext_i32_i16 s5, s5
-; GFX8-NEXT: s_sub_i32 s6, s6, 0xffff8000
+; GFX8-NEXT: s_addk_i32 s6, 0x8000
; GFX8-NEXT: s_max_i32 s5, s7, s5
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_sext_i32_i16 s6, s6
@@ -3591,11 +3597,11 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
; GFX8-NEXT: s_sub_i32 s2, s2, s5
; GFX8-NEXT: s_sext_i32_i16 s5, s8
; GFX8-NEXT: s_max_i32 s6, s5, s13
-; GFX8-NEXT: s_sub_i32 s6, s6, 0x7fff
+; GFX8-NEXT: s_addk_i32 s6, 0x8001
; GFX8-NEXT: s_min_i32 s5, s5, s13
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s7, s11
-; GFX8-NEXT: s_sub_i32 s5, s5, 0xffff8000
+; GFX8-NEXT: s_addk_i32 s5, 0x8000
; GFX8-NEXT: s_max_i32 s6, s6, s7
; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
; GFX8-NEXT: s_sext_i32_i16 s6, s6
@@ -3648,66 +3654,66 @@ define <4 x float> @v_ssubsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_max_i32_e32 v16, -1, v0
-; GFX6-NEXT: v_bfrev_b32_e32 v17, -2
+; GFX6-NEXT: v_mov_b32_e32 v17, 0x80000001
; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v8
-; GFX6-NEXT: v_sub_i32_e32 v16, vcc, v16, v17
+; GFX6-NEXT: v_add_i32_e32 v16, vcc, v16, v17
; GFX6-NEXT: v_min_i32_e32 v18, -1, v0
; GFX6-NEXT: v_bfrev_b32_e32 v19, 1
-; GFX6-NEXT: v_sub_i32_e32 v18, vcc, v18, v19
+; GFX6-NEXT: v_add_i32_e32 v18, vcc, v18, v19
; GFX6-NEXT: v_max_i32_e32 v8, v16, v8
; GFX6-NEXT: v_min_i32_e32 v8, v8, v18
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v9
; GFX6-NEXT: v_max_i32_e32 v9, -1, v1
-; GFX6-NEXT: v_sub_i32_e32 v9, vcc, v9, v17
+; GFX6-NEXT: v_add_i32_e32 v9, vcc, v9, v17
; GFX6-NEXT: v_min_i32_e32 v16, -1, v1
-; GFX6-NEXT: v_sub_i32_e32 v16, vcc, v16, v19
+; GFX6-NEXT: v_add_i32_e32 v16, vcc, v16, v19
; GFX6-NEXT: v_max_i32_e32 v8, v9, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_min_i32_e32 v8, v8, v16
; GFX6-NEXT: v_max_i32_e32 v9, -1, v2
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v10
-; GFX6-NEXT: v_sub_i32_e32 v9, vcc, v9, v17
+; GFX6-NEXT: v_add_i32_e32 v9, vcc, v9, v17
; GFX6-NEXT: v_min_i32_e32 v10, -1, v2
-; GFX6-NEXT: v_sub_i32_e32 v10, vcc, v10, v19
+; GFX6-NEXT: v_add_i32_e32 v10, vcc, v10, v19
; GFX6-NEXT: v_max_i32_e32 v8, v9, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_min_i32_e32 v8, v8, v10
; GFX6-NEXT: v_max_i32_e32 v9, -1, v3
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v11
-; GFX6-NEXT: v_sub_i32_e32 v9, vcc, v9, v17
+; GFX6-NEXT: v_add_i32_e32 v9, vcc, v9, v17
; GFX6-NEXT: v_min_i32_e32 v10, -1, v3
-; GFX6-NEXT: v_sub_i32_e32 v10, vcc, v10, v19
+; GFX6-NEXT: v_add_i32_e32 v10, vcc, v10, v19
; GFX6-NEXT: v_max_i32_e32 v8, v9, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX6-NEXT: v_min_i32_e32 v8, v8, v10
; GFX6-NEXT: v_max_i32_e32 v9, -1, v4
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, v3, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v12
-; GFX6-NEXT: v_sub_i32_e32 v9, vcc, v9, v17
+; GFX6-NEXT: v_add_i32_e32 v9, vcc, v9, v17
; GFX6-NEXT: v_min_i32_e32 v10, -1, v4
-; GFX6-NEXT: v_sub_i32_e32 v10, vcc, v10, v19
+; GFX6-NEXT: v_add_i32_e32 v10, vcc, v10, v19
; GFX6-NEXT: v_max_i32_e32 v8, v9, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX6-NEXT: v_min_i32_e32 v8, v8, v10
; GFX6-NEXT: v_max_i32_e32 v9, -1, v5
; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v13
-; GFX6-NEXT: v_sub_i32_e32 v9, vcc, v9, v17
+; GFX6-NEXT: v_add_i32_e32 v9, vcc, v9, v17
; GFX6-NEXT: v_min_i32_e32 v10, -1, v5
-; GFX6-NEXT: v_sub_i32_e32 v10, vcc, v10, v19
+; GFX6-NEXT: v_add_i32_e32 v10, vcc, v10, v19
; GFX6-NEXT: v_max_i32_e32 v8, v9, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; GFX6-NEXT: v_min_i32_e32 v8, v8, v10
; GFX6-NEXT: v_max_i32_e32 v9, -1, v6
; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v14
-; GFX6-NEXT: v_sub_i32_e32 v9, vcc, v9, v17
+; GFX6-NEXT: v_add_i32_e32 v9, vcc, v9, v17
; GFX6-NEXT: v_min_i32_e32 v10, -1, v6
-; GFX6-NEXT: v_sub_i32_e32 v10, vcc, v10, v19
+; GFX6-NEXT: v_add_i32_e32 v10, vcc, v10, v19
; GFX6-NEXT: v_max_i32_e32 v8, v9, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v7, 16, v7
; GFX6-NEXT: v_min_i32_e32 v8, v8, v10
@@ -3715,10 +3721,10 @@ define <4 x float> @v_ssubsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 16, v1
; GFX6-NEXT: v_sub_i32_e32 v6, vcc, v6, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v15
-; GFX6-NEXT: v_sub_i32_e32 v9, vcc, v9, v17
+; GFX6-NEXT: v_add_i32_e32 v9, vcc, v9, v17
; GFX6-NEXT: v_min_i32_e32 v10, -1, v7
; GFX6-NEXT: v_ashrrev_i32_e32 v0, 16, v0
-; GFX6-NEXT: v_sub_i32_e32 v10, vcc, v10, v19
+; GFX6-NEXT: v_add_i32_e32 v10, vcc, v10, v19
; GFX6-NEXT: v_max_i32_e32 v8, v9, v8
; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT: v_ashrrev_i32_e32 v2, 16, v2
@@ -3750,52 +3756,52 @@ define <4 x float> @v_ssubsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_i16_e32 v8, -1, v0
-; GFX8-NEXT: v_subrev_u16_e32 v8, 0x7fff, v8
+; GFX8-NEXT: v_add_u16_e32 v8, 0x8001, v8
; GFX8-NEXT: v_min_i16_e32 v9, -1, v0
-; GFX8-NEXT: v_subrev_u16_e32 v9, 0x8000, v9
+; GFX8-NEXT: v_add_u16_e32 v9, 0x8000, v9
; GFX8-NEXT: v_max_i16_e32 v8, v8, v4
; GFX8-NEXT: v_min_i16_e32 v8, v8, v9
; GFX8-NEXT: v_mov_b32_e32 v9, -1
; GFX8-NEXT: v_max_i16_sdwa v10, v0, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_subrev_u16_e32 v10, 0x7fff, v10
+; GFX8-NEXT: v_add_u16_e32 v10, 0x8001, v10
; GFX8-NEXT: v_min_i16_sdwa v11, v0, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_subrev_u16_e32 v11, 0x8000, v11
+; GFX8-NEXT: v_add_u16_e32 v11, 0x8000, v11
; GFX8-NEXT: v_max_i16_sdwa v4, v10, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_max_i16_e32 v10, -1, v1
; GFX8-NEXT: v_min_i16_e32 v4, v4, v11
-; GFX8-NEXT: v_subrev_u16_e32 v10, 0x7fff, v10
+; GFX8-NEXT: v_add_u16_e32 v10, 0x8001, v10
; GFX8-NEXT: v_min_i16_e32 v11, -1, v1
-; GFX8-NEXT: v_subrev_u16_e32 v11, 0x8000, v11
+; GFX8-NEXT: v_add_u16_e32 v11, 0x8000, v11
; GFX8-NEXT: v_max_i16_e32 v10, v10, v5
; GFX8-NEXT: v_min_i16_e32 v10, v10, v11
; GFX8-NEXT: v_max_i16_sdwa v11, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_subrev_u16_e32 v11, 0x7fff, v11
+; GFX8-NEXT: v_add_u16_e32 v11, 0x8001, v11
; GFX8-NEXT: v_min_i16_sdwa v12, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_subrev_u16_e32 v12, 0x8000, v12
+; GFX8-NEXT: v_add_u16_e32 v12, 0x8000, v12
; GFX8-NEXT: v_max_i16_sdwa v5, v11, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_max_i16_e32 v11, -1, v2
; GFX8-NEXT: v_min_i16_e32 v5, v5, v12
-; GFX8-NEXT: v_subrev_u16_e32 v11, 0x7fff, v11
+; GFX8-NEXT: v_add_u16_e32 v11, 0x8001, v11
; GFX8-NEXT: v_min_i16_e32 v12, -1, v2
-; GFX8-NEXT: v_subrev_u16_e32 v12, 0x8000, v12
+; GFX8-NEXT: v_add_u16_e32 v12, 0x8000, v12
; GFX8-NEXT: v_max_i16_e32 v11, v11, v6
; GFX8-NEXT: v_min_i16_e32 v11, v11, v12
; GFX8-NEXT: v_max_i16_sdwa v12, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_subrev_u16_e32 v12, 0x7fff, v12
+; GFX8-NEXT: v_add_u16_e32 v12, 0x8001, v12
; GFX8-NEXT: v_min_i16_sdwa v13, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_subrev_u16_e32 v13, 0x8000, v13
+; GFX8-NEXT: v_add_u16_e32 v13, 0x8000, v13
; GFX8-NEXT: v_max_i16_sdwa v6, v12, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_max_i16_e32 v12, -1, v3
; GFX8-NEXT: v_min_i16_e32 v6, v6, v13
-; GFX8-NEXT: v_subrev_u16_e32 v12, 0x7fff, v12
+; GFX8-NEXT: v_add_u16_e32 v12, 0x8001, v12
; GFX8-NEXT: v_min_i16_e32 v13, -1, v3
-; GFX8-NEXT: v_subrev_u16_e32 v13, 0x8000, v13
+; GFX8-NEXT: v_add_u16_e32 v13, 0x8000, v13
; GFX8-NEXT: v_max_i16_e32 v12, v12, v7
; GFX8-NEXT: v_min_i16_e32 v12, v12, v13
; GFX8-NEXT: v_max_i16_sdwa v13, v3, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_subrev_u16_e32 v13, 0x7fff, v13
+; GFX8-NEXT: v_add_u16_e32 v13, 0x8001, v13
; GFX8-NEXT: v_min_i16_sdwa v9, v3, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_subrev_u16_e32 v9, 0x8000, v9
+; GFX8-NEXT: v_add_u16_e32 v9, 0x8000, v9
; GFX8-NEXT: v_max_i16_sdwa v7, v13, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_e32 v8, v0, v8
; GFX8-NEXT: v_sub_u16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
@@ -3840,63 +3846,63 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: s_max_i32 s16, s0, -1
; GFX6-NEXT: s_lshl_b32 s8, s8, 16
-; GFX6-NEXT: s_sub_i32 s16, s16, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s16, s16, 0x80000001
; GFX6-NEXT: s_min_i32 s17, s0, -1
-; GFX6-NEXT: s_sub_i32 s17, s17, 0x80000000
+; GFX6-NEXT: s_add_i32 s17, s17, 0x80000000
; GFX6-NEXT: s_max_i32 s8, s16, s8
; GFX6-NEXT: s_min_i32 s8, s8, s17
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_sub_i32 s0, s0, s8
; GFX6-NEXT: s_lshl_b32 s8, s9, 16
; GFX6-NEXT: s_max_i32 s9, s1, -1
-; GFX6-NEXT: s_sub_i32 s9, s9, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s9, s9, 0x80000001
; GFX6-NEXT: s_min_i32 s16, s1, -1
-; GFX6-NEXT: s_sub_i32 s16, s16, 0x80000000
+; GFX6-NEXT: s_add_i32 s16, s16, 0x80000000
; GFX6-NEXT: s_max_i32 s8, s9, s8
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_min_i32 s8, s8, s16
; GFX6-NEXT: s_max_i32 s9, s2, -1
; GFX6-NEXT: s_sub_i32 s1, s1, s8
; GFX6-NEXT: s_lshl_b32 s8, s10, 16
-; GFX6-NEXT: s_sub_i32 s9, s9, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s9, s9, 0x80000001
; GFX6-NEXT: s_min_i32 s10, s2, -1
-; GFX6-NEXT: s_sub_i32 s10, s10, 0x80000000
+; GFX6-NEXT: s_add_i32 s10, s10, 0x80000000
; GFX6-NEXT: s_max_i32 s8, s9, s8
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
; GFX6-NEXT: s_min_i32 s8, s8, s10
; GFX6-NEXT: s_max_i32 s9, s3, -1
; GFX6-NEXT: s_sub_i32 s2, s2, s8
; GFX6-NEXT: s_lshl_b32 s8, s11, 16
-; GFX6-NEXT: s_sub_i32 s9, s9, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s9, s9, 0x80000001
; GFX6-NEXT: s_min_i32 s10, s3, -1
-; GFX6-NEXT: s_sub_i32 s10, s10, 0x80000000
+; GFX6-NEXT: s_add_i32 s10, s10, 0x80000000
; GFX6-NEXT: s_max_i32 s8, s9, s8
; GFX6-NEXT: s_lshl_b32 s4, s4, 16
; GFX6-NEXT: s_min_i32 s8, s8, s10
; GFX6-NEXT: s_max_i32 s9, s4, -1
; GFX6-NEXT: s_sub_i32 s3, s3, s8
; GFX6-NEXT: s_lshl_b32 s8, s12, 16
-; GFX6-NEXT: s_sub_i32 s9, s9, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s9, s9, 0x80000001
; GFX6-NEXT: s_min_i32 s10, s4, -1
-; GFX6-NEXT: s_sub_i32 s10, s10, 0x80000000
+; GFX6-NEXT: s_add_i32 s10, s10, 0x80000000
; GFX6-NEXT: s_max_i32 s8, s9, s8
; GFX6-NEXT: s_lshl_b32 s5, s5, 16
; GFX6-NEXT: s_min_i32 s8, s8, s10
; GFX6-NEXT: s_max_i32 s9, s5, -1
; GFX6-NEXT: s_sub_i32 s4, s4, s8
; GFX6-NEXT: s_lshl_b32 s8, s13, 16
-; GFX6-NEXT: s_sub_i32 s9, s9, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s9, s9, 0x80000001
; GFX6-NEXT: s_min_i32 s10, s5, -1
-; GFX6-NEXT: s_sub_i32 s10, s10, 0x80000000
+; GFX6-NEXT: s_add_i32 s10, s10, 0x80000000
; GFX6-NEXT: s_max_i32 s8, s9, s8
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
; GFX6-NEXT: s_min_i32 s8, s8, s10
; GFX6-NEXT: s_max_i32 s9, s6, -1
; GFX6-NEXT: s_sub_i32 s5, s5, s8
; GFX6-NEXT: s_lshl_b32 s8, s14, 16
-; GFX6-NEXT: s_sub_i32 s9, s9, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s9, s9, 0x80000001
; GFX6-NEXT: s_min_i32 s10, s6, -1
-; GFX6-NEXT: s_sub_i32 s10, s10, 0x80000000
+; GFX6-NEXT: s_add_i32 s10, s10, 0x80000000
; GFX6-NEXT: s_max_i32 s8, s9, s8
; GFX6-NEXT: s_lshl_b32 s7, s7, 16
; GFX6-NEXT: s_min_i32 s8, s8, s10
@@ -3904,10 +3910,10 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX6-NEXT: s_ashr_i32 s1, s1, 16
; GFX6-NEXT: s_sub_i32 s6, s6, s8
; GFX6-NEXT: s_lshl_b32 s8, s15, 16
-; GFX6-NEXT: s_sub_i32 s9, s9, 0x7fffffff
+; GFX6-NEXT: s_add_i32 s9, s9, 0x80000001
; GFX6-NEXT: s_min_i32 s10, s7, -1
; GFX6-NEXT: s_ashr_i32 s0, s0, 16
-; GFX6-NEXT: s_sub_i32 s10, s10, 0x80000000
+; GFX6-NEXT: s_add_i32 s10, s10, 0x80000000
; GFX6-NEXT: s_max_i32 s8, s9, s8
; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
; GFX6-NEXT: s_ashr_i32 s2, s2, 16
@@ -3940,12 +3946,12 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX8-NEXT: s_sext_i32_i16 s16, s0
; GFX8-NEXT: s_sext_i32_i16 s17, -1
; GFX8-NEXT: s_max_i32 s18, s16, s17
-; GFX8-NEXT: s_sub_i32 s18, s18, 0x7fff
+; GFX8-NEXT: s_addk_i32 s18, 0x8001
; GFX8-NEXT: s_lshr_b32 s12, s4, 16
; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sext_i32_i16 s18, s18
; GFX8-NEXT: s_sext_i32_i16 s4, s4
-; GFX8-NEXT: s_sub_i32 s16, s16, 0xffff8000
+; GFX8-NEXT: s_addk_i32 s16, 0x8000
; GFX8-NEXT: s_max_i32 s4, s18, s4
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s16, s16
@@ -3954,11 +3960,11 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX8-NEXT: s_sub_i32 s0, s0, s4
; GFX8-NEXT: s_sext_i32_i16 s4, s8
; GFX8-NEXT: s_max_i32 s16, s4, s17
-; GFX8-NEXT: s_sub_i32 s16, s16, 0x7fff
+; GFX8-NEXT: s_addk_i32 s16, 0x8001
; GFX8-NEXT: s_min_i32 s4, s4, s17
; GFX8-NEXT: s_sext_i32_i16 s16, s16
; GFX8-NEXT: s_sext_i32_i16 s12, s12
-; GFX8-NEXT: s_sub_i32 s4, s4, 0xffff8000
+; GFX8-NEXT: s_addk_i32 s4, 0x8000
; GFX8-NEXT: s_max_i32 s12, s16, s12
; GFX8-NEXT: s_sext_i32_i16 s12, s12
; GFX8-NEXT: s_sext_i32_i16 s4, s4
@@ -3966,12 +3972,12 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX8-NEXT: s_sub_i32 s4, s8, s4
; GFX8-NEXT: s_sext_i32_i16 s8, s1
; GFX8-NEXT: s_max_i32 s12, s8, s17
-; GFX8-NEXT: s_sub_i32 s12, s12, 0x7fff
+; GFX8-NEXT: s_addk_i32 s12, 0x8001
; GFX8-NEXT: s_lshr_b32 s13, s5, 16
; GFX8-NEXT: s_min_i32 s8, s8, s17
; GFX8-NEXT: s_sext_i32_i16 s12, s12
; GFX8-NEXT: s_sext_i32_i16 s5, s5
-; GFX8-NEXT: s_sub_i32 s8, s8, 0xffff8000
+; GFX8-NEXT: s_addk_i32 s8, 0x8000
; GFX8-NEXT: s_max_i32 s5, s12, s5
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_sext_i32_i16 s8, s8
@@ -3980,11 +3986,11 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX8-NEXT: s_sub_i32 s1, s1, s5
; GFX8-NEXT: s_sext_i32_i16 s5, s9
; GFX8-NEXT: s_max_i32 s8, s5, s17
-; GFX8-NEXT: s_sub_i32 s8, s8, 0x7fff
+; GFX8-NEXT: s_addk_i32 s8, 0x8001
; GFX8-NEXT: s_min_i32 s5, s5, s17
; GFX8-NEXT: s_sext_i32_i16 s8, s8
; GFX8-NEXT: s_sext_i32_i16 s12, s13
-; GFX8-NEXT: s_sub_i32 s5, s5, 0xffff8000
+; GFX8-NEXT: s_addk_i32 s5, 0x8000
; GFX8-NEXT: s_max_i32 s8, s8, s12
; GFX8-NEXT: s_sext_i32_i16 s8, s8
; GFX8-NEXT: s_sext_i32_i16 s5, s5
@@ -3992,12 +3998,12 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX8-NEXT: s_sext_i32_i16 s8, s2
; GFX8-NEXT: s_sub_i32 s5, s9, s5
; GFX8-NEXT: s_max_i32 s9, s8, s17
-; GFX8-NEXT: s_sub_i32 s9, s9, 0x7fff
+; GFX8-NEXT: s_addk_i32 s9, 0x8001
; GFX8-NEXT: s_lshr_b32 s14, s6, 16
; GFX8-NEXT: s_min_i32 s8, s8, s17
; GFX8-NEXT: s_sext_i32_i16 s9, s9
; GFX8-NEXT: s_sext_i32_i16 s6, s6
-; GFX8-NEXT: s_sub_i32 s8, s8, 0xffff8000
+; GFX8-NEXT: s_addk_i32 s8, 0x8000
; GFX8-NEXT: s_max_i32 s6, s9, s6
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s8, s8
@@ -4006,23 +4012,23 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX8-NEXT: s_sub_i32 s2, s2, s6
; GFX8-NEXT: s_sext_i32_i16 s6, s10
; GFX8-NEXT: s_max_i32 s8, s6, s17
-; GFX8-NEXT: s_sub_i32 s8, s8, 0x7fff
+; GFX8-NEXT: s_addk_i32 s8, 0x8001
; GFX8-NEXT: s_min_i32 s6, s6, s17
; GFX8-NEXT: s_sext_i32_i16 s8, s8
; GFX8-NEXT: s_sext_i32_i16 s9, s14
-; GFX8-NEXT: s_sub_i32 s6, s6, 0xffff8000
+; GFX8-NEXT: s_addk_i32 s6, 0x8000
; GFX8-NEXT: s_max_i32 s8, s8, s9
; GFX8-NEXT: s_sext_i32_i16 s8, s8
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_min_i32 s6, s8, s6
; GFX8-NEXT: s_sext_i32_i16 s8, s3
; GFX8-NEXT: s_max_i32 s9, s8, s17
-; GFX8-NEXT: s_sub_i32 s9, s9, 0x7fff
+; GFX8-NEXT: s_addk_i32 s9, 0x8001
; GFX8-NEXT: s_lshr_b32 s15, s7, 16
; GFX8-NEXT: s_min_i32 s8, s8, s17
; GFX8-NEXT: s_sext_i32_i16 s9, s9
; GFX8-NEXT: s_sext_i32_i16 s7, s7
-; GFX8-NEXT: s_sub_i32 s8, s8, 0xffff8000
+; GFX8-NEXT: s_addk_i32 s8, 0x8000
; GFX8-NEXT: s_max_i32 s7, s9, s7
; GFX8-NEXT: s_sext_i32_i16 s7, s7
; GFX8-NEXT: s_sext_i32_i16 s8, s8
@@ -4031,14 +4037,14 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX8-NEXT: s_sub_i32 s3, s3, s7
; GFX8-NEXT: s_sext_i32_i16 s7, s11
; GFX8-NEXT: s_max_i32 s8, s7, s17
-; GFX8-NEXT: s_sub_i32 s8, s8, 0x7fff
+; GFX8-NEXT: s_addk_i32 s8, 0x8001
; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
; GFX8-NEXT: s_min_i32 s7, s7, s17
; GFX8-NEXT: s_sext_i32_i16 s8, s8
; GFX8-NEXT: s_sext_i32_i16 s9, s15
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
-; GFX8-NEXT: s_sub_i32 s7, s7, 0xffff8000
+; GFX8-NEXT: s_addk_i32 s7, 0x8000
; GFX8-NEXT: s_max_i32 s8, s8, s9
; GFX8-NEXT: s_or_b32 s0, s0, s4
; GFX8-NEXT: s_and_b32 s4, 0xffff, s5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.v2i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.v2i16.ll
index 855687281ce9ab..6c104709f5ee3a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.v2i16.ll
@@ -147,10 +147,10 @@ define <2 x i16> @v_sub_v2i16_neg_inline_imm_splat(<2 x i16> %a) {
; GFX8-LABEL: v_sub_v2i16_neg_inline_imm_splat:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_not_b32_e32 v1, 63
-; GFX8-NEXT: v_subrev_u16_e32 v2, 0xffc0, v0
-; GFX8-NEXT: v_sub_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX8-NEXT: v_mov_b32_e32 v2, 64
+; GFX8-NEXT: v_add_u16_e32 v1, 64, v0
+; GFX8-NEXT: v_add_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sub_v2i16_neg_inline_imm_splat:
@@ -179,9 +179,9 @@ define <2 x i16> @v_sub_v2i16_neg_inline_imm_lo(<2 x i16> %a) {
; GFX8-LABEL: v_sub_v2i16_neg_inline_imm_lo:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mov_b32_e32 v2, 4
-; GFX8-NEXT: v_subrev_u16_e32 v1, 0xffc0, v0
-; GFX8-NEXT: v_sub_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_mov_b32_e32 v2, -4
+; GFX8-NEXT: v_add_u16_e32 v1, 64, v0
+; GFX8-NEXT: v_add_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
@@ -211,10 +211,10 @@ define <2 x i16> @v_sub_v2i16_neg_inline_imm_hi(<2 x i16> %a) {
; GFX8-LABEL: v_sub_v2i16_neg_inline_imm_hi:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_not_b32_e32 v1, 63
-; GFX8-NEXT: v_subrev_u16_e32 v2, 4, v0
-; GFX8-NEXT: v_sub_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX8-NEXT: v_mov_b32_e32 v2, 64
+; GFX8-NEXT: v_add_u16_e32 v1, -4, v0
+; GFX8-NEXT: v_add_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sub_v2i16_neg_inline_imm_hi:
@@ -245,8 +245,8 @@ define amdgpu_ps i32 @s_sub_v2i16_neg_inline_imm_splat(<2 x i16> inreg %a) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshr_b32 s1, s0, 16
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
-; GFX8-NEXT: s_sub_i32 s0, s0, 0xffc0
-; GFX8-NEXT: s_sub_i32 s1, s1, 0xffc0
+; GFX8-NEXT: s_add_i32 s0, s0, 0xffff0040
+; GFX8-NEXT: s_add_i32 s1, s1, 0xffff0040
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
; GFX8-NEXT: s_or_b32 s0, s1, s0
@@ -285,8 +285,8 @@ define amdgpu_ps i32 @s_sub_v2i16_neg_inline_imm_lo(<2 x i16> inreg %a) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshr_b32 s1, s0, 16
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
-; GFX8-NEXT: s_sub_i32 s0, s0, 0xffc0
-; GFX8-NEXT: s_sub_i32 s1, s1, 4
+; GFX8-NEXT: s_add_i32 s0, s0, 0xffff0040
+; GFX8-NEXT: s_add_i32 s1, s1, -4
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
; GFX8-NEXT: s_or_b32 s0, s1, s0
@@ -325,8 +325,8 @@ define amdgpu_ps i32 @s_sub_v2i16_neg_inline_imm_hi(<2 x i16> inreg %a) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshr_b32 s1, s0, 16
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
-; GFX8-NEXT: s_sub_i32 s0, s0, 4
-; GFX8-NEXT: s_sub_i32 s1, s1, 0xffc0
+; GFX8-NEXT: s_add_i32 s0, s0, -4
+; GFX8-NEXT: s_add_i32 s1, s1, 0xffff0040
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
; GFX8-NEXT: s_or_b32 s0, s1, s0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
index 31f61b9968b8bf..24ec4fa48f7789 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
@@ -222,10 +222,10 @@ define i32 @v_urem_i32_oddk_denom(i32 %num) {
; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
; CHECK-NEXT: v_mul_lo_u32 v1, v1, v3
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
-; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, 0x12d8fb, v0
+; CHECK-NEXT: v_add_i32_e32 v1, vcc, 0xffed2705, v0
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, 0x12d8fb, v0
+; CHECK-NEXT: v_add_i32_e32 v1, vcc, 0xffed2705, v0
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; CHECK-NEXT: s_setpc_b64 s[30:31]
@@ -243,23 +243,23 @@ define <2 x i32> @v_urem_v2i32_oddk_denom(<2 x i32> %num) {
; GISEL-NEXT: v_rcp_iflag_f32_e32 v3, v3
; GISEL-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; GISEL-NEXT: v_cvt_u32_f32_e32 v3, v3
-; GISEL-NEXT: v_mul_lo_u32 v4, v3, v4
-; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v4
-; GISEL-NEXT: v_mul_hi_u32 v4, v0, v3
+; GISEL-NEXT: v_mul_lo_u32 v5, v3, v4
+; GISEL-NEXT: v_mul_hi_u32 v5, v3, v5
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v5
+; GISEL-NEXT: v_mul_hi_u32 v5, v0, v3
; GISEL-NEXT: v_mul_hi_u32 v3, v1, v3
-; GISEL-NEXT: v_mul_lo_u32 v4, v4, v2
+; GISEL-NEXT: v_mul_lo_u32 v5, v5, v2
; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2
-; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
-; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v2
-; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v0, v4
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, 0xffed2705, v1
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v2
-; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v0, v4
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, 0xffed2705, v1
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
@@ -274,23 +274,23 @@ define <2 x i32> @v_urem_v2i32_oddk_denom(<2 x i32> %num) {
; CGP-NEXT: v_mov_b32_e32 v4, 0x12d8fb
; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
-; CGP-NEXT: v_mul_lo_u32 v3, v2, v3
-; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
-; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
-; CGP-NEXT: v_mul_hi_u32 v3, v0, v2
+; CGP-NEXT: v_mul_lo_u32 v5, v2, v3
+; CGP-NEXT: v_mul_hi_u32 v5, v2, v5
+; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v5
+; CGP-NEXT: v_mul_hi_u32 v5, v0, v2
; CGP-NEXT: v_mul_hi_u32 v2, v1, v2
-; CGP-NEXT: v_mul_lo_u32 v3, v3, v4
+; CGP-NEXT: v_mul_lo_u32 v5, v5, v4
; CGP-NEXT: v_mul_lo_u32 v2, v2, v4
-; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v2
-; CGP-NEXT: v_subrev_i32_e32 v2, vcc, 0x12d8fb, v0
-; CGP-NEXT: v_subrev_i32_e32 v3, vcc, 0x12d8fb, v1
+; CGP-NEXT: v_add_i32_e32 v2, vcc, v0, v3
+; CGP-NEXT: v_add_i32_e32 v5, vcc, 0xffed2705, v1
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4
-; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; CGP-NEXT: v_sub_i32_e32 v2, vcc, v0, v4
-; CGP-NEXT: v_subrev_i32_e32 v3, vcc, 0x12d8fb, v1
+; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
+; CGP-NEXT: v_add_i32_e32 v2, vcc, v0, v3
+; CGP-NEXT: v_add_i32_e32 v3, vcc, 0xffed2705, v1
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4
diff --git a/llvm/test/CodeGen/AMDGPU/ctlz.ll b/llvm/test/CodeGen/AMDGPU/ctlz.ll
index e1c9fed9df4892..3019d4d298eb45 100644
--- a/llvm/test/CodeGen/AMDGPU/ctlz.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctlz.ll
@@ -567,7 +567,7 @@ define amdgpu_kernel void @v_ctlz_i8(ptr addrspace(1) noalias %out, ptr addrspac
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1
-; GFX10-GISEL-NEXT: v_subrev_nc_u32_e32 v1, 24, v1
+; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 0xffffffe8, v1
; GFX10-GISEL-NEXT: global_store_byte v0, v1, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
;
@@ -1566,7 +1566,7 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v1, v0
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1
-; GFX10-GISEL-NEXT: v_sub_nc_u16 v1, v1, 24
+; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 0xffe8
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0xffff, vcc_lo
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: global_store_byte v1, v0, s[0:1]
@@ -1686,7 +1686,7 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v2, v1
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX10-GISEL-NEXT: v_min_u32_e32 v2, 32, v2
-; GFX10-GISEL-NEXT: v_subrev_nc_u32_e32 v2, 16, v2
+; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, -16, v2
; GFX10-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v2, 0xffff, vcc_lo
; GFX10-GISEL-NEXT: global_store_short v0, v1, s[0:1]
@@ -1807,7 +1807,7 @@ define amdgpu_kernel void @v_ctlz_i7_sel_eq_neg1(ptr addrspace(1) noalias %out,
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v1, v0
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1
-; GFX10-GISEL-NEXT: v_sub_nc_u16 v1, v1, 25
+; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 0xffe7
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0x7f, vcc_lo
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0x7f, v0
diff --git a/llvm/test/CodeGen/AMDGPU/div_i128.ll b/llvm/test/CodeGen/AMDGPU/div_i128.ll
index d94ec56842ab87..147ddc4d4b75b2 100644
--- a/llvm/test/CodeGen/AMDGPU/div_i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_i128.ll
@@ -1313,7 +1313,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-NEXT: v_sub_u32_e32 v0, 64, v8
; GFX9-G-NEXT: v_lshrrev_b64 v[0:1], v0, v[10:11]
; GFX9-G-NEXT: v_lshlrev_b64 v[2:3], v8, v[12:13]
-; GFX9-G-NEXT: v_subrev_u32_e32 v9, 64, v8
+; GFX9-G-NEXT: v_add_u32_e32 v9, 0xffffffc0, v8
; GFX9-G-NEXT: v_lshlrev_b64 v[6:7], v8, v[10:11]
; GFX9-G-NEXT: v_or_b32_e32 v2, v0, v2
; GFX9-G-NEXT: v_or_b32_e32 v3, v1, v3
@@ -1338,7 +1338,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-NEXT: v_sub_u32_e32 v2, 64, v20
; GFX9-G-NEXT: v_lshrrev_b64 v[0:1], v20, v[10:11]
; GFX9-G-NEXT: v_lshlrev_b64 v[2:3], v2, v[12:13]
-; GFX9-G-NEXT: v_subrev_u32_e32 v24, 64, v20
+; GFX9-G-NEXT: v_add_u32_e32 v24, 0xffffffc0, v20
; GFX9-G-NEXT: v_lshrrev_b64 v[14:15], v20, v[12:13]
; GFX9-G-NEXT: v_or_b32_e32 v2, v0, v2
; GFX9-G-NEXT: v_or_b32_e32 v3, v1, v3
@@ -2070,8 +2070,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(1)
; GFX9-G-O0-NEXT: v_mov_b32_e32 v21, v4
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s4
-; GFX9-G-O0-NEXT: v_sub_u32_e64 v4, v18, v4
+; GFX9-G-O0-NEXT: s_mov_b32 s5, 0xffffffc0
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s5
+; GFX9-G-O0-NEXT: v_add_u32_e64 v4, v18, v4
; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s4
; GFX9-G-O0-NEXT: v_sub_u32_e64 v5, v5, v18
; GFX9-G-O0-NEXT: s_mov_b32 s6, 0
@@ -2203,8 +2204,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v0
; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v9
; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v3
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s7
-; GFX9-G-O0-NEXT: v_sub_u32_e64 v2, v8, v0
+; GFX9-G-O0-NEXT: s_mov_b32 s6, 0xffffffc0
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s6
+; GFX9-G-O0-NEXT: v_add_u32_e64 v2, v8, v0
; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s7
; GFX9-G-O0-NEXT: v_sub_u32_e64 v14, v0, v8
; GFX9-G-O0-NEXT: s_mov_b32 s6, 0
@@ -3453,7 +3455,7 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-NEXT: v_sub_u32_e32 v8, 64, v16
; GFX9-G-NEXT: v_lshrrev_b64 v[8:9], v8, v[0:1]
; GFX9-G-NEXT: v_lshlrev_b64 v[10:11], v16, v[2:3]
-; GFX9-G-NEXT: v_subrev_u32_e32 v14, 64, v16
+; GFX9-G-NEXT: v_add_u32_e32 v14, 0xffffffc0, v16
; GFX9-G-NEXT: v_lshlrev_b64 v[12:13], v16, v[0:1]
; GFX9-G-NEXT: v_or_b32_e32 v10, v8, v10
; GFX9-G-NEXT: v_or_b32_e32 v11, v9, v11
@@ -3476,7 +3478,7 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-NEXT: s_cbranch_execz .LBB1_5
; GFX9-G-NEXT: ; %bb.2: ; %udiv-preheader
; GFX9-G-NEXT: v_sub_u32_e32 v12, 64, v18
-; GFX9-G-NEXT: v_subrev_u32_e32 v22, 64, v18
+; GFX9-G-NEXT: v_add_u32_e32 v22, 0xffffffc0, v18
; GFX9-G-NEXT: v_lshrrev_b64 v[10:11], v18, v[0:1]
; GFX9-G-NEXT: v_lshlrev_b64 v[12:13], v12, v[2:3]
; GFX9-G-NEXT: v_lshrrev_b64 v[16:17], v18, v[2:3]
@@ -4175,8 +4177,9 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-G-O0-NEXT: v_mov_b32_e32 v21, v7
; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, v6
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s4
-; GFX9-G-O0-NEXT: v_sub_u32_e64 v4, v12, v4
+; GFX9-G-O0-NEXT: s_mov_b32 s5, 0xffffffc0
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s5
+; GFX9-G-O0-NEXT: v_add_u32_e64 v4, v12, v4
; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s4
; GFX9-G-O0-NEXT: v_sub_u32_e64 v5, v5, v12
; GFX9-G-O0-NEXT: s_mov_b32 s6, 0
@@ -4311,8 +4314,9 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: s_mov_b32 s7, 64
; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v9
; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s7
-; GFX9-G-O0-NEXT: v_sub_u32_e64 v2, v3, v0
+; GFX9-G-O0-NEXT: s_mov_b32 s6, 0xffffffc0
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s6
+; GFX9-G-O0-NEXT: v_add_u32_e64 v2, v3, v0
; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s7
; GFX9-G-O0-NEXT: v_sub_u32_e64 v8, v0, v3
; GFX9-G-O0-NEXT: s_mov_b32 s6, 0
diff --git a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
index e04cd711256081..691f3d36bc7360 100644
--- a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
@@ -476,18 +476,18 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[18:19]
; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
-; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
-; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, vcc
-; GISEL-NEXT: v_subb_u32_e64 v2, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[20:21]
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
+; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], 0, 0, vcc
+; GISEL-NEXT: v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[20:21]
; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc
-; GISEL-NEXT: v_xor_b32_e32 v8, 0x7f, v0
-; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[2:3]
+; GISEL-NEXT: v_xor_b32_e32 v8, 0x7f, v2
+; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[0:1]
; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc
-; GISEL-NEXT: v_or_b32_e32 v8, v8, v2
-; GISEL-NEXT: v_or_b32_e32 v9, v1, v3
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3]
+; GISEL-NEXT: v_or_b32_e32 v8, v8, v0
+; GISEL-NEXT: v_or_b32_e32 v9, v3, v1
+; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
; GISEL-NEXT: v_cndmask_b32_e32 v20, v21, v20, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9]
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
@@ -505,12 +505,13 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5]
; GISEL-NEXT: s_cbranch_execz .LBB0_6
; GISEL-NEXT: ; %bb.1: ; %udiv-bb15
-; GISEL-NEXT: v_add_i32_e32 v28, vcc, 1, v0
-; GISEL-NEXT: v_addc_u32_e64 v29, s[4:5], 0, v1, vcc
-; GISEL-NEXT: v_sub_i32_e32 v32, vcc, 0x7f, v0
-; GISEL-NEXT: v_addc_u32_e64 v30, vcc, 0, v2, s[4:5]
-; GISEL-NEXT: v_addc_u32_e32 v31, vcc, 0, v3, vcc
-; GISEL-NEXT: v_subrev_i32_e64 v20, s[4:5], 64, v32
+; GISEL-NEXT: v_add_i32_e32 v28, vcc, 1, v2
+; GISEL-NEXT: v_addc_u32_e64 v29, s[4:5], 0, v3, vcc
+; GISEL-NEXT: v_sub_i32_e32 v32, vcc, 0x7f, v2
+; GISEL-NEXT: v_not_b32_e32 v2, 63
+; GISEL-NEXT: v_addc_u32_e64 v30, vcc, 0, v0, s[4:5]
+; GISEL-NEXT: v_addc_u32_e32 v31, vcc, 0, v1, vcc
+; GISEL-NEXT: v_add_i32_e64 v20, s[4:5], v32, v2
; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], 64, v32
; GISEL-NEXT: v_lshl_b64 v[0:1], v[16:17], v32
; GISEL-NEXT: v_lshl_b64 v[2:3], v[18:19], v32
@@ -536,7 +537,7 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: s_xor_b64 s[14:15], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB0_5
; GISEL-NEXT: ; %bb.2: ; %udiv-preheader4
-; GISEL-NEXT: v_subrev_i32_e32 v34, vcc, 64, v28
+; GISEL-NEXT: v_add_i32_e32 v34, vcc, 0xffffffc0, v28
; GISEL-NEXT: v_sub_i32_e32 v22, vcc, 64, v28
; GISEL-NEXT: v_lshr_b64 v[0:1], v[18:19], v28
; GISEL-NEXT: v_lshr_b64 v[2:3], v[16:17], v28
@@ -665,18 +666,18 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[12:13]
; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
-; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
-; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, vcc
-; GISEL-NEXT: v_subb_u32_e64 v2, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[10:11]
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
+; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], 0, 0, vcc
+; GISEL-NEXT: v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[10:11]
; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT: v_xor_b32_e32 v10, 0x7f, v0
-; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[2:3]
+; GISEL-NEXT: v_xor_b32_e32 v10, 0x7f, v2
+; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[0:1]
; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT: v_or_b32_e32 v10, v10, v2
-; GISEL-NEXT: v_or_b32_e32 v11, v1, v3
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3]
+; GISEL-NEXT: v_or_b32_e32 v10, v10, v0
+; GISEL-NEXT: v_or_b32_e32 v11, v3, v1
+; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
; GISEL-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11]
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
@@ -694,12 +695,13 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5]
; GISEL-NEXT: s_cbranch_execz .LBB0_12
; GISEL-NEXT: ; %bb.7: ; %udiv-bb1
-; GISEL-NEXT: v_add_i32_e32 v26, vcc, 1, v0
-; GISEL-NEXT: v_addc_u32_e64 v27, s[4:5], 0, v1, vcc
-; GISEL-NEXT: v_sub_i32_e32 v30, vcc, 0x7f, v0
-; GISEL-NEXT: v_addc_u32_e64 v28, vcc, 0, v2, s[4:5]
-; GISEL-NEXT: v_addc_u32_e32 v29, vcc, 0, v3, vcc
-; GISEL-NEXT: v_subrev_i32_e64 v14, s[4:5], 64, v30
+; GISEL-NEXT: v_add_i32_e32 v26, vcc, 1, v2
+; GISEL-NEXT: v_addc_u32_e64 v27, s[4:5], 0, v3, vcc
+; GISEL-NEXT: v_sub_i32_e32 v30, vcc, 0x7f, v2
+; GISEL-NEXT: v_not_b32_e32 v2, 63
+; GISEL-NEXT: v_addc_u32_e64 v28, vcc, 0, v0, s[4:5]
+; GISEL-NEXT: v_addc_u32_e32 v29, vcc, 0, v1, vcc
+; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v30, v2
; GISEL-NEXT: v_sub_i32_e64 v10, s[4:5], 64, v30
; GISEL-NEXT: v_lshl_b64 v[0:1], v[6:7], v30
; GISEL-NEXT: v_lshl_b64 v[2:3], v[12:13], v30
@@ -725,7 +727,7 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB0_11
; GISEL-NEXT: ; %bb.8: ; %udiv-preheader
-; GISEL-NEXT: v_subrev_i32_e32 v32, vcc, 64, v26
+; GISEL-NEXT: v_add_i32_e32 v32, vcc, 0xffffffc0, v26
; GISEL-NEXT: v_sub_i32_e32 v16, vcc, 64, v26
; GISEL-NEXT: v_lshr_b64 v[0:1], v[12:13], v26
; GISEL-NEXT: v_lshr_b64 v[2:3], v[6:7], v26
@@ -1229,18 +1231,18 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17]
; GISEL-NEXT: v_cndmask_b32_e32 v3, v19, v18, vcc
-; GISEL-NEXT: v_sub_i32_e32 v20, vcc, v2, v3
-; GISEL-NEXT: v_subb_u32_e64 v21, s[4:5], 0, 0, vcc
-; GISEL-NEXT: v_subb_u32_e64 v22, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_subb_u32_e64 v23, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[20:21], v[24:25]
+; GISEL-NEXT: v_sub_i32_e32 v22, vcc, v2, v3
+; GISEL-NEXT: v_subb_u32_e64 v23, s[4:5], 0, 0, vcc
+; GISEL-NEXT: v_subb_u32_e64 v20, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT: v_subb_u32_e64 v21, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[22:23], v[24:25]
; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT: v_xor_b32_e32 v2, 0x7f, v20
-; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[22:23]
+; GISEL-NEXT: v_xor_b32_e32 v2, 0x7f, v22
+; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[20:21]
; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
-; GISEL-NEXT: v_or_b32_e32 v2, v2, v22
-; GISEL-NEXT: v_or_b32_e32 v3, v21, v23
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[22:23]
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v20
+; GISEL-NEXT: v_or_b32_e32 v3, v23, v21
+; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[20:21]
; GISEL-NEXT: v_cndmask_b32_e32 v18, v19, v18, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3]
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
@@ -1258,12 +1260,13 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5]
; GISEL-NEXT: s_cbranch_execz .LBB1_6
; GISEL-NEXT: ; %bb.1: ; %udiv-bb15
-; GISEL-NEXT: v_add_i32_e32 v26, vcc, 1, v20
-; GISEL-NEXT: v_addc_u32_e64 v27, s[4:5], 0, v21, vcc
-; GISEL-NEXT: v_sub_i32_e32 v30, vcc, 0x7f, v20
-; GISEL-NEXT: v_addc_u32_e64 v28, vcc, 0, v22, s[4:5]
-; GISEL-NEXT: v_addc_u32_e32 v29, vcc, 0, v23, vcc
-; GISEL-NEXT: v_subrev_i32_e64 v22, s[4:5], 64, v30
+; GISEL-NEXT: v_add_i32_e32 v26, vcc, 1, v22
+; GISEL-NEXT: v_addc_u32_e64 v27, s[4:5], 0, v23, vcc
+; GISEL-NEXT: v_sub_i32_e32 v30, vcc, 0x7f, v22
+; GISEL-NEXT: v_not_b32_e32 v2, 63
+; GISEL-NEXT: v_addc_u32_e64 v28, vcc, 0, v20, s[4:5]
+; GISEL-NEXT: v_addc_u32_e32 v29, vcc, 0, v21, vcc
+; GISEL-NEXT: v_add_i32_e64 v22, s[4:5], v30, v2
; GISEL-NEXT: v_sub_i32_e64 v20, s[4:5], 64, v30
; GISEL-NEXT: v_lshl_b64 v[2:3], v[0:1], v30
; GISEL-NEXT: v_lshl_b64 v[18:19], v[16:17], v30
@@ -1289,7 +1292,7 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB1_5
; GISEL-NEXT: ; %bb.2: ; %udiv-preheader4
-; GISEL-NEXT: v_subrev_i32_e32 v32, vcc, 64, v26
+; GISEL-NEXT: v_add_i32_e32 v32, vcc, 0xffffffc0, v26
; GISEL-NEXT: v_sub_i32_e32 v24, vcc, 64, v26
; GISEL-NEXT: v_lshr_b64 v[18:19], v[16:17], v26
; GISEL-NEXT: v_lshr_b64 v[20:21], v[0:1], v26
@@ -1401,18 +1404,18 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7]
; GISEL-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
-; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
-; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, vcc
-; GISEL-NEXT: v_subb_u32_e64 v16, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_subb_u32_e64 v17, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[10:11]
+; GISEL-NEXT: v_sub_i32_e32 v16, vcc, v0, v1
+; GISEL-NEXT: v_subb_u32_e64 v17, s[4:5], 0, 0, vcc
+; GISEL-NEXT: v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[16:17], v[10:11]
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_xor_b32_e32 v8, 0x7f, v0
-; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[16:17]
+; GISEL-NEXT: v_xor_b32_e32 v8, 0x7f, v16
+; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[0:1]
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT: v_or_b32_e32 v8, v8, v16
-; GISEL-NEXT: v_or_b32_e32 v9, v1, v17
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17]
+; GISEL-NEXT: v_or_b32_e32 v8, v8, v0
+; GISEL-NEXT: v_or_b32_e32 v9, v17, v1
+; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v10, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9]
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
@@ -1430,12 +1433,13 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5]
; GISEL-NEXT: s_cbranch_execz .LBB1_12
; GISEL-NEXT: ; %bb.7: ; %udiv-bb1
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v0
-; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], 0, v1, vcc
-; GISEL-NEXT: v_sub_i32_e32 v26, vcc, 0x7f, v0
-; GISEL-NEXT: v_addc_u32_e64 v24, vcc, 0, v16, s[4:5]
-; GISEL-NEXT: v_addc_u32_e32 v25, vcc, 0, v17, vcc
-; GISEL-NEXT: v_subrev_i32_e64 v9, s[4:5], 64, v26
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v16
+; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], 0, v17, vcc
+; GISEL-NEXT: v_sub_i32_e32 v26, vcc, 0x7f, v16
+; GISEL-NEXT: v_not_b32_e32 v9, 63
+; GISEL-NEXT: v_addc_u32_e64 v24, vcc, 0, v0, s[4:5]
+; GISEL-NEXT: v_addc_u32_e32 v25, vcc, 0, v1, vcc
+; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v26, v9
; GISEL-NEXT: v_sub_i32_e64 v10, s[4:5], 64, v26
; GISEL-NEXT: v_lshl_b64 v[0:1], v[4:5], v26
; GISEL-NEXT: v_lshl_b64 v[16:17], v[6:7], v26
@@ -1461,7 +1465,7 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB1_11
; GISEL-NEXT: ; %bb.8: ; %udiv-preheader
-; GISEL-NEXT: v_subrev_i32_e32 v28, vcc, 64, v8
+; GISEL-NEXT: v_add_i32_e32 v28, vcc, 0xffffffc0, v8
; GISEL-NEXT: v_sub_i32_e32 v22, vcc, 64, v8
; GISEL-NEXT: v_lshr_b64 v[16:17], v[6:7], v8
; GISEL-NEXT: v_lshr_b64 v[20:21], v[4:5], v8
@@ -2072,18 +2076,18 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9]
; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
-; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
-; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, vcc
-; GISEL-NEXT: v_subb_u32_e64 v2, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[18:19]
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
+; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], 0, 0, vcc
+; GISEL-NEXT: v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[18:19]
; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc
-; GISEL-NEXT: v_xor_b32_e32 v18, 0x7f, v0
-; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[2:3]
+; GISEL-NEXT: v_xor_b32_e32 v18, 0x7f, v2
+; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[0:1]
; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc
-; GISEL-NEXT: v_or_b32_e32 v18, v18, v2
-; GISEL-NEXT: v_or_b32_e32 v19, v1, v3
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3]
+; GISEL-NEXT: v_or_b32_e32 v18, v18, v0
+; GISEL-NEXT: v_or_b32_e32 v19, v3, v1
+; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
; GISEL-NEXT: v_cndmask_b32_e32 v21, v22, v21, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[18:19]
; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
@@ -2101,12 +2105,13 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5]
; GISEL-NEXT: s_cbranch_execz .LBB2_6
; GISEL-NEXT: ; %bb.1: ; %udiv-bb15
-; GISEL-NEXT: v_add_i32_e32 v31, vcc, 1, v0
-; GISEL-NEXT: v_addc_u32_e64 v32, s[4:5], 0, v1, vcc
-; GISEL-NEXT: v_sub_i32_e32 v24, vcc, 0x7f, v0
-; GISEL-NEXT: v_addc_u32_e64 v33, vcc, 0, v2, s[4:5]
-; GISEL-NEXT: v_addc_u32_e32 v34, vcc, 0, v3, vcc
-; GISEL-NEXT: v_subrev_i32_e64 v20, s[4:5], 64, v24
+; GISEL-NEXT: v_add_i32_e32 v31, vcc, 1, v2
+; GISEL-NEXT: v_addc_u32_e64 v32, s[4:5], 0, v3, vcc
+; GISEL-NEXT: v_sub_i32_e32 v24, vcc, 0x7f, v2
+; GISEL-NEXT: v_not_b32_e32 v2, 63
+; GISEL-NEXT: v_addc_u32_e64 v33, vcc, 0, v0, s[4:5]
+; GISEL-NEXT: v_addc_u32_e32 v34, vcc, 0, v1, vcc
+; GISEL-NEXT: v_add_i32_e64 v20, s[4:5], v24, v2
; GISEL-NEXT: v_sub_i32_e64 v18, s[4:5], 64, v24
; GISEL-NEXT: v_lshl_b64 v[0:1], v[16:17], v24
; GISEL-NEXT: v_lshl_b64 v[2:3], v[8:9], v24
@@ -2132,7 +2137,7 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB2_5
; GISEL-NEXT: ; %bb.2: ; %udiv-preheader4
-; GISEL-NEXT: v_subrev_i32_e32 v24, vcc, 64, v31
+; GISEL-NEXT: v_add_i32_e32 v24, vcc, 0xffffffc0, v31
; GISEL-NEXT: v_sub_i32_e32 v22, vcc, 64, v31
; GISEL-NEXT: v_lshr_b64 v[0:1], v[8:9], v31
; GISEL-NEXT: v_lshr_b64 v[2:3], v[16:17], v31
@@ -2262,18 +2267,18 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7]
; GISEL-NEXT: v_cndmask_b32_e32 v1, v15, v14, vcc
-; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
-; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, vcc
-; GISEL-NEXT: v_subb_u32_e64 v14, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_subb_u32_e64 v15, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
+; GISEL-NEXT: v_sub_i32_e32 v14, vcc, v0, v1
+; GISEL-NEXT: v_subb_u32_e64 v15, s[4:5], 0, 0, vcc
+; GISEL-NEXT: v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[14:15], v[2:3]
; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc
-; GISEL-NEXT: v_xor_b32_e32 v2, 0x7f, v0
-; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[14:15]
+; GISEL-NEXT: v_xor_b32_e32 v2, 0x7f, v14
+; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[0:1]
; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc
-; GISEL-NEXT: v_or_b32_e32 v2, v2, v14
-; GISEL-NEXT: v_or_b32_e32 v3, v1, v15
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15]
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v0
+; GISEL-NEXT: v_or_b32_e32 v3, v15, v1
+; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
; GISEL-NEXT: v_cndmask_b32_e32 v21, v22, v21, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3]
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
@@ -2291,12 +2296,13 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5]
; GISEL-NEXT: s_cbranch_execz .LBB2_12
; GISEL-NEXT: ; %bb.7: ; %udiv-bb1
-; GISEL-NEXT: v_add_i32_e32 v36, vcc, 1, v0
-; GISEL-NEXT: v_addc_u32_e64 v37, s[4:5], 0, v1, vcc
-; GISEL-NEXT: v_sub_i32_e32 v24, vcc, 0x7f, v0
-; GISEL-NEXT: v_addc_u32_e64 v38, vcc, 0, v14, s[4:5]
-; GISEL-NEXT: v_addc_u32_e32 v39, vcc, 0, v15, vcc
-; GISEL-NEXT: v_subrev_i32_e64 v20, s[4:5], 64, v24
+; GISEL-NEXT: v_add_i32_e32 v36, vcc, 1, v14
+; GISEL-NEXT: v_addc_u32_e64 v37, s[4:5], 0, v15, vcc
+; GISEL-NEXT: v_sub_i32_e32 v24, vcc, 0x7f, v14
+; GISEL-NEXT: v_not_b32_e32 v2, 63
+; GISEL-NEXT: v_addc_u32_e64 v38, vcc, 0, v0, s[4:5]
+; GISEL-NEXT: v_addc_u32_e32 v39, vcc, 0, v1, vcc
+; GISEL-NEXT: v_add_i32_e64 v20, s[4:5], v24, v2
; GISEL-NEXT: v_sub_i32_e64 v14, s[4:5], 64, v24
; GISEL-NEXT: v_lshl_b64 v[0:1], v[12:13], v24
; GISEL-NEXT: v_lshl_b64 v[2:3], v[6:7], v24
@@ -2322,7 +2328,7 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB2_11
; GISEL-NEXT: ; %bb.8: ; %udiv-preheader
-; GISEL-NEXT: v_subrev_i32_e32 v24, vcc, 64, v36
+; GISEL-NEXT: v_add_i32_e32 v24, vcc, 0xffffffc0, v36
; GISEL-NEXT: v_sub_i32_e32 v22, vcc, 64, v36
; GISEL-NEXT: v_lshr_b64 v[0:1], v[6:7], v36
; GISEL-NEXT: v_lshr_b64 v[2:3], v[12:13], v36
@@ -2903,18 +2909,18 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3]
; GISEL-NEXT: v_cndmask_b32_e32 v17, v19, v18, vcc
-; GISEL-NEXT: v_sub_i32_e32 v16, vcc, v16, v17
-; GISEL-NEXT: v_subb_u32_e64 v17, s[4:5], 0, 0, vcc
-; GISEL-NEXT: v_subb_u32_e64 v18, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_subb_u32_e64 v19, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[16:17], v[20:21]
+; GISEL-NEXT: v_sub_i32_e32 v18, vcc, v16, v17
+; GISEL-NEXT: v_subb_u32_e64 v19, s[4:5], 0, 0, vcc
+; GISEL-NEXT: v_subb_u32_e64 v16, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT: v_subb_u32_e64 v17, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[18:19], v[20:21]
; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc
-; GISEL-NEXT: v_xor_b32_e32 v20, 0x7f, v16
-; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[18:19]
+; GISEL-NEXT: v_xor_b32_e32 v20, 0x7f, v18
+; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[16:17]
; GISEL-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc
-; GISEL-NEXT: v_or_b32_e32 v20, v20, v18
-; GISEL-NEXT: v_or_b32_e32 v21, v17, v19
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[18:19]
+; GISEL-NEXT: v_or_b32_e32 v20, v20, v16
+; GISEL-NEXT: v_or_b32_e32 v21, v19, v17
+; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17]
; GISEL-NEXT: v_cndmask_b32_e32 v23, v24, v23, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[20:21]
; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc
@@ -2932,12 +2938,13 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5]
; GISEL-NEXT: s_cbranch_execz .LBB3_6
; GISEL-NEXT: ; %bb.1: ; %udiv-bb15
-; GISEL-NEXT: v_add_i32_e32 v30, vcc, 1, v16
-; GISEL-NEXT: v_addc_u32_e64 v31, s[4:5], 0, v17, vcc
-; GISEL-NEXT: v_sub_i32_e32 v26, vcc, 0x7f, v16
-; GISEL-NEXT: v_addc_u32_e64 v32, vcc, 0, v18, s[4:5]
-; GISEL-NEXT: v_addc_u32_e32 v33, vcc, 0, v19, vcc
-; GISEL-NEXT: v_subrev_i32_e64 v22, s[4:5], 64, v26
+; GISEL-NEXT: v_add_i32_e32 v30, vcc, 1, v18
+; GISEL-NEXT: v_addc_u32_e64 v31, s[4:5], 0, v19, vcc
+; GISEL-NEXT: v_sub_i32_e32 v26, vcc, 0x7f, v18
+; GISEL-NEXT: v_not_b32_e32 v18, 63
+; GISEL-NEXT: v_addc_u32_e64 v32, vcc, 0, v16, s[4:5]
+; GISEL-NEXT: v_addc_u32_e32 v33, vcc, 0, v17, vcc
+; GISEL-NEXT: v_add_i32_e64 v22, s[4:5], v26, v18
; GISEL-NEXT: v_sub_i32_e64 v20, s[4:5], 64, v26
; GISEL-NEXT: v_lshl_b64 v[16:17], v[0:1], v26
; GISEL-NEXT: v_lshl_b64 v[18:19], v[2:3], v26
@@ -2963,7 +2970,7 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB3_5
; GISEL-NEXT: ; %bb.2: ; %udiv-preheader4
-; GISEL-NEXT: v_subrev_i32_e32 v26, vcc, 64, v30
+; GISEL-NEXT: v_add_i32_e32 v26, vcc, 0xffffffc0, v30
; GISEL-NEXT: v_sub_i32_e32 v24, vcc, 64, v30
; GISEL-NEXT: v_lshr_b64 v[16:17], v[2:3], v30
; GISEL-NEXT: v_lshr_b64 v[18:19], v[0:1], v30
@@ -3075,18 +3082,18 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7]
; GISEL-NEXT: v_cndmask_b32_e32 v17, v19, v18, vcc
-; GISEL-NEXT: v_sub_i32_e32 v16, vcc, v16, v17
-; GISEL-NEXT: v_subb_u32_e64 v17, s[4:5], 0, 0, vcc
-; GISEL-NEXT: v_subb_u32_e64 v22, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_subb_u32_e64 v23, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[16:17], v[24:25]
+; GISEL-NEXT: v_sub_i32_e32 v22, vcc, v16, v17
+; GISEL-NEXT: v_subb_u32_e64 v23, s[4:5], 0, 0, vcc
+; GISEL-NEXT: v_subb_u32_e64 v16, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT: v_subb_u32_e64 v17, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[22:23], v[24:25]
; GISEL-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc
-; GISEL-NEXT: v_xor_b32_e32 v18, 0x7f, v16
-; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[22:23]
+; GISEL-NEXT: v_xor_b32_e32 v18, 0x7f, v22
+; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[16:17]
; GISEL-NEXT: v_cndmask_b32_e64 v25, 0, 1, vcc
-; GISEL-NEXT: v_or_b32_e32 v18, v18, v22
-; GISEL-NEXT: v_or_b32_e32 v19, v17, v23
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[22:23]
+; GISEL-NEXT: v_or_b32_e32 v18, v18, v16
+; GISEL-NEXT: v_or_b32_e32 v19, v23, v17
+; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17]
; GISEL-NEXT: v_cndmask_b32_e32 v24, v25, v24, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[18:19]
; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
@@ -3104,12 +3111,13 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5]
; GISEL-NEXT: s_cbranch_execz .LBB3_12
; GISEL-NEXT: ; %bb.7: ; %udiv-bb1
-; GISEL-NEXT: v_add_i32_e32 v34, vcc, 1, v16
-; GISEL-NEXT: v_addc_u32_e64 v35, s[4:5], 0, v17, vcc
-; GISEL-NEXT: v_sub_i32_e32 v28, vcc, 0x7f, v16
-; GISEL-NEXT: v_addc_u32_e64 v36, vcc, 0, v22, s[4:5]
-; GISEL-NEXT: v_addc_u32_e32 v37, vcc, 0, v23, vcc
-; GISEL-NEXT: v_subrev_i32_e64 v24, s[4:5], 64, v28
+; GISEL-NEXT: v_add_i32_e32 v34, vcc, 1, v22
+; GISEL-NEXT: v_addc_u32_e64 v35, s[4:5], 0, v23, vcc
+; GISEL-NEXT: v_sub_i32_e32 v28, vcc, 0x7f, v22
+; GISEL-NEXT: v_not_b32_e32 v18, 63
+; GISEL-NEXT: v_addc_u32_e64 v36, vcc, 0, v16, s[4:5]
+; GISEL-NEXT: v_addc_u32_e32 v37, vcc, 0, v17, vcc
+; GISEL-NEXT: v_add_i32_e64 v24, s[4:5], v28, v18
; GISEL-NEXT: v_sub_i32_e64 v22, s[4:5], 64, v28
; GISEL-NEXT: v_lshl_b64 v[16:17], v[4:5], v28
; GISEL-NEXT: v_lshl_b64 v[18:19], v[6:7], v28
@@ -3135,7 +3143,7 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB3_11
; GISEL-NEXT: ; %bb.8: ; %udiv-preheader
-; GISEL-NEXT: v_subrev_i32_e32 v28, vcc, 64, v34
+; GISEL-NEXT: v_add_i32_e32 v28, vcc, 0xffffffc0, v34
; GISEL-NEXT: v_sub_i32_e32 v26, vcc, 64, v34
; GISEL-NEXT: v_lshr_b64 v[16:17], v[6:7], v34
; GISEL-NEXT: v_lshr_b64 v[18:19], v[4:5], v34
diff --git a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
index 05a4c2a1c04644..a0fe9d88e31cf9 100644
--- a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
@@ -234,17 +234,17 @@ define i128 @fptosi_f64_to_i128(double %x) {
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB0_4
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
-; GISEL-NEXT: v_add_u32_e32 v6, 0xfffffbcd, v6
-; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
+; GISEL-NEXT: v_add_u32_e32 v7, 0xfffffbcd, v6
+; GISEL-NEXT: v_lshlrev_b64 v[0:1], v7, v[4:5]
+; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v12, 0, v1, vcc
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v11, v10, 0
-; GISEL-NEXT: v_subrev_u32_e32 v7, 64, v6
-; GISEL-NEXT: v_sub_u32_e32 v2, 64, v6
+; GISEL-NEXT: v_add_u32_e32 v6, 0xfffffb8d, v6
+; GISEL-NEXT: v_sub_u32_e32 v2, 64, v7
; GISEL-NEXT: v_lshrrev_b64 v[2:3], v2, v[4:5]
-; GISEL-NEXT: v_lshlrev_b64 v[4:5], v7, v[4:5]
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v6
+; GISEL-NEXT: v_lshlrev_b64 v[4:5], v6, v[4:5]
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v7
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v9, v[0:1]
; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v13, v2, 0, s[6:7]
@@ -602,17 +602,17 @@ define i128 @fptoui_f64_to_i128(double %x) {
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB1_4
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
-; GISEL-NEXT: v_add_u32_e32 v6, 0xfffffbcd, v6
-; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
+; GISEL-NEXT: v_add_u32_e32 v7, 0xfffffbcd, v6
+; GISEL-NEXT: v_lshlrev_b64 v[0:1], v7, v[4:5]
+; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v12, 0, v1, vcc
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v11, v10, 0
-; GISEL-NEXT: v_subrev_u32_e32 v7, 64, v6
-; GISEL-NEXT: v_sub_u32_e32 v2, 64, v6
+; GISEL-NEXT: v_add_u32_e32 v6, 0xfffffb8d, v6
+; GISEL-NEXT: v_sub_u32_e32 v2, 64, v7
; GISEL-NEXT: v_lshrrev_b64 v[2:3], v2, v[4:5]
-; GISEL-NEXT: v_lshlrev_b64 v[4:5], v7, v[4:5]
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v6
+; GISEL-NEXT: v_lshlrev_b64 v[4:5], v6, v[4:5]
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v7
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v9, v[0:1]
; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v13, v2, 0, s[6:7]
@@ -963,17 +963,17 @@ define i128 @fptosi_f32_to_i128(float %x) {
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB2_4
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
-; GISEL-NEXT: v_add_u32_e32 v6, 0xffffff6a, v6
-; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
+; GISEL-NEXT: v_add_u32_e32 v7, 0xffffff6a, v6
+; GISEL-NEXT: v_lshlrev_b64 v[0:1], v7, v[4:5]
+; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v12, 0, v1, vcc
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v11, v10, 0
-; GISEL-NEXT: v_subrev_u32_e32 v7, 64, v6
-; GISEL-NEXT: v_sub_u32_e32 v2, 64, v6
+; GISEL-NEXT: v_add_u32_e32 v6, 0xffffff2a, v6
+; GISEL-NEXT: v_sub_u32_e32 v2, 64, v7
; GISEL-NEXT: v_lshrrev_b64 v[2:3], v2, v[4:5]
-; GISEL-NEXT: v_lshlrev_b64 v[4:5], v7, v[4:5]
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v6
+; GISEL-NEXT: v_lshlrev_b64 v[4:5], v6, v[4:5]
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v7
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v8, v[0:1]
; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v13, v2, 0, s[6:7]
@@ -1318,17 +1318,17 @@ define i128 @fptoui_f32_to_i128(float %x) {
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB3_4
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
-; GISEL-NEXT: v_add_u32_e32 v6, 0xffffff6a, v6
-; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
+; GISEL-NEXT: v_add_u32_e32 v7, 0xffffff6a, v6
+; GISEL-NEXT: v_lshlrev_b64 v[0:1], v7, v[4:5]
+; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v12, 0, v1, vcc
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v11, v10, 0
-; GISEL-NEXT: v_subrev_u32_e32 v7, 64, v6
-; GISEL-NEXT: v_sub_u32_e32 v2, 64, v6
+; GISEL-NEXT: v_add_u32_e32 v6, 0xffffff2a, v6
+; GISEL-NEXT: v_sub_u32_e32 v2, 64, v7
; GISEL-NEXT: v_lshrrev_b64 v[2:3], v2, v[4:5]
-; GISEL-NEXT: v_lshlrev_b64 v[4:5], v7, v[4:5]
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v6
+; GISEL-NEXT: v_lshlrev_b64 v[4:5], v6, v[4:5]
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v7
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v8, v[0:1]
; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v13, v2, 0, s[6:7]
@@ -1699,7 +1699,7 @@ define i128 @fptosi_bf16_to_i128(bfloat %x) {
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
; GISEL-NEXT: v_add_u32_e32 v6, 0xffffff7a, v5
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[7:8]
-; GISEL-NEXT: v_subrev_u32_e32 v4, 64, v6
+; GISEL-NEXT: v_add_u32_e32 v4, 0xffffff3a, v5
; GISEL-NEXT: v_sub_u32_e32 v2, 64, v6
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
; GISEL-NEXT: v_lshl_or_b32 v11, v11, 16, v11
@@ -2048,7 +2048,7 @@ define i128 @fptoui_bf16_to_i128(bfloat %x) {
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
; GISEL-NEXT: v_add_u32_e32 v6, 0xffffff7a, v5
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[7:8]
-; GISEL-NEXT: v_subrev_u32_e32 v4, 64, v6
+; GISEL-NEXT: v_add_u32_e32 v4, 0xffffff3a, v5
; GISEL-NEXT: v_sub_u32_e32 v2, 64, v6
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
; GISEL-NEXT: v_lshl_or_b32 v11, v11, 16, v11
diff --git a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
index 2999ddb8315883..f372a54894604c 100644
--- a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
@@ -193,32 +193,32 @@ define float @sitofp_i128_to_f32(i128 %x) {
; GISEL-NEXT: v_sub_u32_e32 v11, 64, v4
; GISEL-NEXT: v_lshrrev_b64 v[9:10], v4, v[0:1]
; GISEL-NEXT: v_lshlrev_b64 v[11:12], v11, v[2:3]
-; GISEL-NEXT: v_subrev_u32_e32 v13, 64, v4
+; GISEL-NEXT: v_add_u32_e32 v13, 0xffffffc0, v4
; GISEL-NEXT: v_or_b32_e32 v11, v9, v11
; GISEL-NEXT: v_or_b32_e32 v12, v10, v12
; GISEL-NEXT: v_lshrrev_b64 v[9:10], v13, v[2:3]
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v4
-; GISEL-NEXT: v_add_u32_e32 v5, 26, v5
+; GISEL-NEXT: v_add_u32_e32 v14, 26, v5
; GISEL-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v10, v10, v12, vcc
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
-; GISEL-NEXT: v_sub_u32_e32 v11, 64, v5
+; GISEL-NEXT: v_sub_u32_e32 v11, 64, v14
; GISEL-NEXT: v_cndmask_b32_e32 v13, v9, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v4, v10, v1, vcc
-; GISEL-NEXT: v_lshrrev_b64 v[9:10], v5, -1
+; GISEL-NEXT: v_lshrrev_b64 v[9:10], v14, -1
; GISEL-NEXT: v_lshlrev_b64 v[11:12], v11, -1
-; GISEL-NEXT: v_subrev_u32_e32 v14, 64, v5
+; GISEL-NEXT: v_add_u32_e32 v5, 0xffffffda, v5
; GISEL-NEXT: v_or_b32_e32 v15, v9, v11
; GISEL-NEXT: v_or_b32_e32 v16, v10, v12
-; GISEL-NEXT: v_lshrrev_b64 v[11:12], v14, -1
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v5
-; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v15, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5
+; GISEL-NEXT: v_lshrrev_b64 v[11:12], v5, -1
+; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14
+; GISEL-NEXT: v_cndmask_b32_e32 v5, v11, v15, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v11, v12, v16, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14
; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v10, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v11, -1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v11, v12, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v11, v11, -1, s[4:5]
; GISEL-NEXT: v_and_b32_e32 v2, v9, v2
; GISEL-NEXT: v_and_b32_e32 v3, v10, v3
; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2
@@ -438,32 +438,32 @@ define float @uitofp_i128_to_f32(i128 %x) {
; GISEL-NEXT: v_sub_u32_e32 v10, 64, v4
; GISEL-NEXT: v_lshrrev_b64 v[8:9], v4, v[0:1]
; GISEL-NEXT: v_lshlrev_b64 v[10:11], v10, v[2:3]
-; GISEL-NEXT: v_subrev_u32_e32 v12, 64, v4
+; GISEL-NEXT: v_add_u32_e32 v12, 0xffffffc0, v4
; GISEL-NEXT: v_or_b32_e32 v10, v8, v10
; GISEL-NEXT: v_or_b32_e32 v11, v9, v11
; GISEL-NEXT: v_lshrrev_b64 v[8:9], v12, v[2:3]
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v4
-; GISEL-NEXT: v_add_u32_e32 v5, 26, v5
+; GISEL-NEXT: v_add_u32_e32 v13, 26, v5
; GISEL-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
-; GISEL-NEXT: v_sub_u32_e32 v10, 64, v5
+; GISEL-NEXT: v_sub_u32_e32 v10, 64, v13
; GISEL-NEXT: v_cndmask_b32_e32 v12, v8, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v4, v9, v1, vcc
-; GISEL-NEXT: v_lshrrev_b64 v[8:9], v5, -1
+; GISEL-NEXT: v_lshrrev_b64 v[8:9], v13, -1
; GISEL-NEXT: v_lshlrev_b64 v[10:11], v10, -1
-; GISEL-NEXT: v_subrev_u32_e32 v13, 64, v5
+; GISEL-NEXT: v_add_u32_e32 v5, 0xffffffda, v5
; GISEL-NEXT: v_or_b32_e32 v14, v8, v10
; GISEL-NEXT: v_or_b32_e32 v15, v9, v11
-; GISEL-NEXT: v_lshrrev_b64 v[10:11], v13, -1
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v5
-; GISEL-NEXT: v_cndmask_b32_e32 v10, v10, v14, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v15, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5
+; GISEL-NEXT: v_lshrrev_b64 v[10:11], v5, -1
+; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v13
+; GISEL-NEXT: v_cndmask_b32_e32 v5, v10, v14, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v15, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v13
; GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v8, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v10, -1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v10, v11, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v10, v10, -1, s[4:5]
; GISEL-NEXT: v_and_b32_e32 v2, v8, v2
; GISEL-NEXT: v_and_b32_e32 v3, v9, v3
; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2
@@ -723,34 +723,34 @@ define double @sitofp_i128_to_f64(i128 %x) {
; GISEL-NEXT: v_sub_u32_e32 v10, 64, v14
; GISEL-NEXT: v_lshrrev_b64 v[0:1], v14, v[2:3]
; GISEL-NEXT: v_lshlrev_b64 v[10:11], v10, v[4:5]
-; GISEL-NEXT: v_subrev_u32_e32 v15, 64, v14
+; GISEL-NEXT: v_add_u32_e32 v15, 0xffffffc0, v14
; GISEL-NEXT: v_lshrrev_b64 v[12:13], v14, v[4:5]
; GISEL-NEXT: v_or_b32_e32 v10, v0, v10
; GISEL-NEXT: v_or_b32_e32 v11, v1, v11
; GISEL-NEXT: v_lshrrev_b64 v[0:1], v15, v[4:5]
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14
-; GISEL-NEXT: v_add_u32_e32 v9, 55, v9
+; GISEL-NEXT: v_add_u32_e32 v15, 55, v9
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v12, vcc
-; GISEL-NEXT: v_sub_u32_e32 v12, 64, v9
+; GISEL-NEXT: v_sub_u32_e32 v12, 64, v15
; GISEL-NEXT: v_cndmask_b32_e64 v14, v0, v2, s[4:5]
; GISEL-NEXT: v_cndmask_b32_e64 v10, v1, v3, s[4:5]
-; GISEL-NEXT: v_lshrrev_b64 v[0:1], v9, -1
+; GISEL-NEXT: v_lshrrev_b64 v[0:1], v15, -1
; GISEL-NEXT: v_lshlrev_b64 v[12:13], v12, -1
-; GISEL-NEXT: v_subrev_u32_e32 v15, 64, v9
+; GISEL-NEXT: v_add_u32_e32 v9, -9, v9
; GISEL-NEXT: v_or_b32_e32 v16, v0, v12
; GISEL-NEXT: v_or_b32_e32 v17, v1, v13
-; GISEL-NEXT: v_lshrrev_b64 v[12:13], v15, -1
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v9
-; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v13, v13, v17, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9
+; GISEL-NEXT: v_lshrrev_b64 v[12:13], v9, -1
+; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15
+; GISEL-NEXT: v_cndmask_b32_e32 v9, v12, v16, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v12, v13, v17, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v15
; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v9, v12, -1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v12, v13, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v9, v9, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v12, v12, -1, s[4:5]
; GISEL-NEXT: v_and_b32_e32 v0, v0, v4
; GISEL-NEXT: v_and_b32_e32 v1, v1, v5
; GISEL-NEXT: v_and_or_b32 v0, v9, v2, v0
@@ -999,35 +999,35 @@ define double @uitofp_i128_to_f64(i128 %x) {
; GISEL-NEXT: v_sub_u32_e32 v9, 64, v13
; GISEL-NEXT: v_lshrrev_b64 v[4:5], v13, v[0:1]
; GISEL-NEXT: v_lshlrev_b64 v[9:10], v9, v[2:3]
-; GISEL-NEXT: v_subrev_u32_e32 v14, 64, v13
+; GISEL-NEXT: v_add_u32_e32 v14, 0xffffffc0, v13
; GISEL-NEXT: v_lshrrev_b64 v[11:12], v13, v[2:3]
; GISEL-NEXT: v_or_b32_e32 v9, v4, v9
; GISEL-NEXT: v_or_b32_e32 v10, v5, v10
; GISEL-NEXT: v_lshrrev_b64 v[4:5], v14, v[2:3]
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v13
-; GISEL-NEXT: v_add_u32_e32 v8, 55, v8
+; GISEL-NEXT: v_add_u32_e32 v15, 55, v8
; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v9, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v10, vcc
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v13
; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v11, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v12, vcc
-; GISEL-NEXT: v_sub_u32_e32 v12, 64, v8
+; GISEL-NEXT: v_sub_u32_e32 v12, 64, v15
; GISEL-NEXT: v_cndmask_b32_e64 v14, v4, v0, s[4:5]
; GISEL-NEXT: v_cndmask_b32_e64 v9, v5, v1, s[4:5]
-; GISEL-NEXT: v_lshrrev_b64 v[4:5], v8, -1
+; GISEL-NEXT: v_lshrrev_b64 v[4:5], v15, -1
; GISEL-NEXT: v_lshlrev_b64 v[12:13], v12, -1
-; GISEL-NEXT: v_subrev_u32_e32 v15, 64, v8
+; GISEL-NEXT: v_add_u32_e32 v8, -9, v8
; GISEL-NEXT: v_or_b32_e32 v16, v4, v12
; GISEL-NEXT: v_or_b32_e32 v17, v5, v13
-; GISEL-NEXT: v_lshrrev_b64 v[12:13], v15, -1
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v8
-; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v13, v13, v17, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8
+; GISEL-NEXT: v_lshrrev_b64 v[12:13], v8, -1
+; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15
+; GISEL-NEXT: v_cndmask_b32_e32 v8, v12, v16, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v12, v13, v17, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v15
; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v5, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v8, v12, -1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v12, v13, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v8, v8, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v12, v12, -1, s[4:5]
; GISEL-NEXT: v_and_b32_e32 v2, v4, v2
; GISEL-NEXT: v_and_b32_e32 v3, v5, v3
; GISEL-NEXT: v_and_or_b32 v0, v8, v0, v2
@@ -1284,32 +1284,32 @@ define half @sitofp_i128_to_f16(i128 %x) {
; GISEL-NEXT: v_sub_u32_e32 v11, 64, v4
; GISEL-NEXT: v_lshrrev_b64 v[9:10], v4, v[0:1]
; GISEL-NEXT: v_lshlrev_b64 v[11:12], v11, v[2:3]
-; GISEL-NEXT: v_subrev_u32_e32 v13, 64, v4
+; GISEL-NEXT: v_add_u32_e32 v13, 0xffffffc0, v4
; GISEL-NEXT: v_or_b32_e32 v11, v9, v11
; GISEL-NEXT: v_or_b32_e32 v12, v10, v12
; GISEL-NEXT: v_lshrrev_b64 v[9:10], v13, v[2:3]
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v4
-; GISEL-NEXT: v_add_u32_e32 v5, 26, v5
+; GISEL-NEXT: v_add_u32_e32 v14, 26, v5
; GISEL-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v10, v10, v12, vcc
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
-; GISEL-NEXT: v_sub_u32_e32 v11, 64, v5
+; GISEL-NEXT: v_sub_u32_e32 v11, 64, v14
; GISEL-NEXT: v_cndmask_b32_e32 v13, v9, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v4, v10, v1, vcc
-; GISEL-NEXT: v_lshrrev_b64 v[9:10], v5, -1
+; GISEL-NEXT: v_lshrrev_b64 v[9:10], v14, -1
; GISEL-NEXT: v_lshlrev_b64 v[11:12], v11, -1
-; GISEL-NEXT: v_subrev_u32_e32 v14, 64, v5
+; GISEL-NEXT: v_add_u32_e32 v5, 0xffffffda, v5
; GISEL-NEXT: v_or_b32_e32 v15, v9, v11
; GISEL-NEXT: v_or_b32_e32 v16, v10, v12
-; GISEL-NEXT: v_lshrrev_b64 v[11:12], v14, -1
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v5
-; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v15, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5
+; GISEL-NEXT: v_lshrrev_b64 v[11:12], v5, -1
+; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14
+; GISEL-NEXT: v_cndmask_b32_e32 v5, v11, v15, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v11, v12, v16, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14
; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v10, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v11, -1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v11, v12, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v11, v11, -1, s[4:5]
; GISEL-NEXT: v_and_b32_e32 v2, v9, v2
; GISEL-NEXT: v_and_b32_e32 v3, v10, v3
; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2
@@ -1531,32 +1531,32 @@ define half @uitofp_i128_to_f16(i128 %x) {
; GISEL-NEXT: v_sub_u32_e32 v10, 64, v4
; GISEL-NEXT: v_lshrrev_b64 v[8:9], v4, v[0:1]
; GISEL-NEXT: v_lshlrev_b64 v[10:11], v10, v[2:3]
-; GISEL-NEXT: v_subrev_u32_e32 v12, 64, v4
+; GISEL-NEXT: v_add_u32_e32 v12, 0xffffffc0, v4
; GISEL-NEXT: v_or_b32_e32 v10, v8, v10
; GISEL-NEXT: v_or_b32_e32 v11, v9, v11
; GISEL-NEXT: v_lshrrev_b64 v[8:9], v12, v[2:3]
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v4
-; GISEL-NEXT: v_add_u32_e32 v5, 26, v5
+; GISEL-NEXT: v_add_u32_e32 v13, 26, v5
; GISEL-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
-; GISEL-NEXT: v_sub_u32_e32 v10, 64, v5
+; GISEL-NEXT: v_sub_u32_e32 v10, 64, v13
; GISEL-NEXT: v_cndmask_b32_e32 v12, v8, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v4, v9, v1, vcc
-; GISEL-NEXT: v_lshrrev_b64 v[8:9], v5, -1
+; GISEL-NEXT: v_lshrrev_b64 v[8:9], v13, -1
; GISEL-NEXT: v_lshlrev_b64 v[10:11], v10, -1
-; GISEL-NEXT: v_subrev_u32_e32 v13, 64, v5
+; GISEL-NEXT: v_add_u32_e32 v5, 0xffffffda, v5
; GISEL-NEXT: v_or_b32_e32 v14, v8, v10
; GISEL-NEXT: v_or_b32_e32 v15, v9, v11
-; GISEL-NEXT: v_lshrrev_b64 v[10:11], v13, -1
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v5
-; GISEL-NEXT: v_cndmask_b32_e32 v10, v10, v14, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v15, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5
+; GISEL-NEXT: v_lshrrev_b64 v[10:11], v5, -1
+; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v13
+; GISEL-NEXT: v_cndmask_b32_e32 v5, v10, v14, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v15, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v13
; GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v8, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v10, -1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v10, v11, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v10, v10, -1, s[4:5]
; GISEL-NEXT: v_and_b32_e32 v2, v8, v2
; GISEL-NEXT: v_and_b32_e32 v3, v9, v3
; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
index e4b9299869334c..0dcfb840dec069 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
@@ -139,7 +139,7 @@ define amdgpu_kernel void @test_barrier(ptr addrspace(1) %out, i32 %size) #0 {
; VARIANT6-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
; VARIANT6-NEXT: s_wait_kmcnt 0x0
; VARIANT6-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_and_b32 v4, 0x3ff, v0
-; VARIANT6-NEXT: s_sub_co_i32 s2, s2, 1
+; VARIANT6-NEXT: s_add_co_i32 s2, s2, -1
; VARIANT6-NEXT: s_delay_alu instid0(VALU_DEP_1)
; VARIANT6-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_lshlrev_b32 v5, 2, v4
; VARIANT6-NEXT: v_sub_nc_u32_e32 v0, s2, v4
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
index b7c566f682e349..1d869559d9e772 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
@@ -395,7 +395,7 @@ define i1 @posnormal_f16(half %x) nounwind {
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v0, v2
-; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v1
+; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v1
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
@@ -459,7 +459,7 @@ define i1 @negnormal_f16(half %x) nounwind {
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
; GFX7GLISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], v0, v2
-; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v1
+; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v1
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
@@ -513,7 +513,7 @@ define i1 @possubnormal_f16(half %x) nounwind {
; GFX7GLISEL-LABEL: possubnormal_f16:
; GFX7GLISEL: ; %bb.0:
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 1, v0
+; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, -1, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x3ff
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
@@ -575,7 +575,7 @@ define i1 @negsubnormal_f16(half %x) nounwind {
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
; GFX7GLISEL-NEXT: v_cmp_ne_u32_e32 vcc, v0, v2
-; GFX7GLISEL-NEXT: v_subrev_i32_e64 v0, s[4:5], 1, v1
+; GFX7GLISEL-NEXT: v_add_i32_e64 v0, s[4:5], -1, v1
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x3ff
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], v0, v1
@@ -1587,7 +1587,7 @@ define i1 @not_issubnormal_or_zero_f16(half %x) {
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00
-; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v0
+; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0
; GFX7GLISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v2
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
@@ -1647,7 +1647,7 @@ define i1 @isnormal_f16(half %x) {
; GFX7GLISEL: ; %bb.0:
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v0
+; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
@@ -1780,7 +1780,7 @@ define i1 @not_is_plus_normal_f16(half %x) {
; GFX7GLISEL-NEXT: s_or_b64 s[6:7], vcc, s[6:7]
; GFX7GLISEL-NEXT: v_cmp_gt_u32_e32 vcc, v3, v2
; GFX7GLISEL-NEXT: s_or_b64 s[6:7], s[6:7], vcc
-; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v1
+; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v1
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
@@ -1853,7 +1853,7 @@ define i1 @not_is_neg_normal_f16(half %x) {
; GFX7GLISEL-NEXT: s_or_b64 s[6:7], vcc, s[6:7]
; GFX7GLISEL-NEXT: v_cmp_gt_u32_e32 vcc, v3, v2
; GFX7GLISEL-NEXT: s_or_b64 s[6:7], s[6:7], vcc
-; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v1
+; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v1
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
@@ -1911,7 +1911,7 @@ define i1 @issubnormal_f16(half %x) {
; GFX7GLISEL: ; %bb.0:
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 1, v0
+; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, -1, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x3ff
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
@@ -1974,7 +1974,7 @@ define i1 @not_issubnormal_f16(half %x) {
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
; GFX7GLISEL-NEXT: v_cmp_gt_u32_e32 vcc, v1, v2
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc
-; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v0
+; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
@@ -2081,7 +2081,7 @@ define i1 @not_iszero_f16(half %x) {
; GFX7GLISEL: ; %bb.0:
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_subrev_i32_e32 v1, vcc, 1, v0
+; GFX7GLISEL-NEXT: v_add_i32_e32 v1, vcc, -1, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v2
@@ -2091,7 +2091,7 @@ define i1 @not_iszero_f16(half %x) {
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
; GFX7GLISEL-NEXT: v_cmp_gt_u32_e32 vcc, v1, v2
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc
-; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v0
+; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
@@ -2568,7 +2568,7 @@ define i1 @not_iszero_or_nan_f16(half %x) {
; GFX7GLISEL: ; %bb.0: ; %entry
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_subrev_i32_e32 v1, vcc, 1, v0
+; GFX7GLISEL-NEXT: v_add_i32_e32 v1, vcc, -1, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v2
@@ -2576,7 +2576,7 @@ define i1 @not_iszero_or_nan_f16(half %x) {
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v2
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v0
+; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
@@ -2635,7 +2635,7 @@ define i1 @not_iszero_or_nan_f_daz(half %x) #0 {
; GFX7GLISEL: ; %bb.0: ; %entry
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_subrev_i32_e32 v1, vcc, 1, v0
+; GFX7GLISEL-NEXT: v_add_i32_e32 v1, vcc, -1, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v2
@@ -2643,7 +2643,7 @@ define i1 @not_iszero_or_nan_f_daz(half %x) #0 {
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v2
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v0
+; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
@@ -2702,7 +2702,7 @@ define i1 @not_iszero_or_nan_f_maybe_daz(half %x) #1 {
; GFX7GLISEL: ; %bb.0: ; %entry
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_subrev_i32_e32 v1, vcc, 1, v0
+; GFX7GLISEL-NEXT: v_add_i32_e32 v1, vcc, -1, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v2
@@ -2710,7 +2710,7 @@ define i1 @not_iszero_or_nan_f_maybe_daz(half %x) #1 {
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v2
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v0
+; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
@@ -2902,7 +2902,7 @@ define i1 @not_iszero_or_qnan_f16(half %x) {
; GFX7GLISEL: ; %bb.0: ; %entry
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_subrev_i32_e32 v1, vcc, 1, v0
+; GFX7GLISEL-NEXT: v_add_i32_e32 v1, vcc, -1, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v2
@@ -2914,7 +2914,7 @@ define i1 @not_iszero_or_qnan_f16(half %x) {
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x1ff
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v2
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc
-; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v0
+; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
@@ -2983,7 +2983,7 @@ define i1 @not_iszero_or_snan_f16(half %x) {
; GFX7GLISEL: ; %bb.0: ; %entry
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_subrev_i32_e32 v1, vcc, 1, v0
+; GFX7GLISEL-NEXT: v_add_i32_e32 v1, vcc, -1, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v2
@@ -2994,7 +2994,7 @@ define i1 @not_iszero_or_snan_f16(half %x) {
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
; GFX7GLISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc
-; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v0
+; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
diff --git a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
index 10fdaaa17da0a4..a1a466fb04440d 100644
--- a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
+++ b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
@@ -41,7 +41,7 @@ define amdgpu_kernel void @v_test_i32_x_sub_64(ptr addrspace(1) %out, ptr addrsp
; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3]
; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT: v_subrev_i32_e32 v2, vcc, 64, v2
+; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 0xffffffc0, v2
; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT: s_endpgm
@@ -78,44 +78,79 @@ define amdgpu_kernel void @v_test_i32_x_sub_64(ptr addrspace(1) %out, ptr addrsp
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT: v_subrev_u32_e32 v2, vcc, 64, v3
+; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, 0xffffffc0, v3
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-LABEL: v_test_i32_x_sub_64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_subrev_u32_e32 v1, 64, v1
-; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-NEXT: s_endpgm
+; GFX9-SDAG-LABEL: v_test_i32_x_sub_64:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT: v_subrev_u32_e32 v1, 64, v1
+; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-SDAG-NEXT: s_endpgm
;
-; GFX10-LABEL: v_test_i32_x_sub_64:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 64, v1
-; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-NEXT: s_endpgm
+; GFX9-GISEL-LABEL: v_test_i32_x_sub_64:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: v_add_u32_e32 v1, 0xffffffc0, v1
+; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-GISEL-NEXT: s_endpgm
;
-; GFX11-LABEL: v_test_i32_x_sub_64:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 64, v1
-; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT: s_endpgm
+; GFX10-SDAG-LABEL: v_test_i32_x_sub_64:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: v_subrev_nc_u32_e32 v1, 64, v1
+; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-SDAG-NEXT: s_endpgm
+;
+; GFX10-GISEL-LABEL: v_test_i32_x_sub_64:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 0xffffffc0, v1
+; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-GISEL-NEXT: s_endpgm
+;
+; GFX11-SDAG-LABEL: v_test_i32_x_sub_64:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: v_subrev_nc_u32_e32 v1, 64, v1
+; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: v_test_i32_x_sub_64:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v1, 0xffffffc0, v1
+; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %tid.ext
@@ -163,8 +198,8 @@ define amdgpu_kernel void @v_test_i32_x_sub_64_multi_use(ptr addrspace(1) %out,
; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 glc
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
-; SI-GISEL-NEXT: v_subrev_i32_e32 v2, vcc, 64, v2
-; SI-GISEL-NEXT: v_subrev_i32_e32 v3, vcc, 64, v3
+; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 0xffffffc0, v2
+; SI-GISEL-NEXT: v_add_i32_e32 v3, vcc, 0xffffffc0, v3
; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
; SI-GISEL-NEXT: buffer_store_dword v3, v[0:1], s[0:3], 0 addr64
@@ -211,66 +246,119 @@ define amdgpu_kernel void @v_test_i32_x_sub_64_multi_use(ptr addrspace(1) %out,
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-GISEL-NEXT: v_subrev_u32_e32 v2, vcc, 64, v3
-; VI-GISEL-NEXT: v_subrev_u32_e32 v3, vcc, 64, v4
+; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, 0xffffffc0, v3
+; VI-GISEL-NEXT: v_add_u32_e32 v3, vcc, 0xffffffc0, v4
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
; VI-GISEL-NEXT: flat_store_dword v[0:1], v3
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-LABEL: v_test_i32_x_sub_64_multi_use:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_dword v1, v0, s[2:3] glc
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: global_load_dword v2, v0, s[2:3] glc
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_subrev_u32_e32 v1, 64, v1
-; GFX9-NEXT: v_subrev_u32_e32 v2, 64, v2
-; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: global_store_dword v0, v2, s[0:1]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_endpgm
+; GFX9-SDAG-LABEL: v_test_i32_x_sub_64_multi_use:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3] glc
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT: global_load_dword v2, v0, s[2:3] glc
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT: v_subrev_u32_e32 v1, 64, v1
+; GFX9-SDAG-NEXT: v_subrev_u32_e32 v2, 64, v2
+; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT: global_store_dword v0, v2, s[0:1]
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT: s_endpgm
;
-; GFX10-LABEL: v_test_i32_x_sub_64_multi_use:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: global_load_dword v1, v0, s[2:3] glc dlc
-; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: global_load_dword v2, v0, s[2:3] glc dlc
-; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 64, v1
-; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 64, v2
-; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: global_store_dword v0, v2, s[0:1]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_endpgm
+; GFX9-GISEL-LABEL: v_test_i32_x_sub_64_multi_use:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3] glc
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dword v2, v0, s[2:3] glc
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: v_add_u32_e32 v1, 0xffffffc0, v1
+; GFX9-GISEL-NEXT: v_add_u32_e32 v2, 0xffffffc0, v2
+; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: global_store_dword v0, v2, s[0:1]
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_endpgm
;
-; GFX11-LABEL: v_test_i32_x_sub_64_multi_use:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: global_load_b32 v2, v0, s[2:3] glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 64, v1
-; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 64, v2
-; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] dlc
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: global_store_b32 v0, v2, s[0:1] dlc
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: s_endpgm
+; GFX10-SDAG-LABEL: v_test_i32_x_sub_64_multi_use:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[2:3] glc dlc
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: global_load_dword v2, v0, s[2:3] glc dlc
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: v_subrev_nc_u32_e32 v1, 64, v1
+; GFX10-SDAG-NEXT: v_subrev_nc_u32_e32 v2, 64, v2
+; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-SDAG-NEXT: global_store_dword v0, v2, s[0:1]
+; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-SDAG-NEXT: s_endpgm
+;
+; GFX10-GISEL-LABEL: v_test_i32_x_sub_64_multi_use:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3] glc dlc
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: global_load_dword v2, v0, s[2:3] glc dlc
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 0xffffffc0, v1
+; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, 0xffffffc0, v2
+; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-GISEL-NEXT: global_store_dword v0, v2, s[0:1]
+; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-GISEL-NEXT: s_endpgm
+;
+; GFX11-SDAG-LABEL: v_test_i32_x_sub_64_multi_use:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: global_load_b32 v2, v0, s[2:3] glc dlc
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: v_subrev_nc_u32_e32 v1, 64, v1
+; GFX11-SDAG-NEXT: v_subrev_nc_u32_e32 v2, 64, v2
+; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] dlc
+; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-SDAG-NEXT: global_store_b32 v0, v2, s[0:1] dlc
+; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: v_test_i32_x_sub_64_multi_use:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: global_load_b32 v2, v0, s[2:3] glc dlc
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v1, 0xffffffc0, v1
+; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v2, 0xffffffc0, v2
+; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1] dlc
+; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-GISEL-NEXT: global_store_b32 v0, v2, s[0:1] dlc
+; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %tid.ext
@@ -425,7 +513,7 @@ define amdgpu_kernel void @v_test_i32_x_sub_65(ptr addrspace(1) %out, ptr addrsp
; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3]
; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT: v_subrev_i32_e32 v2, vcc, 0x41, v2
+; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 0xffffffbf, v2
; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT: s_endpgm
@@ -462,79 +550,44 @@ define amdgpu_kernel void @v_test_i32_x_sub_65(ptr addrspace(1) %out, ptr addrsp
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT: v_subrev_u32_e32 v2, vcc, 0x41, v3
+; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, 0xffffffbf, v3
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-SDAG-LABEL: v_test_i32_x_sub_65:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_add_u32_e32 v1, 0xffffffbf, v1
-; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-SDAG-NEXT: s_endpgm
-;
-; GFX9-GISEL-LABEL: v_test_i32_x_sub_65:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: v_subrev_u32_e32 v1, 0x41, v1
-; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-GISEL-NEXT: s_endpgm
-;
-; GFX10-SDAG-LABEL: v_test_i32_x_sub_65:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, 0xffffffbf, v1
-; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-SDAG-NEXT: s_endpgm
-;
-; GFX10-GISEL-LABEL: v_test_i32_x_sub_65:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: v_subrev_nc_u32_e32 v1, 0x41, v1
-; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-GISEL-NEXT: s_endpgm
+; GFX9-LABEL: v_test_i32_x_sub_65:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_add_u32_e32 v1, 0xffffffbf, v1
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: v_test_i32_x_sub_65:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v1, 0xffffffbf, v1
-; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-SDAG-NEXT: s_endpgm
+; GFX10-LABEL: v_test_i32_x_sub_65:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_add_nc_u32_e32 v1, 0xffffffbf, v1
+; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT: s_endpgm
;
-; GFX11-GISEL-LABEL: v_test_i32_x_sub_65:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_subrev_nc_u32_e32 v1, 0x41, v1
-; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-LABEL: v_test_i32_x_sub_65:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_add_nc_u32_e32 v1, 0xffffffbf, v1
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %tid.ext
@@ -686,7 +739,7 @@ define amdgpu_kernel void @v_test_i32_x_sub_neg16(ptr addrspace(1) %out, ptr add
; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3]
; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT: v_subrev_i32_e32 v2, vcc, -16, v2
+; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 16, v2
; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT: s_endpgm
@@ -723,79 +776,44 @@ define amdgpu_kernel void @v_test_i32_x_sub_neg16(ptr addrspace(1) %out, ptr add
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT: v_subrev_u32_e32 v2, vcc, -16, v3
+; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, 16, v3
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-SDAG-LABEL: v_test_i32_x_sub_neg16:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_add_u32_e32 v1, 16, v1
-; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-SDAG-NEXT: s_endpgm
-;
-; GFX9-GISEL-LABEL: v_test_i32_x_sub_neg16:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: v_subrev_u32_e32 v1, -16, v1
-; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-GISEL-NEXT: s_endpgm
-;
-; GFX10-SDAG-LABEL: v_test_i32_x_sub_neg16:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, 16, v1
-; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-SDAG-NEXT: s_endpgm
-;
-; GFX10-GISEL-LABEL: v_test_i32_x_sub_neg16:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: v_subrev_nc_u32_e32 v1, -16, v1
-; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-GISEL-NEXT: s_endpgm
+; GFX9-LABEL: v_test_i32_x_sub_neg16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_add_u32_e32 v1, 16, v1
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: v_test_i32_x_sub_neg16:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v1, 16, v1
-; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-SDAG-NEXT: s_endpgm
+; GFX10-LABEL: v_test_i32_x_sub_neg16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_add_nc_u32_e32 v1, 16, v1
+; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT: s_endpgm
;
-; GFX11-GISEL-LABEL: v_test_i32_x_sub_neg16:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_subrev_nc_u32_e32 v1, -16, v1
-; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-LABEL: v_test_i32_x_sub_neg16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_add_nc_u32_e32 v1, 16, v1
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %tid.ext
@@ -947,7 +965,7 @@ define amdgpu_kernel void @v_test_i32_x_sub_neg17(ptr addrspace(1) %out, ptr add
; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3]
; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT: v_subrev_i32_e32 v2, vcc, 0xffffffef, v2
+; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 17, v2
; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT: s_endpgm
@@ -983,80 +1001,45 @@ define amdgpu_kernel void @v_test_i32_x_sub_neg17(ptr addrspace(1) %out, ptr add
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT: v_subrev_u32_e32 v2, vcc, 0xffffffef, v3
-; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
-; VI-GISEL-NEXT: s_endpgm
-;
-; GFX9-SDAG-LABEL: v_test_i32_x_sub_neg17:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_add_u32_e32 v1, 17, v1
-; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-SDAG-NEXT: s_endpgm
-;
-; GFX9-GISEL-LABEL: v_test_i32_x_sub_neg17:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: v_subrev_u32_e32 v1, 0xffffffef, v1
-; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-GISEL-NEXT: s_endpgm
-;
-; GFX10-SDAG-LABEL: v_test_i32_x_sub_neg17:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, 17, v1
-; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-SDAG-NEXT: s_endpgm
-;
-; GFX10-GISEL-LABEL: v_test_i32_x_sub_neg17:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: v_subrev_nc_u32_e32 v1, 0xffffffef, v1
-; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-GISEL-NEXT: s_endpgm
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
+; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, 17, v3
+; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
+; VI-GISEL-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: v_test_i32_x_sub_neg17:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v1, 17, v1
-; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-SDAG-NEXT: s_endpgm
+; GFX9-LABEL: v_test_i32_x_sub_neg17:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_add_u32_e32 v1, 17, v1
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
;
-; GFX11-GISEL-LABEL: v_test_i32_x_sub_neg17:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_subrev_nc_u32_e32 v1, 0xffffffef, v1
-; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX10-LABEL: v_test_i32_x_sub_neg17:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_add_nc_u32_e32 v1, 17, v1
+; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_test_i32_x_sub_neg17:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_add_nc_u32_e32 v1, 17, v1
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %tid.ext
@@ -1263,7 +1246,7 @@ define amdgpu_kernel void @v_test_i16_x_sub_64(ptr addrspace(1) %out, ptr addrsp
; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3]
; SI-GISEL-NEXT: buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT: v_subrev_i32_e32 v2, vcc, 64, v2
+; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 0xffffffc0, v2
; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
; SI-GISEL-NEXT: buffer_store_short v2, v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT: s_endpgm
@@ -1300,44 +1283,79 @@ define amdgpu_kernel void @v_test_i16_x_sub_64(ptr addrspace(1) %out, ptr addrsp
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT: v_subrev_u16_e32 v2, 64, v3
+; VI-GISEL-NEXT: v_add_u16_e32 v2, 0xffc0, v3
; VI-GISEL-NEXT: flat_store_short v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-LABEL: v_test_i16_x_sub_64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_ushort v1, v0, s[2:3]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_subrev_u16_e32 v1, 64, v1
-; GFX9-NEXT: global_store_short v0, v1, s[0:1]
-; GFX9-NEXT: s_endpgm
+; GFX9-SDAG-LABEL: v_test_i16_x_sub_64:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT: global_load_ushort v1, v0, s[2:3]
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT: v_subrev_u16_e32 v1, 64, v1
+; GFX9-SDAG-NEXT: global_store_short v0, v1, s[0:1]
+; GFX9-SDAG-NEXT: s_endpgm
;
-; GFX10-LABEL: v_test_i16_x_sub_64:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0
-; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: global_load_ushort v1, v0, s[2:3]
-; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_sub_nc_u16 v1, v1, 64
-; GFX10-NEXT: global_store_short v0, v1, s[0:1]
-; GFX10-NEXT: s_endpgm
+; GFX9-GISEL-LABEL: v_test_i16_x_sub_64:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_ushort v1, v0, s[2:3]
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: v_add_u16_e32 v1, 0xffc0, v1
+; GFX9-GISEL-NEXT: global_store_short v0, v1, s[0:1]
+; GFX9-GISEL-NEXT: s_endpgm
;
-; GFX11-LABEL: v_test_i16_x_sub_64:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: global_load_u16 v1, v0, s[2:3]
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_sub_nc_u16 v1, v1, 64
-; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
-; GFX11-NEXT: s_endpgm
+; GFX10-SDAG-LABEL: v_test_i16_x_sub_64:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-SDAG-NEXT: global_load_ushort v1, v0, s[2:3]
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: v_sub_nc_u16 v1, v1, 64
+; GFX10-SDAG-NEXT: global_store_short v0, v1, s[0:1]
+; GFX10-SDAG-NEXT: s_endpgm
+;
+; GFX10-GISEL-LABEL: v_test_i16_x_sub_64:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-GISEL-NEXT: global_load_ushort v1, v0, s[2:3]
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 0xffc0
+; GFX10-GISEL-NEXT: global_store_short v0, v1, s[0:1]
+; GFX10-GISEL-NEXT: s_endpgm
+;
+; GFX11-SDAG-LABEL: v_test_i16_x_sub_64:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT: global_load_u16 v1, v0, s[2:3]
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: v_sub_nc_u16 v1, v1, 64
+; GFX11-SDAG-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: v_test_i16_x_sub_64:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT: global_load_u16 v1, v0, s[2:3]
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 0xffc0
+; GFX11-GISEL-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds i16, ptr addrspace(1) %in, i64 %tid.ext
@@ -1379,7 +1397,7 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_zext_to_i32(ptr addrspace(1) %out
; SI-GISEL-NEXT: buffer_load_ushort v3, v[1:2], s[4:7], 0 addr64
; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT: v_subrev_i32_e32 v0, vcc, 64, v3
+; SI-GISEL-NEXT: v_add_i32_e32 v0, vcc, 0xffffffc0, v3
; SI-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
; SI-GISEL-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
@@ -1419,50 +1437,91 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_zext_to_i32(ptr addrspace(1) %out
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v3
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT: v_subrev_u16_e32 v2, 64, v2
+; VI-GISEL-NEXT: v_add_u16_e32 v2, 0xffc0, v2
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-LABEL: v_test_i16_x_sub_64_zext_to_i32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-NEXT: v_lshlrev_b32_e32 v1, 1, v0
-; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_ushort v1, v1, s[2:3]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_subrev_u16_e32 v1, 64, v1
-; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-NEXT: s_endpgm
+; GFX9-SDAG-LABEL: v_test_i16_x_sub_64_zext_to_i32:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v1, 1, v0
+; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT: global_load_ushort v1, v1, s[2:3]
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT: v_subrev_u16_e32 v1, 64, v1
+; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-SDAG-NEXT: s_endpgm
;
-; GFX10-LABEL: v_test_i16_x_sub_64_zext_to_i32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX10-NEXT: v_lshlrev_b32_e32 v1, 1, v0
-; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: global_load_ushort v1, v1, s[2:3]
-; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_sub_nc_u16 v1, v1, 64
-; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-NEXT: s_endpgm
+; GFX9-GISEL-LABEL: v_test_i16_x_sub_64_zext_to_i32:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v0
+; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_ushort v1, v1, s[2:3]
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: v_add_u16_e32 v1, 0xffc0, v1
+; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-GISEL-NEXT: s_endpgm
;
-; GFX11-LABEL: v_test_i16_x_sub_64_zext_to_i32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_lshlrev_b32_e32 v1, 1, v0
-; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: global_load_u16 v1, v1, s[2:3]
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_sub_nc_u16 v1, v1, 64
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT: s_endpgm
+; GFX10-SDAG-LABEL: v_test_i16_x_sub_64_zext_to_i32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v1, 1, v0
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-SDAG-NEXT: global_load_ushort v1, v1, s[2:3]
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: v_sub_nc_u16 v1, v1, 64
+; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-SDAG-NEXT: s_endpgm
+;
+; GFX10-GISEL-LABEL: v_test_i16_x_sub_64_zext_to_i32:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v0
+; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-GISEL-NEXT: global_load_ushort v1, v1, s[2:3]
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 0xffc0
+; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-GISEL-NEXT: s_endpgm
+;
+; GFX11-SDAG-LABEL: v_test_i16_x_sub_64_zext_to_i32:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v1, 1, v0
+; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT: global_load_u16 v1, v1, s[2:3]
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: v_sub_nc_u16 v1, v1, 64
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: v_test_i16_x_sub_64_zext_to_i32:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v0
+; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT: global_load_u16 v1, v1, s[2:3]
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 0xffc0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds i16, ptr addrspace(1) %in, i64 %tid.ext
@@ -1511,8 +1570,8 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_multi_use(ptr addrspace(1) %out,
; SI-GISEL-NEXT: buffer_load_ushort v3, v[0:1], s[4:7], 0 addr64 glc
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
-; SI-GISEL-NEXT: v_subrev_i32_e32 v2, vcc, 64, v2
-; SI-GISEL-NEXT: v_subrev_i32_e32 v3, vcc, 64, v3
+; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 0xffffffc0, v2
+; SI-GISEL-NEXT: v_add_i32_e32 v3, vcc, 0xffffffc0, v3
; SI-GISEL-NEXT: buffer_store_short v2, v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
; SI-GISEL-NEXT: buffer_store_short v3, v[0:1], s[0:3], 0 addr64
@@ -1559,66 +1618,119 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_multi_use(ptr addrspace(1) %out,
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-GISEL-NEXT: v_subrev_u16_e32 v2, 64, v3
-; VI-GISEL-NEXT: v_subrev_u16_e32 v3, 64, v4
+; VI-GISEL-NEXT: v_add_u16_e32 v2, 0xffc0, v3
+; VI-GISEL-NEXT: v_add_u16_e32 v3, 0xffc0, v4
; VI-GISEL-NEXT: flat_store_short v[0:1], v2
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
; VI-GISEL-NEXT: flat_store_short v[0:1], v3
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-LABEL: v_test_i16_x_sub_64_multi_use:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_ushort v1, v0, s[2:3] glc
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: global_load_ushort v2, v0, s[2:3] glc
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_subrev_u16_e32 v1, 64, v1
-; GFX9-NEXT: v_subrev_u16_e32 v2, 64, v2
-; GFX9-NEXT: global_store_short v0, v1, s[0:1]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: global_store_short v0, v2, s[0:1]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_endpgm
+; GFX9-SDAG-LABEL: v_test_i16_x_sub_64_multi_use:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT: global_load_ushort v1, v0, s[2:3] glc
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT: global_load_ushort v2, v0, s[2:3] glc
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT: v_subrev_u16_e32 v1, 64, v1
+; GFX9-SDAG-NEXT: v_subrev_u16_e32 v2, 64, v2
+; GFX9-SDAG-NEXT: global_store_short v0, v1, s[0:1]
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT: global_store_short v0, v2, s[0:1]
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT: s_endpgm
;
-; GFX10-LABEL: v_test_i16_x_sub_64_multi_use:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0
-; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: global_load_ushort v1, v0, s[2:3] glc dlc
-; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: global_load_ushort v2, v0, s[2:3] glc dlc
-; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_sub_nc_u16 v1, v1, 64
-; GFX10-NEXT: v_sub_nc_u16 v2, v2, 64
-; GFX10-NEXT: global_store_short v0, v1, s[0:1]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: global_store_short v0, v2, s[0:1]
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_endpgm
+; GFX9-GISEL-LABEL: v_test_i16_x_sub_64_multi_use:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_ushort v1, v0, s[2:3] glc
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: global_load_ushort v2, v0, s[2:3] glc
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: v_add_u16_e32 v1, 0xffc0, v1
+; GFX9-GISEL-NEXT: v_add_u16_e32 v2, 0xffc0, v2
+; GFX9-GISEL-NEXT: global_store_short v0, v1, s[0:1]
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: global_store_short v0, v2, s[0:1]
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_endpgm
;
-; GFX11-LABEL: v_test_i16_x_sub_64_multi_use:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: global_load_u16 v2, v0, s[2:3] glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_sub_nc_u16 v1, v1, 64
-; GFX11-NEXT: v_sub_nc_u16 v2, v2, 64
-; GFX11-NEXT: global_store_b16 v0, v1, s[0:1] dlc
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: global_store_b16 v0, v2, s[0:1] dlc
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: s_endpgm
+; GFX10-SDAG-LABEL: v_test_i16_x_sub_64_multi_use:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-SDAG-NEXT: global_load_ushort v1, v0, s[2:3] glc dlc
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: global_load_ushort v2, v0, s[2:3] glc dlc
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: v_sub_nc_u16 v1, v1, 64
+; GFX10-SDAG-NEXT: v_sub_nc_u16 v2, v2, 64
+; GFX10-SDAG-NEXT: global_store_short v0, v1, s[0:1]
+; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-SDAG-NEXT: global_store_short v0, v2, s[0:1]
+; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-SDAG-NEXT: s_endpgm
+;
+; GFX10-GISEL-LABEL: v_test_i16_x_sub_64_multi_use:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-GISEL-NEXT: global_load_ushort v1, v0, s[2:3] glc dlc
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: global_load_ushort v2, v0, s[2:3] glc dlc
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 0xffc0
+; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v2, 0xffc0
+; GFX10-GISEL-NEXT: global_store_short v0, v1, s[0:1]
+; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-GISEL-NEXT: global_store_short v0, v2, s[0:1]
+; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-GISEL-NEXT: s_endpgm
+;
+; GFX11-SDAG-LABEL: v_test_i16_x_sub_64_multi_use:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: global_load_u16 v2, v0, s[2:3] glc dlc
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: v_sub_nc_u16 v1, v1, 64
+; GFX11-SDAG-NEXT: v_sub_nc_u16 v2, v2, 64
+; GFX11-SDAG-NEXT: global_store_b16 v0, v1, s[0:1] dlc
+; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-SDAG-NEXT: global_store_b16 v0, v2, s[0:1] dlc
+; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: v_test_i16_x_sub_64_multi_use:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: global_load_u16 v2, v0, s[2:3] glc dlc
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 0xffc0
+; GFX11-GISEL-NEXT: v_add_nc_u16 v2, v2, 0xffc0
+; GFX11-GISEL-NEXT: global_store_b16 v0, v1, s[0:1] dlc
+; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-GISEL-NEXT: global_store_b16 v0, v2, s[0:1] dlc
+; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds i16, ptr addrspace(1) %in, i64 %tid.ext
@@ -1664,8 +1776,8 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_64_64(ptr addrspace(1) %out, ptr a
; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
; SI-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; SI-GISEL-NEXT: v_subrev_i32_e32 v2, vcc, 64, v2
-; SI-GISEL-NEXT: v_subrev_i32_e32 v3, vcc, 64, v3
+; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 0xffffffc0, v2
+; SI-GISEL-NEXT: v_add_i32_e32 v3, vcc, 0xffffffc0, v3
; SI-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; SI-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; SI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
@@ -1698,7 +1810,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_64_64(ptr addrspace(1) %out, ptr a
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 64
+; VI-GISEL-NEXT: v_not_b32_e32 v4, 63
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
@@ -1710,8 +1822,8 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_64_64(ptr addrspace(1) %out, ptr a
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT: v_subrev_u16_e32 v2, 64, v3
-; VI-GISEL-NEXT: v_sub_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-GISEL-NEXT: v_add_u16_e32 v2, 0xffc0, v3
+; VI-GISEL-NEXT: v_add_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
@@ -1792,8 +1904,8 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_7_64(ptr addrspace(1) %out, ptr ad
; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
; SI-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; SI-GISEL-NEXT: v_subrev_i32_e32 v2, vcc, 7, v2
-; SI-GISEL-NEXT: v_subrev_i32_e32 v3, vcc, 64, v3
+; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, -7, v2
+; SI-GISEL-NEXT: v_add_i32_e32 v3, vcc, 0xffffffc0, v3
; SI-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; SI-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; SI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
@@ -1826,7 +1938,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_7_64(ptr addrspace(1) %out, ptr ad
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 64
+; VI-GISEL-NEXT: v_not_b32_e32 v4, 63
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
@@ -1838,8 +1950,8 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_7_64(ptr addrspace(1) %out, ptr ad
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT: v_subrev_u16_e32 v2, 7, v3
-; VI-GISEL-NEXT: v_sub_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-GISEL-NEXT: v_add_u16_e32 v2, -7, v3
+; VI-GISEL-NEXT: v_add_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
@@ -1933,8 +2045,8 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_64_123(ptr addrspace(1) %out, ptr
; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
; SI-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; SI-GISEL-NEXT: v_subrev_i32_e32 v2, vcc, 64, v2
-; SI-GISEL-NEXT: v_subrev_i32_e32 v3, vcc, 0x7b, v3
+; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 0xffffffc0, v2
+; SI-GISEL-NEXT: v_add_i32_e32 v3, vcc, 0xffffff85, v3
; SI-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; SI-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; SI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
@@ -1967,7 +2079,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_64_123(ptr addrspace(1) %out, ptr
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7b
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff85
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
@@ -1979,8 +2091,8 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_64_123(ptr addrspace(1) %out, ptr
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT: v_subrev_u16_e32 v2, 64, v3
-; VI-GISEL-NEXT: v_sub_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-GISEL-NEXT: v_add_u16_e32 v2, 0xffc0, v3
+; VI-GISEL-NEXT: v_add_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
@@ -2074,7 +2186,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_7_0(ptr addrspace(1) %out, ptr add
; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
; SI-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; SI-GISEL-NEXT: v_subrev_i32_e32 v2, vcc, 7, v2
+; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, -7, v2
; SI-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; SI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; SI-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
@@ -2117,7 +2229,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_7_0(ptr addrspace(1) %out, ptr add
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
; VI-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v3
-; VI-GISEL-NEXT: v_subrev_u16_e32 v3, 7, v3
+; VI-GISEL-NEXT: v_add_u16_e32 v3, -7, v3
; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; VI-GISEL-NEXT: v_or_b32_e32 v2, v3, v2
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
@@ -2197,7 +2309,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_0_16(ptr addrspace(1) %out, ptr ad
; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
; SI-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; SI-GISEL-NEXT: v_subrev_i32_e32 v3, vcc, 16, v3
+; SI-GISEL-NEXT: v_add_i32_e32 v3, vcc, -16, v3
; SI-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; SI-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; SI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
@@ -2237,11 +2349,11 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_0_16(ptr addrspace(1) %out, ptr ad
; VI-GISEL-NEXT: flat_load_dword v3, v[0:1]
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 16
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, -16
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT: v_sub_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-GISEL-NEXT: v_add_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-GISEL-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
@@ -2319,7 +2431,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_0_1_0(ptr addrspace(1) %out, ptr a
; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
; SI-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; SI-GISEL-NEXT: v_subrev_i32_e32 v3, vcc, 0xffffc400, v3
+; SI-GISEL-NEXT: v_add_i32_e32 v3, vcc, 0x3c00, v3
; SI-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; SI-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; SI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
@@ -2359,11 +2471,11 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_0_1_0(ptr addrspace(1) %out, ptr a
; VI-GISEL-NEXT: flat_load_dword v3, v[0:1]
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xffffc400
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3c00
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT: v_sub_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-GISEL-NEXT: v_add_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-GISEL-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
@@ -2454,7 +2566,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_0_neg1_0(ptr addrspace(1) %out, pt
; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
; SI-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; SI-GISEL-NEXT: v_subrev_i32_e32 v3, vcc, 0x4400, v3
+; SI-GISEL-NEXT: v_add_i32_e32 v3, vcc, 0xffffbc00, v3
; SI-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; SI-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; SI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
@@ -2494,11 +2606,11 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_0_neg1_0(ptr addrspace(1) %out, pt
; VI-GISEL-NEXT: flat_load_dword v3, v[0:1]
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4400
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xffffbc00
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT: v_sub_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-GISEL-NEXT: v_add_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-GISEL-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/alu-roundtrip.ll b/llvm/test/CodeGen/RISCV/GlobalISel/alu-roundtrip.ll
index c503d6541b0a57..14ff9e01ab3bc2 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/alu-roundtrip.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/alu-roundtrip.ll
@@ -457,19 +457,19 @@ entry:
define i64 @subi_i64(i64 %a) {
; RV32IM-LABEL: subi_i64:
; RV32IM: # %bb.0: # %entry
-; RV32IM-NEXT: lui a2, 301
-; RV32IM-NEXT: addi a3, a2, 1548
-; RV32IM-NEXT: sub a2, a0, a3
-; RV32IM-NEXT: sltu a0, a0, a3
-; RV32IM-NEXT: sub a1, a1, a0
-; RV32IM-NEXT: mv a0, a2
+; RV32IM-NEXT: lui a2, 1048275
+; RV32IM-NEXT: addi a2, a2, -1548
+; RV32IM-NEXT: add a0, a0, a2
+; RV32IM-NEXT: sltu a2, a0, a2
+; RV32IM-NEXT: addi a1, a1, -1
+; RV32IM-NEXT: add a1, a1, a2
; RV32IM-NEXT: ret
;
; RV64IM-LABEL: subi_i64:
; RV64IM: # %bb.0: # %entry
-; RV64IM-NEXT: lui a1, 301
-; RV64IM-NEXT: addiw a1, a1, 1548
-; RV64IM-NEXT: sub a0, a0, a1
+; RV64IM-NEXT: lui a1, 1048275
+; RV64IM-NEXT: addiw a1, a1, -1548
+; RV64IM-NEXT: add a0, a0, a1
; RV64IM-NEXT: ret
entry:
%0 = sub i64 %a, 1234444
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/alu-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/alu-rv32.mir
index 2ef5de501ee711..39d0ee7c382dfc 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/alu-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/alu-rv32.mir
@@ -200,8 +200,9 @@ body: |
; RV32I: liveins: $x10
; RV32I-NEXT: {{ $}}
; RV32I-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
- ; RV32I-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI [[COPY]], 1234
- ; RV32I-NEXT: $x10 = COPY [[ADDI]]
+ ; RV32I-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, -1234
+ ; RV32I-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY]], [[ADDI]]
+ ; RV32I-NEXT: $x10 = COPY [[SUB]]
; RV32I-NEXT: PseudoRET implicit $x10
%0:gprb(s32) = COPY $x10
%1:gprb(s32) = G_CONSTANT i32 -1234
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/alu-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/alu-rv64.mir
index be12333e1499b2..527036d8b750fc 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/alu-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/alu-rv64.mir
@@ -188,8 +188,9 @@ body: |
; RV64I: liveins: $x10
; RV64I-NEXT: {{ $}}
; RV64I-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
- ; RV64I-NEXT: [[ADDIW:%[0-9]+]]:gpr = ADDIW [[COPY]], 1234
- ; RV64I-NEXT: $x10 = COPY [[ADDIW]]
+ ; RV64I-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, -1234
+ ; RV64I-NEXT: [[SUBW:%[0-9]+]]:gpr = SUBW [[COPY]], [[ADDI]]
+ ; RV64I-NEXT: $x10 = COPY [[SUBW]]
; RV64I-NEXT: PseudoRET implicit $x10
%0:gprb(s64) = COPY $x10
%1:gprb(s32) = G_TRUNC %0(s64)
@@ -440,8 +441,9 @@ body: |
; RV64I: liveins: $x10
; RV64I-NEXT: {{ $}}
; RV64I-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
- ; RV64I-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI [[COPY]], 1234
- ; RV64I-NEXT: $x10 = COPY [[ADDI]]
+ ; RV64I-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, -1234
+ ; RV64I-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY]], [[ADDI]]
+ ; RV64I-NEXT: $x10 = COPY [[SUB]]
; RV64I-NEXT: PseudoRET implicit $x10
%0:gprb(s64) = COPY $x10
%1:gprb(s64) = G_CONSTANT i64 -1234
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-medium-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-medium-rv64.mir
index 5d980e7721458e..d0237892d132f3 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-medium-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-medium-rv64.mir
@@ -111,8 +111,8 @@ body: |
%2:gprb(s64) = G_ASSERT_SEXT %1, 32
%7:gprb(s64) = G_CONSTANT i64 5
%3:gprb(s64) = G_SEXT_INREG %2, 32
- %4:gprb(s64) = G_CONSTANT i64 1
- %5:gprb(s64) = G_SUB %3, %4
+ %4:gprb(s64) = G_CONSTANT i64 -1
+ %5:gprb(s64) = G_ADD %3, %4
%26:gprb(s64) = G_ICMP intpred(ugt), %5(s64), %7
G_BRCOND %26(s64), %bb.8
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-pic-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-pic-rv32.mir
index 27fe465ccf696b..396421a4ba739a 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-pic-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-pic-rv32.mir
@@ -115,8 +115,8 @@ body: |
%12:gprb(s32) = G_CONSTANT i32 3
%13:gprb(s32) = G_CONSTANT i32 4
%14:gprb(s32) = G_CONSTANT i32 1000
- %1:gprb(s32) = G_CONSTANT i32 1
- %2:gprb(s32) = G_SUB %0, %1
+ %1:gprb(s32) = G_CONSTANT i32 -1
+ %2:gprb(s32) = G_ADD %0, %1
%16:gprb(s32) = G_ICMP intpred(ugt), %2(s32), %4
G_BRCOND %16(s32), %bb.8
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-pic-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-pic-rv64.mir
index 77156b913c5e8b..0a08586bc1af4f 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-pic-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-pic-rv64.mir
@@ -112,8 +112,8 @@ body: |
%2:gprb(s64) = G_ASSERT_SEXT %1, 32
%7:gprb(s64) = G_CONSTANT i64 5
%3:gprb(s64) = G_SEXT_INREG %2, 32
- %4:gprb(s64) = G_CONSTANT i64 1
- %5:gprb(s64) = G_SUB %3, %4
+ %4:gprb(s64) = G_CONSTANT i64 -1
+ %5:gprb(s64) = G_ADD %3, %4
%26:gprb(s64) = G_ICMP intpred(ugt), %5(s64), %7
G_BRCOND %26(s64), %bb.8
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-rv32.mir
index 388c238b86eb6f..efa1a6c86027db 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-rv32.mir
@@ -171,8 +171,8 @@ body: |
%12:gprb(s32) = G_CONSTANT i32 3
%13:gprb(s32) = G_CONSTANT i32 4
%14:gprb(s32) = G_CONSTANT i32 1000
- %1:gprb(s32) = G_CONSTANT i32 1
- %2:gprb(s32) = G_SUB %0, %1
+ %1:gprb(s32) = G_CONSTANT i32 -1
+ %2:gprb(s32) = G_ADD %0, %1
%16:gprb(s32) = G_ICMP intpred(ugt), %2(s32), %4
G_BRCOND %16(s32), %bb.8
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-small-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-small-rv64.mir
index 09a855105c2627..12b1517e2cfb54 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-small-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-small-rv64.mir
@@ -112,8 +112,8 @@ body: |
%2:gprb(s64) = G_ASSERT_SEXT %1, 32
%7:gprb(s64) = G_CONSTANT i64 5
%3:gprb(s64) = G_SEXT_INREG %2, 32
- %4:gprb(s64) = G_CONSTANT i64 1
- %5:gprb(s64) = G_SUB %3, %4
+ %4:gprb(s64) = G_CONSTANT i64 -1
+ %5:gprb(s64) = G_ADD %3, %4
%26:gprb(s64) = G_ICMP intpred(ugt), %5(s64), %7
G_BRCOND %26(s64), %bb.8
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll
index 1b323fe35b8e38..b9d1ebc54db747 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll
@@ -143,7 +143,7 @@ define i64 @rol_i64(i64 %a, i64 %b) nounwind {
; CHECK-NEXT: bltu a6, a4, .LBB7_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a3, 0
-; CHECK-NEXT: addi a5, a6, -32
+; CHECK-NEXT: sub a5, a6, a4
; CHECK-NEXT: sll a7, a0, a5
; CHECK-NEXT: j .LBB7_3
; CHECK-NEXT: .LBB7_2:
@@ -162,7 +162,7 @@ define i64 @rol_i64(i64 %a, i64 %b) nounwind {
; CHECK-NEXT: andi a6, a5, 63
; CHECK-NEXT: bltu a6, a4, .LBB7_7
; CHECK-NEXT: # %bb.6:
-; CHECK-NEXT: addi a7, a6, -32
+; CHECK-NEXT: sub a7, a6, a4
; CHECK-NEXT: srl a7, a1, a7
; CHECK-NEXT: bnez a6, .LBB7_8
; CHECK-NEXT: j .LBB7_9
@@ -220,7 +220,7 @@ define i64 @ror_i64(i64 %a, i64 %b) nounwind {
; CHECK-NEXT: li a4, 32
; CHECK-NEXT: bltu a5, a4, .LBB9_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: addi a3, a5, -32
+; CHECK-NEXT: sub a3, a5, a4
; CHECK-NEXT: srl a6, a1, a3
; CHECK-NEXT: mv a3, a0
; CHECK-NEXT: bnez a5, .LBB9_3
@@ -235,33 +235,33 @@ define i64 @ror_i64(i64 %a, i64 %b) nounwind {
; CHECK-NEXT: .LBB9_3:
; CHECK-NEXT: mv a3, a6
; CHECK-NEXT: .LBB9_4:
-; CHECK-NEXT: neg a6, a2
+; CHECK-NEXT: neg a7, a2
; CHECK-NEXT: bltu a5, a4, .LBB9_7
; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: andi a5, a6, 63
+; CHECK-NEXT: andi a5, a7, 63
; CHECK-NEXT: bgeu a5, a4, .LBB9_8
; CHECK-NEXT: .LBB9_6:
-; CHECK-NEXT: sll a4, a0, a6
-; CHECK-NEXT: neg a7, a5
-; CHECK-NEXT: srl a0, a0, a7
-; CHECK-NEXT: sll a6, a1, a6
-; CHECK-NEXT: or a0, a0, a6
+; CHECK-NEXT: sll a6, a0, a7
+; CHECK-NEXT: neg a4, a5
+; CHECK-NEXT: srl a0, a0, a4
+; CHECK-NEXT: sll a4, a1, a7
+; CHECK-NEXT: or a0, a0, a4
; CHECK-NEXT: bnez a5, .LBB9_9
; CHECK-NEXT: j .LBB9_10
; CHECK-NEXT: .LBB9_7:
; CHECK-NEXT: srl a2, a1, a2
-; CHECK-NEXT: andi a5, a6, 63
+; CHECK-NEXT: andi a5, a7, 63
; CHECK-NEXT: bltu a5, a4, .LBB9_6
; CHECK-NEXT: .LBB9_8:
-; CHECK-NEXT: li a4, 0
-; CHECK-NEXT: addi a6, a5, -32
-; CHECK-NEXT: sll a0, a0, a6
+; CHECK-NEXT: li a6, 0
+; CHECK-NEXT: sub a4, a5, a4
+; CHECK-NEXT: sll a0, a0, a4
; CHECK-NEXT: beqz a5, .LBB9_10
; CHECK-NEXT: .LBB9_9:
; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: .LBB9_10:
-; CHECK-NEXT: or a0, a3, a4
+; CHECK-NEXT: or a0, a3, a6
; CHECK-NEXT: or a1, a2, a1
; CHECK-NEXT: ret
%or = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %b)
More information about the llvm-commits
mailing list