[llvm] cff2199 - Revert "[GISel][AArch64][AMDGPU][RISCV] Canonicalize (sub X, C) -> (add X, -C) (#114309)"
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 6 10:45:57 PST 2024
Author: Craig Topper
Date: 2024-11-06T10:45:23-08:00
New Revision: cff2199e0f0e54177997ecf9571ba874231cefe4
URL: https://github.com/llvm/llvm-project/commit/cff2199e0f0e54177997ecf9571ba874231cefe4
DIFF: https://github.com/llvm/llvm-project/commit/cff2199e0f0e54177997ecf9571ba874231cefe4.diff
LOG: Revert "[GISel][AArch64][AMDGPU][RISCV] Canonicalize (sub X, C) -> (add X, -C) (#114309)"
This reverts commit 999dfb2067eb75609b735944af876279025ac171.
I received a report that this may have increased fallbacks on AArch64.
Added:
Modified:
llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
llvm/include/llvm/Target/GlobalISel/Combine.td
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
llvm/lib/Target/RISCV/RISCVGISel.td
llvm/test/CodeGen/AArch64/GlobalISel/combine-integer.mir
llvm/test/CodeGen/AArch64/GlobalISel/combine-narrow-binop.mir
llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir
llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/sub.v2i16.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
llvm/test/CodeGen/AMDGPU/ctlz.ll
llvm/test/CodeGen/AMDGPU/div_i128.ll
llvm/test/CodeGen/AMDGPU/div_v2i128.ll
llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
llvm/test/CodeGen/AMDGPU/itofp.i128.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
llvm/test/CodeGen/RISCV/GlobalISel/alu-roundtrip.ll
llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/alu-rv32.mir
llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/alu-rv64.mir
llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-medium-rv64.mir
llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-pic-rv32.mir
llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-pic-rv64.mir
llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-rv32.mir
llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-small-rv64.mir
llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index b09981eaef506e3..9240a3c3127eb43 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -321,9 +321,6 @@ class CombinerHelper {
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal);
void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal);
- // Transform a G_SUB with constant on the RHS to G_ADD.
- bool matchCombineSubToAdd(MachineInstr &MI, BuildFnTy &MatchInfo);
-
// Transform a G_SHL with an extended source into a narrower shift if
// possible.
bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 80a22c35ebceffd..ead4149fc110681 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -335,14 +335,6 @@ def mul_to_shl : GICombineRule<
[{ return Helper.matchCombineMulToShl(*${mi}, ${matchinfo}); }]),
(apply [{ Helper.applyCombineMulToShl(*${mi}, ${matchinfo}); }])>;
-// (sub x, C) -> (add x, -C)
-def sub_to_add : GICombineRule<
- (defs root:$d, build_fn_matchinfo:$matchinfo),
- (match (G_CONSTANT $c, $imm),
- (G_SUB $d, $op1, $c):$mi,
- [{ return Helper.matchCombineSubToAdd(*${mi}, ${matchinfo}); }]),
- (apply [{ Helper.applyBuildFnNoErase(*${mi}, ${matchinfo}); }])>;
-
// shl ([asz]ext x), y => zext (shl x, y), if shift does not overflow int
def reduce_shl_of_extend_matchdata : GIDefMatchData<"RegisterImmPair">;
def reduce_shl_of_extend : GICombineRule<
@@ -1911,9 +1903,8 @@ def bitreverse_shift : GICombineGroup<[bitreverse_shl, bitreverse_lshr]>;
def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp,
select_to_iminmax, match_selects]>;
-def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, sub_to_add,
- add_p2i_to_ptradd, mul_by_neg_one,
- idempotent_prop]>;
+def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd,
+ mul_by_neg_one, idempotent_prop]>;
def fma_combines : GICombineGroup<[combine_fadd_fmul_to_fmad_or_fma,
combine_fadd_fpext_fmul_to_fmad_or_fma, combine_fadd_fma_fmul_to_fmad_or_fma,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index ede8d82fc1a35e2..1f2baa3fa9c0f81 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2044,31 +2044,6 @@ void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
Observer.changedInstr(MI);
}
-bool CombinerHelper::matchCombineSubToAdd(MachineInstr &MI,
- BuildFnTy &MatchInfo) {
- GSub &Sub = cast<GSub>(MI);
-
- LLT Ty = MRI.getType(Sub.getReg(0));
-
- if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {Ty}}))
- return false;
-
- if (!isConstantLegalOrBeforeLegalizer(Ty))
- return false;
-
- APInt Imm = getIConstantFromReg(Sub.getRHSReg(), MRI);
-
- MatchInfo = [=, &MI](MachineIRBuilder &B) {
- auto NegCst = B.buildConstant(Ty, -Imm);
- Observer.changingInstr(MI);
- MI.setDesc(B.getTII().get(TargetOpcode::G_ADD));
- MI.getOperand(2).setReg(NegCst.getReg(0));
- MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
- Observer.changedInstr(MI);
- };
- return true;
-}
-
// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
RegisterImmPair &MatchData) {
diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td
index 9f30875a11e79ec..4ba6dd05579cd29 100644
--- a/llvm/lib/Target/RISCV/RISCVGISel.td
+++ b/llvm/lib/Target/RISCV/RISCVGISel.td
@@ -101,6 +101,15 @@ def gi_zexti32 : GIComplexOperandMatcher<s64, "selectZExtBits<32>">,
def gi_zexti16 : GIComplexOperandMatcher<s32, "selectZExtBits<16>">,
GIComplexPatternEquiv<zexti16>;
+// FIXME: Canonicalize (sub X, C) -> (add X, -C) earlier.
+def : Pat<(XLenVT (sub GPR:$rs1, simm12Plus1:$imm)),
+ (ADDI GPR:$rs1, (NegImm simm12Plus1:$imm))>;
+
+let Predicates = [IsRV64] in {
+def : Pat<(i32 (sub GPR:$rs1, simm12Plus1i32:$imm)),
+ (ADDIW GPR:$rs1, (i64 (NegImm $imm)))>;
+}
+
// Ptr type used in patterns with GlobalISelEmitter
def PtrVT : PtrValueTypeByHwMode<XLenVT, 0>;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-integer.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-integer.mir
index 5cbff0f0c74cb7b..2f10a497fa74cb0 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-integer.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-integer.mir
@@ -308,8 +308,8 @@ body: |
; CHECK: liveins: $w0, $w1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %a:_(s64) = COPY $x0
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -71
- ; CHECK-NEXT: %sub:_(s64) = G_ADD %a, [[C]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 71
+ ; CHECK-NEXT: %sub:_(s64) = G_SUB %a, [[C]]
; CHECK-NEXT: $x0 = COPY %sub(s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%a:_(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-narrow-binop.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-narrow-binop.mir
index e9d4af7da5d06f1..f207e9c149a4763 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-narrow-binop.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-narrow-binop.mir
@@ -88,8 +88,8 @@ body: |
; CHECK-LABEL: name: test_combine_trunc_sub_i128
; CHECK: %lhs:_(s128) = COPY $q0
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %lhs(s128)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -5
- ; CHECK-NEXT: %small:_(s32) = G_ADD [[TRUNC]], [[C]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+ ; CHECK-NEXT: %small:_(s32) = G_SUB [[TRUNC]], [[C]]
; CHECK-NEXT: $w0 = COPY %small(s32)
%lhs:_(s128) = COPY $q0
%rhs:_(s128) = G_CONSTANT i128 5
@@ -103,8 +103,8 @@ body: |
bb.1:
; CHECK-LABEL: name: test_combine_trunc_sub_i128_multi_use
; CHECK: %lhs:_(s128) = COPY $q0
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 -5
- ; CHECK-NEXT: %res:_(s128) = G_ADD %lhs, [[C]]
+ ; CHECK-NEXT: %rhs:_(s128) = G_CONSTANT i128 5
+ ; CHECK-NEXT: %res:_(s128) = G_SUB %lhs, %rhs
; CHECK-NEXT: %small:_(s32) = G_TRUNC %res(s128)
; CHECK-NEXT: $q0 = COPY %res(s128)
; CHECK-NEXT: $w0 = COPY %small(s32)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir
index 591b6a17928cb1f..04968dab3a37ceb 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir
@@ -95,7 +95,7 @@ body: |
%11:_(s8) = G_CONSTANT i8 1
; CHECK: [[T3:%[0-9]+]]:_(s8) = G_TRUNC [[T0]](s32)
%7:_(s8) = G_SUB %2, %11
- ; CHECK: [[T4:%[0-9]+]]:_(s8) = G_ADD [[T3]], {{.*}}
+ ; CHECK: [[T4:%[0-9]+]]:_(s8) = G_SUB [[T3]], {{.*}}
G_BR %bb.3.exit
bb.3.exit:
; CHECK: bb.3.exit:
@@ -197,7 +197,7 @@ body: |
%7:_(s8) = G_CONSTANT i8 1
; CHECK: [[T3:%[0-9]+]]:_(s8) = G_TRUNC [[T0]](s32)
%8:_(s8) = G_SUB %2, %7
- ; CHECK: [[T4:%[0-9]+]]:_(s8) = G_ADD [[T3]], {{.*}}
+ ; CHECK: [[T4:%[0-9]+]]:_(s8) = G_SUB [[T3]], {{.*}}
G_BR %bb.3.exit
bb.3.exit:
; CHECK: bb.3.exit:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir
index 4c3faa940390973..0900dd4267a2e4e 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir
@@ -289,8 +289,8 @@ body: |
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %x:_(s32) = COPY $w0
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
- ; CHECK-NEXT: %op:_(s32) = G_ADD %x, [[C]]
+ ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: %op:_(s32) = G_SUB %x, %cst
; CHECK-NEXT: $w0 = COPY %op(s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%x:_(s32) = COPY $w0
@@ -488,66 +488,3 @@ body: |
RET_ReallyLR implicit $w0
...
----
-name: sub_to_add
-tracksRegLiveness: true
-body: |
- bb.1.entry:
- liveins: $w0
- ; CHECK-LABEL: name: sub_to_add
- ; CHECK: liveins: $w0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %x:_(s32) = COPY $w0
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
- ; CHECK-NEXT: %op:_(s32) = G_ADD %x, [[C]]
- ; CHECK-NEXT: $w0 = COPY %op(s32)
- ; CHECK-NEXT: RET_ReallyLR implicit $w0
- %x:_(s32) = COPY $w0
- %cst:_(s32) = G_CONSTANT i32 1
- %op:_(s32) = G_SUB %x(s32), %cst
- $w0 = COPY %op(s32)
- RET_ReallyLR implicit $w0
-
-...
----
-name: sub_to_add_nuw
-tracksRegLiveness: true
-body: |
- bb.1.entry:
- liveins: $w0
- ; CHECK-LABEL: name: sub_to_add_nuw
- ; CHECK: liveins: $w0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %x:_(s32) = COPY $w0
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
- ; CHECK-NEXT: %op:_(s32) = G_ADD %x, [[C]]
- ; CHECK-NEXT: $w0 = COPY %op(s32)
- ; CHECK-NEXT: RET_ReallyLR implicit $w0
- %x:_(s32) = COPY $w0
- %cst:_(s32) = G_CONSTANT i32 1
- %op:_(s32) = nuw G_SUB %x(s32), %cst
- $w0 = COPY %op(s32)
- RET_ReallyLR implicit $w0
-
-...
----
-name: sub_to_add_nsw
-tracksRegLiveness: true
-body: |
- bb.1.entry:
- liveins: $w0
- ; CHECK-LABEL: name: sub_to_add_nsw
- ; CHECK: liveins: $w0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %x:_(s32) = COPY $w0
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
- ; CHECK-NEXT: %op:_(s32) = nsw G_ADD %x, [[C]]
- ; CHECK-NEXT: $w0 = COPY %op(s32)
- ; CHECK-NEXT: RET_ReallyLR implicit $w0
- %x:_(s32) = COPY $w0
- %cst:_(s32) = G_CONSTANT i32 1
- %op:_(s32) = nsw G_SUB %x(s32), %cst
- $w0 = COPY %op(s32)
- RET_ReallyLR implicit $w0
-
-...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
index 493e8cef6389022..63f5464371cc62e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
@@ -1669,7 +1669,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 64, v3
; GFX6-NEXT: v_lshr_b64 v[6:7], v[0:1], v3
; GFX6-NEXT: v_lshl_b64 v[8:9], v[4:5], v8
-; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0xffffffc0, v3
+; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 64, v3
; GFX6-NEXT: v_ashr_i64 v[10:11], v[4:5], v3
; GFX6-NEXT: v_or_b32_e32 v6, v6, v8
; GFX6-NEXT: v_ashrrev_i32_e32 v8, 31, v5
@@ -1692,7 +1692,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX8-NEXT: v_sub_u32_e32 v8, vcc, 64, v3
; GFX8-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX8-NEXT: v_lshlrev_b64 v[8:9], v8, v[4:5]
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0xffffffc0, v3
+; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 64, v3
; GFX8-NEXT: v_ashrrev_i64 v[10:11], v3, v[4:5]
; GFX8-NEXT: v_or_b32_e32 v6, v6, v8
; GFX8-NEXT: v_ashrrev_i32_e32 v8, 31, v5
@@ -1715,7 +1715,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX9-NEXT: v_sub_u32_e32 v8, 64, v3
; GFX9-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX9-NEXT: v_lshlrev_b64 v[8:9], v8, v[4:5]
-; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffc0, v3
+; GFX9-NEXT: v_subrev_u32_e32 v2, 64, v3
; GFX9-NEXT: v_ashrrev_i64 v[10:11], v3, v[4:5]
; GFX9-NEXT: v_or_b32_e32 v6, v6, v8
; GFX9-NEXT: v_ashrrev_i32_e32 v8, 31, v5
@@ -1735,7 +1735,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_bfe_i32 v4, v2, 0, 1
; GFX10-NEXT: v_sub_nc_u32_e32 v2, 64, v3
-; GFX10-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v3
+; GFX10-NEXT: v_subrev_nc_u32_e32 v10, 64, v3
; GFX10-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v3
; GFX10-NEXT: v_ashrrev_i32_e32 v5, 31, v4
@@ -1758,7 +1758,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_bfe_i32 v4, v2, 0, 1
; GFX11-NEXT: v_sub_nc_u32_e32 v2, 64, v3
-; GFX11-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v3
+; GFX11-NEXT: v_subrev_nc_u32_e32 v10, 64, v3
; GFX11-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v3
; GFX11-NEXT: v_ashrrev_i32_e32 v5, 31, v4
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
index 46d6b86789c778d..405b1e8f3a250f8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
@@ -1438,7 +1438,7 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) {
; SI-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; SI-NEXT: v_ffbh_i32_e32 v3, 0
; SI-NEXT: v_add_i32_e32 v2, vcc, 32, v2
-; SI-NEXT: v_add_i32_e32 v3, vcc, -1, v3
+; SI-NEXT: v_subrev_i32_e32 v3, vcc, 1, v3
; SI-NEXT: v_mov_b32_e32 v1, 0
; SI-NEXT: v_min_u32_e32 v2, v3, v2
; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], v2
@@ -1456,7 +1456,7 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) {
; VI-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; VI-NEXT: v_ffbh_i32_e32 v3, 0
; VI-NEXT: v_add_u32_e32 v2, vcc, 32, v2
-; VI-NEXT: v_add_u32_e32 v3, vcc, -1, v3
+; VI-NEXT: v_subrev_u32_e32 v3, vcc, 1, v3
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: v_min_u32_e32 v2, v3, v2
; VI-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
index 6e55d7fdb5e9575..146f344930a4ee7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
@@ -4101,7 +4101,7 @@ define float @v_fdiv_f32_constrhs0_dynamic_25ulp(float %x) #0 {
; GFX10-NEXT: v_rcp_f32_e32 v1, 0x3f40e400
; GFX10-NEXT: v_frexp_mant_f32_e32 v2, v0
; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
-; GFX10-NEXT: v_add_nc_u32_e32 v0, -14, v0
+; GFX10-NEXT: v_subrev_nc_u32_e32 v0, 14, v0
; GFX10-NEXT: v_mul_f32_e32 v1, v2, v1
; GFX10-NEXT: v_ldexp_f32 v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -4112,9 +4112,10 @@ define float @v_fdiv_f32_constrhs0_dynamic_25ulp(float %x) #0 {
; GFX11-NEXT: v_rcp_f32_e32 v1, 0x3f40e400
; GFX11-NEXT: v_frexp_mant_f32_e32 v2, v0
; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_subrev_nc_u32_e32 v0, 14, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_dual_mul_f32 v1, v2, v1 :: v_dual_add_nc_u32 v0, -14, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mul_f32_e32 v1, v2, v1
; GFX11-NEXT: v_ldexp_f32 v0, v1, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
; EG-LABEL: v_fdiv_f32_constrhs0_dynamic_25ulp:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
index 5d76b542fad894f..3bd3486ec261d4c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
@@ -20,10 +20,10 @@ define amdgpu_ps i7 @s_fshl_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX6-NEXT: v_mul_lo_u32 v0, v0, 7
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
-; GFX6-NEXT: v_add_i32_e32 v1, vcc, -7, v0
+; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 7, v0
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX6-NEXT: v_add_i32_e32 v1, vcc, -7, v0
+; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 7, v0
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 6, v0
@@ -51,10 +51,10 @@ define amdgpu_ps i7 @s_fshl_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
; GFX8-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX8-NEXT: v_mul_lo_u32 v0, v0, 7
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s2, v0
-; GFX8-NEXT: v_add_u32_e32 v1, vcc, -7, v0
+; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 7, v0
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX8-NEXT: v_add_u32_e32 v1, vcc, -7, v0
+; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 7, v0
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX8-NEXT: v_sub_u16_e32 v1, 6, v0
@@ -82,10 +82,10 @@ define amdgpu_ps i7 @s_fshl_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
; GFX9-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX9-NEXT: v_mul_lo_u32 v0, v0, 7
; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0
-; GFX9-NEXT: v_add_u32_e32 v1, -7, v0
+; GFX9-NEXT: v_subrev_u32_e32 v1, 7, v0
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX9-NEXT: v_add_u32_e32 v1, -7, v0
+; GFX9-NEXT: v_subrev_u32_e32 v1, 7, v0
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX9-NEXT: v_sub_u16_e32 v1, 6, v0
@@ -113,10 +113,10 @@ define amdgpu_ps i7 @s_fshl_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
; GFX10-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX10-NEXT: v_mul_lo_u32 v0, v0, 7
; GFX10-NEXT: v_sub_nc_u32_e32 v0, s2, v0
-; GFX10-NEXT: v_add_nc_u32_e32 v1, -7, v0
+; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
-; GFX10-NEXT: v_add_nc_u32_e32 v1, -7, v0
+; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10-NEXT: v_sub_nc_u16 v1, 6, v0
@@ -150,11 +150,11 @@ define amdgpu_ps i7 @s_fshl_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
; GFX11-NEXT: v_mul_lo_u32 v0, v0, 7
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_nc_u32_e32 v0, s2, v0
-; GFX11-NEXT: v_add_nc_u32_e32 v1, -7, v0
+; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
-; GFX11-NEXT: v_add_nc_u32_e32 v1, -7, v0
+; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -189,10 +189,10 @@ define i7 @v_fshl_i7(i7 %lhs, i7 %rhs, i7 %amt) {
; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX6-NEXT: v_mul_lo_u32 v3, v3, 7
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3
-; GFX6-NEXT: v_add_i32_e32 v3, vcc, -7, v2
+; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 7, v2
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX6-NEXT: v_add_i32_e32 v3, vcc, -7, v2
+; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 7, v2
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 6, v2
@@ -219,10 +219,10 @@ define i7 @v_fshl_i7(i7 %lhs, i7 %rhs, i7 %amt) {
; GFX8-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX8-NEXT: v_mul_lo_u32 v3, v3, 7
; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, -7, v2
+; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 7, v2
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, -7, v2
+; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 7, v2
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX8-NEXT: v_sub_u16_e32 v3, 6, v2
@@ -249,10 +249,10 @@ define i7 @v_fshl_i7(i7 %lhs, i7 %rhs, i7 %amt) {
; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX9-NEXT: v_mul_lo_u32 v3, v3, 7
; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3
-; GFX9-NEXT: v_add_u32_e32 v3, -7, v2
+; GFX9-NEXT: v_subrev_u32_e32 v3, 7, v2
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX9-NEXT: v_add_u32_e32 v3, -7, v2
+; GFX9-NEXT: v_subrev_u32_e32 v3, 7, v2
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX9-NEXT: v_sub_u16_e32 v3, 6, v2
@@ -279,10 +279,10 @@ define i7 @v_fshl_i7(i7 %lhs, i7 %rhs, i7 %amt) {
; GFX10-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX10-NEXT: v_mul_lo_u32 v3, v3, 7
; GFX10-NEXT: v_sub_nc_u32_e32 v2, v2, v3
-; GFX10-NEXT: v_add_nc_u32_e32 v3, -7, v2
+; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
-; GFX10-NEXT: v_add_nc_u32_e32 v3, -7, v2
+; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
; GFX10-NEXT: v_sub_nc_u16 v3, 6, v2
@@ -315,11 +315,11 @@ define i7 @v_fshl_i7(i7 %lhs, i7 %rhs, i7 %amt) {
; GFX11-NEXT: v_mul_lo_u32 v3, v3, 7
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_nc_u32_e32 v2, v2, v3
-; GFX11-NEXT: v_add_nc_u32_e32 v3, -7, v2
+; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
-; GFX11-NEXT: v_add_nc_u32_e32 v3, -7, v2
+; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
@@ -1550,16 +1550,16 @@ define amdgpu_ps i24 @s_fshl_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
; GFX6-NEXT: s_bfe_u32 s1, s1, 0x170001
; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GFX6-NEXT: v_mul_lo_u32 v2, v0, v1
-; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2
-; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+; GFX6-NEXT: v_mul_lo_u32 v1, v0, v1
+; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1
+; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
-; GFX6-NEXT: v_add_i32_e32 v2, vcc, v0, v1
+; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 24, v0
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
-; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX6-NEXT: v_add_i32_e32 v1, vcc, v0, v1
+; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 24, v0
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 23, v0
@@ -1580,16 +1580,16 @@ define amdgpu_ps i24 @s_fshl_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
; GFX8-NEXT: s_bfe_u32 s1, s1, 0x170001
; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GFX8-NEXT: v_mul_lo_u32 v2, v0, v1
-; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
+; GFX8-NEXT: v_mul_lo_u32 v1, v0, v1
+; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s2, v0
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, v0, v1
+; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 24, v0
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX8-NEXT: v_add_u32_e32 v1, vcc, v0, v1
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 24, v0
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX8-NEXT: v_sub_u32_e32 v1, vcc, 23, v0
@@ -1616,10 +1616,10 @@ define amdgpu_ps i24 @s_fshl_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
; GFX9-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24
; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0
-; GFX9-NEXT: v_add_u32_e32 v1, 0xffffffe8, v0
+; GFX9-NEXT: v_subrev_u32_e32 v1, 24, v0
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX9-NEXT: v_add_u32_e32 v1, 0xffffffe8, v0
+; GFX9-NEXT: v_subrev_u32_e32 v1, 24, v0
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX9-NEXT: v_sub_u32_e32 v1, 23, v0
@@ -1644,10 +1644,10 @@ define amdgpu_ps i24 @s_fshl_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
; GFX10-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24
; GFX10-NEXT: v_sub_nc_u32_e32 v0, s2, v0
-; GFX10-NEXT: v_add_nc_u32_e32 v1, 0xffffffe8, v0
+; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 24, v0
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
-; GFX10-NEXT: v_add_nc_u32_e32 v1, 0xffffffe8, v0
+; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 24, v0
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10-NEXT: v_sub_nc_u32_e32 v1, 23, v0
@@ -1678,11 +1678,11 @@ define amdgpu_ps i24 @s_fshl_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
; GFX11-NEXT: v_mul_lo_u32 v0, v0, 24
; GFX11-NEXT: v_sub_nc_u32_e32 v0, s2, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_add_nc_u32_e32 v1, 0xffffffe8, v0
+; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 24, v0
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_add_nc_u32_e32 v1, 0xffffffe8, v0
+; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 24, v0
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
@@ -1710,16 +1710,16 @@ define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) {
; GFX6-NEXT: v_bfe_u32 v1, v1, 1, 23
; GFX6-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; GFX6-NEXT: v_cvt_u32_f32_e32 v3, v3
-; GFX6-NEXT: v_mul_lo_u32 v5, v3, v4
-; GFX6-NEXT: v_mul_hi_u32 v5, v3, v5
-; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v5
+; GFX6-NEXT: v_mul_lo_u32 v4, v3, v4
+; GFX6-NEXT: v_mul_hi_u32 v4, v3, v4
+; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v4
; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX6-NEXT: v_mul_lo_u32 v3, v3, 24
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3
-; GFX6-NEXT: v_add_i32_e32 v3, vcc, v2, v4
+; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v2
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX6-NEXT: v_add_i32_e32 v3, vcc, 0xffffffe8, v2
+; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v2
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v2
@@ -1740,16 +1740,16 @@ define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) {
; GFX8-NEXT: v_bfe_u32 v1, v1, 1, 23
; GFX8-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v3
-; GFX8-NEXT: v_mul_lo_u32 v5, v3, v4
-; GFX8-NEXT: v_mul_hi_u32 v5, v3, v5
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v5
+; GFX8-NEXT: v_mul_lo_u32 v4, v3, v4
+; GFX8-NEXT: v_mul_hi_u32 v4, v3, v4
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v4
; GFX8-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX8-NEXT: v_mul_lo_u32 v3, v3, 24
; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, v2, v4
+; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v2
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0xffffffe8, v2
+; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v2
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v2
@@ -1776,10 +1776,10 @@ define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) {
; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX9-NEXT: v_mul_lo_u32 v3, v3, 24
; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3
-; GFX9-NEXT: v_add_u32_e32 v3, 0xffffffe8, v2
+; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v2
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX9-NEXT: v_add_u32_e32 v3, 0xffffffe8, v2
+; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v2
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX9-NEXT: v_sub_u32_e32 v3, 23, v2
@@ -1804,10 +1804,10 @@ define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) {
; GFX10-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX10-NEXT: v_mul_lo_u32 v3, v3, 24
; GFX10-NEXT: v_sub_nc_u32_e32 v2, v2, v3
-; GFX10-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v2
+; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v2
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
-; GFX10-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v2
+; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v2
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v2
@@ -1838,11 +1838,11 @@ define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) {
; GFX11-NEXT: v_mul_lo_u32 v3, v3, 24
; GFX11-NEXT: v_sub_nc_u32_e32 v2, v2, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v2
+; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v2
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2
; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v2
+; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v2
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2
; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
@@ -1887,7 +1887,7 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX6-NEXT: s_lshr_b32 s0, s2, 16
; GFX6-NEXT: s_lshr_b32 s1, s3, 8
; GFX6-NEXT: s_bfe_u32 s8, s2, 0x80008
-; GFX6-NEXT: v_mul_lo_u32 v4, v2, v3
+; GFX6-NEXT: v_mul_lo_u32 v3, v2, v3
; GFX6-NEXT: s_and_b32 s7, s2, 0xff
; GFX6-NEXT: s_lshl_b32 s8, s8, 8
; GFX6-NEXT: s_and_b32 s0, s0, 0xff
@@ -1906,7 +1906,7 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX6-NEXT: v_or_b32_e32 v1, s1, v1
; GFX6-NEXT: s_lshr_b32 s1, s4, 16
; GFX6-NEXT: s_bfe_u32 s7, s4, 0x80008
-; GFX6-NEXT: v_mul_hi_u32 v4, v2, v4
+; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX6-NEXT: s_and_b32 s3, s4, 0xff
; GFX6-NEXT: s_lshl_b32 s7, s7, 8
; GFX6-NEXT: s_and_b32 s1, s1, 0xff
@@ -1915,53 +1915,53 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX6-NEXT: s_and_b32 s3, 0xffff, s3
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_or_b32 s1, s3, s1
-; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v4
-; GFX6-NEXT: v_mul_hi_u32 v4, s1, v2
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; GFX6-NEXT: v_mul_hi_u32 v3, s1, v2
; GFX6-NEXT: s_lshr_b32 s2, s5, 8
; GFX6-NEXT: s_and_b32 s3, s5, 0xff
-; GFX6-NEXT: v_mov_b32_e32 v5, s4
+; GFX6-NEXT: v_mov_b32_e32 v4, s4
; GFX6-NEXT: s_and_b32 s2, s2, 0xff
-; GFX6-NEXT: v_alignbit_b32 v5, s3, v5, 24
+; GFX6-NEXT: v_alignbit_b32 v4, s3, v4, 24
; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
-; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v5
-; GFX6-NEXT: v_mul_lo_u32 v4, v4, 24
+; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
+; GFX6-NEXT: v_mul_lo_u32 v3, v3, 24
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
-; GFX6-NEXT: v_or_b32_e32 v5, s2, v5
-; GFX6-NEXT: v_mul_hi_u32 v2, v5, v2
-; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s1, v4
-; GFX6-NEXT: v_add_i32_e32 v6, vcc, v4, v3
-; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
+; GFX6-NEXT: v_or_b32_e32 v4, s2, v4
+; GFX6-NEXT: v_mul_hi_u32 v2, v4, v2
+; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s1, v3
+; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 24, v3
+; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v3
; GFX6-NEXT: v_mul_lo_u32 v2, v2, 24
-; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
-; GFX6-NEXT: v_add_i32_e32 v6, vcc, v4, v3
-; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
-; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
-; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v5, v2
-; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 23, v4
-; GFX6-NEXT: v_add_i32_e32 v5, vcc, v2, v3
+; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
+; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 24, v3
+; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v3
+; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
+; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v4, v2
+; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 23, v3
+; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
-; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GFX6-NEXT: v_add_i32_e32 v3, vcc, v2, v3
+; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
-; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4
+; GFX6-NEXT: v_and_b32_e32 v3, 0xffffff, v3
; GFX6-NEXT: s_lshr_b32 s0, s0, 1
-; GFX6-NEXT: v_and_b32_e32 v6, 0xffffff, v6
-; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX6-NEXT: v_lshl_b32_e32 v4, s6, v4
-; GFX6-NEXT: v_lshr_b32_e32 v6, s0, v6
-; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v2
+; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v5
+; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX6-NEXT: v_lshl_b32_e32 v3, s6, v3
+; GFX6-NEXT: v_lshr_b32_e32 v5, s0, v5
+; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 23, v2
; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
-; GFX6-NEXT: v_or_b32_e32 v4, v4, v6
+; GFX6-NEXT: v_or_b32_e32 v3, v3, v5
; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 1, v1
-; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v3
+; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v4
; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1
-; GFX6-NEXT: v_bfe_u32 v2, v4, 8, 8
+; GFX6-NEXT: v_bfe_u32 v2, v3, 8, 8
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
-; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v4
+; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
-; GFX6-NEXT: v_bfe_u32 v2, v4, 16, 8
+; GFX6-NEXT: v_bfe_u32 v2, v3, 16, 8
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v0
@@ -2021,7 +2021,7 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX8-NEXT: v_not_b32_e32 v1, 23
; GFX8-NEXT: s_or_b32 s3, s8, s3
; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
-; GFX8-NEXT: v_mul_lo_u32 v2, v0, v1
+; GFX8-NEXT: v_mul_lo_u32 v1, v0, v1
; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
; GFX8-NEXT: s_lshl_b32 s6, s6, 16
; GFX8-NEXT: s_or_b32 s3, s3, s6
@@ -2031,67 +2031,67 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX8-NEXT: s_lshr_b32 s8, s4, 24
; GFX8-NEXT: s_and_b32 s4, s4, 0xff
; GFX8-NEXT: s_lshl_b32 s6, s6, 8
-; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2
+; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1
; GFX8-NEXT: s_or_b32 s4, s4, s6
; GFX8-NEXT: s_and_b32 s6, s7, 0xff
; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
; GFX8-NEXT: s_lshl_b32 s6, s6, 16
; GFX8-NEXT: s_or_b32 s4, s4, s6
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
-; GFX8-NEXT: v_mul_hi_u32 v2, s4, v0
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
+; GFX8-NEXT: v_mul_hi_u32 v1, s4, v0
; GFX8-NEXT: s_lshr_b32 s9, s5, 8
; GFX8-NEXT: s_and_b32 s5, s5, 0xff
; GFX8-NEXT: s_lshl_b32 s5, s5, 8
-; GFX8-NEXT: v_mul_lo_u32 v2, v2, 24
+; GFX8-NEXT: v_mul_lo_u32 v1, v1, 24
; GFX8-NEXT: s_and_b32 s6, s9, 0xff
; GFX8-NEXT: s_or_b32 s5, s8, s5
; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
; GFX8-NEXT: s_and_b32 s5, 0xffff, s5
; GFX8-NEXT: s_lshl_b32 s6, s6, 16
; GFX8-NEXT: s_or_b32 s5, s5, s6
-; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s4, v2
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, v2, v1
+; GFX8-NEXT: v_sub_u32_e32 v1, vcc, s4, v1
+; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1
; GFX8-NEXT: v_mul_hi_u32 v0, s5, v0
-; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, v2, v1
-; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
+; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1
+; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v2
-; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2
-; GFX8-NEXT: v_lshlrev_b32_e64 v2, v2, s0
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 23, v1
+; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1
+; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s0
; GFX8-NEXT: s_lshr_b32 s0, s2, 1
-; GFX8-NEXT: v_and_b32_e32 v3, 0xffffff, v3
-; GFX8-NEXT: v_lshrrev_b32_e64 v3, v3, s0
+; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2
+; GFX8-NEXT: v_lshrrev_b32_e64 v2, v2, s0
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s5, v0
-; GFX8-NEXT: v_or_b32_e32 v2, v2, v3
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, v0, v1
+; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
+; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v0
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX8-NEXT: v_add_u32_e32 v1, vcc, v0, v1
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v0
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX8-NEXT: v_sub_u32_e32 v1, vcc, 23, v0
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 23, v0
; GFX8-NEXT: v_and_b32_e32 v0, 0xffffff, v0
; GFX8-NEXT: s_lshr_b32 s0, s3, 1
-; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1
+; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2
; GFX8-NEXT: v_lshlrev_b32_e64 v0, v0, s1
-; GFX8-NEXT: v_lshrrev_b32_e64 v1, v1, s0
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
-; GFX8-NEXT: v_mov_b32_e32 v1, 8
-; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
+; GFX8-NEXT: v_lshrrev_b32_e64 v2, v2, s0
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
+; GFX8-NEXT: v_mov_b32_e32 v2, 8
+; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT: v_mov_b32_e32 v4, 16
-; GFX8-NEXT: v_or_b32_sdwa v3, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
-; GFX8-NEXT: v_or_b32_e32 v2, v3, v2
+; GFX8-NEXT: v_or_b32_sdwa v3, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
+; GFX8-NEXT: v_or_b32_e32 v1, v3, v1
; GFX8-NEXT: v_and_b32_e32 v3, 0xff, v0
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v3
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
-; GFX8-NEXT: v_or_b32_e32 v2, v2, v3
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
-; GFX8-NEXT: v_readfirstlane_b32 s0, v2
+; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
+; GFX8-NEXT: v_or_b32_e32 v1, v1, v3
+; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
+; GFX8-NEXT: v_readfirstlane_b32 s0, v1
; GFX8-NEXT: v_readfirstlane_b32 s1, v0
; GFX8-NEXT: ; return to shader part epilog
;
@@ -2172,10 +2172,10 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX9-NEXT: s_or_b32 s5, s5, s6
; GFX9-NEXT: v_mul_hi_u32 v0, s5, v0
; GFX9-NEXT: v_sub_u32_e32 v1, s4, v1
-; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffe8, v1
+; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v1
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
-; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffe8, v1
+; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v1
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24
; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
@@ -2186,10 +2186,10 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX9-NEXT: v_lshrrev_b32_e64 v2, v2, s2
; GFX9-NEXT: v_sub_u32_e32 v0, s5, v0
; GFX9-NEXT: v_lshl_or_b32 v1, s0, v1, v2
-; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffe8, v0
+; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v0
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffe8, v0
+; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v0
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX9-NEXT: v_sub_u32_e32 v2, 23, v0
@@ -2282,9 +2282,9 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX10-NEXT: s_lshr_b32 s4, s3, 8
; GFX10-NEXT: s_and_b32 s5, s9, 0xff
; GFX10-NEXT: s_and_b32 s3, s3, 0xff
-; GFX10-NEXT: v_add_nc_u32_e32 v2, 0xffffffe8, v1
+; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
-; GFX10-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v0
+; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v0
; GFX10-NEXT: s_and_b32 s5, 0xffff, s5
; GFX10-NEXT: s_lshl_b32 s3, s3, 8
; GFX10-NEXT: s_and_b32 s4, s4, 0xff
@@ -2293,13 +2293,13 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX10-NEXT: s_lshl_b32 s5, s5, 16
; GFX10-NEXT: s_or_b32 s3, s10, s3
; GFX10-NEXT: s_and_b32 s4, 0xffff, s4
-; GFX10-NEXT: v_add_nc_u32_e32 v2, 0xffffffe8, v1
+; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
; GFX10-NEXT: s_or_b32 s2, s2, s5
; GFX10-NEXT: s_and_b32 s3, 0xffff, s3
; GFX10-NEXT: s_lshl_b32 s4, s4, 16
-; GFX10-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v0
+; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v0
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
; GFX10-NEXT: s_or_b32 s3, s3, s4
@@ -2399,9 +2399,9 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX11-NEXT: s_and_b32 s5, s8, 0xff
; GFX11-NEXT: s_lshr_b32 s4, s3, 8
; GFX11-NEXT: s_and_b32 s5, 0xffff, s5
-; GFX11-NEXT: v_add_nc_u32_e32 v2, 0xffffffe8, v1
+; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v1
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
-; GFX11-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v0
+; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v0
; GFX11-NEXT: s_and_b32 s3, s3, 0xff
; GFX11-NEXT: s_lshl_b32 s5, s5, 16
; GFX11-NEXT: s_lshl_b32 s3, s3, 8
@@ -2410,7 +2410,7 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX11-NEXT: s_and_b32 s4, s4, 0xff
; GFX11-NEXT: s_or_b32 s2, s2, s5
; GFX11-NEXT: s_or_b32 s3, s9, s3
-; GFX11-NEXT: v_add_nc_u32_e32 v2, 0xffffffe8, v1
+; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v1
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
; GFX11-NEXT: s_and_b32 s4, 0xffff, s4
@@ -2423,7 +2423,7 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX11-NEXT: s_lshr_b32 s3, s3, 1
; GFX11-NEXT: v_sub_nc_u32_e32 v2, 23, v1
; GFX11-NEXT: v_and_b32_e32 v1, 0xffffff, v1
-; GFX11-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v0
+; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v0
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2
@@ -2479,31 +2479,31 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
; GFX6-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
; GFX6-NEXT: v_cvt_u32_f32_e32 v6, v6
; GFX6-NEXT: v_bfe_u32 v2, v2, 1, 23
-; GFX6-NEXT: v_mul_lo_u32 v8, v6, v7
-; GFX6-NEXT: v_mul_hi_u32 v8, v6, v8
-; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v8
-; GFX6-NEXT: v_mul_hi_u32 v8, v4, v6
+; GFX6-NEXT: v_mul_lo_u32 v7, v6, v7
+; GFX6-NEXT: v_mul_hi_u32 v7, v6, v7
+; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v7
+; GFX6-NEXT: v_mul_hi_u32 v7, v4, v6
; GFX6-NEXT: v_mul_hi_u32 v6, v5, v6
-; GFX6-NEXT: v_mul_lo_u32 v8, v8, 24
+; GFX6-NEXT: v_mul_lo_u32 v7, v7, 24
; GFX6-NEXT: v_mul_lo_u32 v6, v6, 24
-; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v8
-; GFX6-NEXT: v_add_i32_e32 v8, vcc, v4, v7
+; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v7
+; GFX6-NEXT: v_subrev_i32_e32 v7, vcc, 24, v4
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
-; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc
-; GFX6-NEXT: v_add_i32_e32 v8, vcc, v4, v7
+; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
+; GFX6-NEXT: v_subrev_i32_e32 v7, vcc, 24, v4
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
-; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc
-; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 23, v4
+; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
+; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 23, v4
; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v0, v4, v0
-; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v8
+; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v7
; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2
; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v5, v6
-; GFX6-NEXT: v_add_i32_e32 v4, vcc, v2, v7
+; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
-; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0xffffffe8, v2
+; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 23, v2
@@ -2526,31 +2526,31 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
; GFX8-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
; GFX8-NEXT: v_cvt_u32_f32_e32 v6, v6
; GFX8-NEXT: v_bfe_u32 v2, v2, 1, 23
-; GFX8-NEXT: v_mul_lo_u32 v8, v6, v7
-; GFX8-NEXT: v_mul_hi_u32 v8, v6, v8
-; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v8
-; GFX8-NEXT: v_mul_hi_u32 v8, v4, v6
+; GFX8-NEXT: v_mul_lo_u32 v7, v6, v7
+; GFX8-NEXT: v_mul_hi_u32 v7, v6, v7
+; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v7
+; GFX8-NEXT: v_mul_hi_u32 v7, v4, v6
; GFX8-NEXT: v_mul_hi_u32 v6, v5, v6
-; GFX8-NEXT: v_mul_lo_u32 v8, v8, 24
+; GFX8-NEXT: v_mul_lo_u32 v7, v7, 24
; GFX8-NEXT: v_mul_lo_u32 v6, v6, 24
-; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v8
-; GFX8-NEXT: v_add_u32_e32 v8, vcc, v4, v7
+; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v7
+; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, 24, v4
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc
-; GFX8-NEXT: v_add_u32_e32 v8, vcc, v4, v7
+; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
+; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, 24, v4
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc
-; GFX8-NEXT: v_sub_u32_e32 v8, vcc, 23, v4
+; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
+; GFX8-NEXT: v_sub_u32_e32 v7, vcc, 23, v4
; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4
; GFX8-NEXT: v_lshlrev_b32_e32 v0, v4, v0
-; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v8
+; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v7
; GFX8-NEXT: v_lshrrev_b32_e32 v2, v4, v2
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v5, v6
-; GFX8-NEXT: v_add_u32_e32 v4, vcc, v2, v7
+; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 24, v2
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
-; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0xffffffe8, v2
+; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 24, v2
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 23, v2
@@ -2583,21 +2583,21 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24
; GFX9-NEXT: v_sub_u32_e32 v4, v4, v7
; GFX9-NEXT: v_sub_u32_e32 v5, v5, v6
-; GFX9-NEXT: v_add_u32_e32 v6, 0xffffffe8, v4
+; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
-; GFX9-NEXT: v_add_u32_e32 v6, 0xffffffe8, v4
+; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
; GFX9-NEXT: v_sub_u32_e32 v6, 23, v4
; GFX9-NEXT: v_and_b32_e32 v6, 0xffffff, v6
-; GFX9-NEXT: v_add_u32_e32 v7, 0xffffffe8, v5
+; GFX9-NEXT: v_subrev_u32_e32 v7, 24, v5
; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4
; GFX9-NEXT: v_lshrrev_b32_e32 v2, v6, v2
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v5
; GFX9-NEXT: v_lshl_or_b32 v0, v0, v4, v2
; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v7, vcc
-; GFX9-NEXT: v_add_u32_e32 v4, 0xffffffe8, v2
+; GFX9-NEXT: v_subrev_u32_e32 v4, 24, v2
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GFX9-NEXT: v_sub_u32_e32 v4, 23, v2
@@ -2627,15 +2627,15 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
; GFX10-NEXT: v_mul_lo_u32 v6, v6, 24
; GFX10-NEXT: v_sub_nc_u32_e32 v4, v4, v7
; GFX10-NEXT: v_sub_nc_u32_e32 v5, v5, v6
-; GFX10-NEXT: v_add_nc_u32_e32 v6, 0xffffffe8, v4
+; GFX10-NEXT: v_subrev_nc_u32_e32 v6, 24, v4
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4
-; GFX10-NEXT: v_add_nc_u32_e32 v7, 0xffffffe8, v5
+; GFX10-NEXT: v_subrev_nc_u32_e32 v7, 24, v5
; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5
-; GFX10-NEXT: v_add_nc_u32_e32 v6, 0xffffffe8, v4
+; GFX10-NEXT: v_subrev_nc_u32_e32 v6, 24, v4
; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4
-; GFX10-NEXT: v_add_nc_u32_e32 v7, 0xffffffe8, v5
+; GFX10-NEXT: v_subrev_nc_u32_e32 v7, 24, v5
; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5
; GFX10-NEXT: v_sub_nc_u32_e32 v6, 23, v4
@@ -2679,32 +2679,34 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_lo_u32 v6, v6, 24
; GFX11-NEXT: v_sub_nc_u32_e32 v5, v5, v6
-; GFX11-NEXT: v_add_nc_u32_e32 v6, 0xffffffe8, v4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_subrev_nc_u32_e32 v6, 24, v4
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_subrev_nc_u32_e32 v7, 24, v5
; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_add_nc_u32_e32 v6, 0xffffffe8, v4
-; GFX11-NEXT: v_add_nc_u32_e32 v7, 0xffffffe8, v5
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT: v_subrev_nc_u32_e32 v6, 24, v4
; GFX11-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_dual_cndmask_b32 v4, v4, v6 :: v_dual_add_nc_u32 v7, 0xffffffe8, v5
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT: v_subrev_nc_u32_e32 v7, 24, v5
+; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5
-; GFX11-NEXT: v_sub_nc_u32_e32 v6, 23, v4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_dual_cndmask_b32 v5, v5, v7 :: v_dual_and_b32 v4, 0xffffff, v4
-; GFX11-NEXT: v_and_b32_e32 v6, 0xffffff, v6
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo
; GFX11-NEXT: v_sub_nc_u32_e32 v7, 23, v5
; GFX11-NEXT: v_and_b32_e32 v5, 0xffffff, v5
-; GFX11-NEXT: v_lshrrev_b32_e32 v2, v6, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_and_b32_e32 v7, 0xffffff, v7
-; GFX11-NEXT: v_lshl_or_b32 v0, v0, v4, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_sub_nc_u32_e32 v6, 23, v4
+; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v4
; GFX11-NEXT: v_lshrrev_b32_e32 v3, v7, v3
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_and_b32_e32 v6, 0xffffff, v6
; GFX11-NEXT: v_lshl_or_b32 v1, v1, v5, v3
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_lshrrev_b32_e32 v2, v6, v2
+; GFX11-NEXT: v_lshl_or_b32 v0, v0, v4, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call <2 x i24> @llvm.fshl.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt)
ret <2 x i24> %result
@@ -6059,11 +6061,11 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_and_b32_e32 v15, 0x7f, v8
; GFX6-NEXT: v_sub_i32_e32 v9, vcc, 64, v15
-; GFX6-NEXT: v_add_i32_e32 v17, vcc, 0xffffffc0, v15
+; GFX6-NEXT: v_subrev_i32_e32 v16, vcc, 64, v15
; GFX6-NEXT: v_lshr_b64 v[9:10], v[0:1], v9
; GFX6-NEXT: v_lshl_b64 v[11:12], v[2:3], v15
; GFX6-NEXT: v_lshl_b64 v[13:14], v[0:1], v15
-; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v17
+; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v16
; GFX6-NEXT: v_or_b32_e32 v9, v9, v11
; GFX6-NEXT: v_or_b32_e32 v10, v10, v12
; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15
@@ -6080,9 +6082,8 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
; GFX6-NEXT: v_lshr_b64 v[2:3], v[6:7], 1
; GFX6-NEXT: v_and_b32_e32 v14, 0x7f, v4
-; GFX6-NEXT: v_not_b32_e32 v16, 63
; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 64, v14
-; GFX6-NEXT: v_add_i32_e32 v15, vcc, v14, v16
+; GFX6-NEXT: v_subrev_i32_e32 v15, vcc, 64, v14
; GFX6-NEXT: v_lshr_b64 v[4:5], v[0:1], v14
; GFX6-NEXT: v_lshl_b64 v[6:7], v[2:3], v6
; GFX6-NEXT: v_lshr_b64 v[8:9], v[2:3], v14
@@ -6108,11 +6109,11 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v15, 0x7f, v8
; GFX8-NEXT: v_sub_u32_e32 v9, vcc, 64, v15
-; GFX8-NEXT: v_add_u32_e32 v17, vcc, 0xffffffc0, v15
+; GFX8-NEXT: v_subrev_u32_e32 v16, vcc, 64, v15
; GFX8-NEXT: v_lshrrev_b64 v[9:10], v9, v[0:1]
; GFX8-NEXT: v_lshlrev_b64 v[11:12], v15, v[2:3]
; GFX8-NEXT: v_lshlrev_b64 v[13:14], v15, v[0:1]
-; GFX8-NEXT: v_lshlrev_b64 v[0:1], v17, v[0:1]
+; GFX8-NEXT: v_lshlrev_b64 v[0:1], v16, v[0:1]
; GFX8-NEXT: v_or_b32_e32 v9, v9, v11
; GFX8-NEXT: v_or_b32_e32 v10, v10, v12
; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15
@@ -6129,9 +6130,8 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
; GFX8-NEXT: v_lshrrev_b64 v[2:3], 1, v[6:7]
; GFX8-NEXT: v_and_b32_e32 v14, 0x7f, v4
-; GFX8-NEXT: v_not_b32_e32 v16, 63
; GFX8-NEXT: v_sub_u32_e32 v6, vcc, 64, v14
-; GFX8-NEXT: v_add_u32_e32 v15, vcc, v14, v16
+; GFX8-NEXT: v_subrev_u32_e32 v15, vcc, 64, v14
; GFX8-NEXT: v_lshrrev_b64 v[4:5], v14, v[0:1]
; GFX8-NEXT: v_lshlrev_b64 v[6:7], v6, v[2:3]
; GFX8-NEXT: v_lshrrev_b64 v[8:9], v14, v[2:3]
@@ -6157,7 +6157,7 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v15, 0x7f, v8
; GFX9-NEXT: v_sub_u32_e32 v9, 64, v15
-; GFX9-NEXT: v_add_u32_e32 v16, 0xffffffc0, v15
+; GFX9-NEXT: v_subrev_u32_e32 v16, 64, v15
; GFX9-NEXT: v_lshrrev_b64 v[9:10], v9, v[0:1]
; GFX9-NEXT: v_lshlrev_b64 v[11:12], v15, v[2:3]
; GFX9-NEXT: v_lshlrev_b64 v[13:14], v15, v[0:1]
@@ -6178,7 +6178,7 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
; GFX9-NEXT: v_and_b32_e32 v14, 0x7f, v4
; GFX9-NEXT: v_lshl_or_b32 v1, v6, 31, v1
; GFX9-NEXT: v_sub_u32_e32 v6, 64, v14
-; GFX9-NEXT: v_add_u32_e32 v15, 0xffffffc0, v14
+; GFX9-NEXT: v_subrev_u32_e32 v15, 64, v14
; GFX9-NEXT: v_lshrrev_b64 v[4:5], v14, v[0:1]
; GFX9-NEXT: v_lshlrev_b64 v[6:7], v6, v[2:3]
; GFX9-NEXT: v_lshrrev_b64 v[8:9], v14, v[2:3]
@@ -6210,7 +6210,7 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
; GFX10-NEXT: v_and_b32_e32 v19, 0x7f, v10
; GFX10-NEXT: v_lshlrev_b64 v[8:9], v18, v[2:3]
; GFX10-NEXT: v_lshl_or_b32 v5, v6, 31, v5
-; GFX10-NEXT: v_add_nc_u32_e32 v20, 0xffffffc0, v18
+; GFX10-NEXT: v_subrev_nc_u32_e32 v20, 64, v18
; GFX10-NEXT: v_lshrrev_b64 v[10:11], v11, v[0:1]
; GFX10-NEXT: v_sub_nc_u32_e32 v16, 64, v19
; GFX10-NEXT: v_lshlrev_b64 v[6:7], v18, v[0:1]
@@ -6218,7 +6218,7 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
; GFX10-NEXT: v_lshlrev_b64 v[0:1], v20, v[0:1]
; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v18
; GFX10-NEXT: v_or_b32_e32 v10, v10, v8
-; GFX10-NEXT: v_add_nc_u32_e32 v8, 0xffffffc0, v19
+; GFX10-NEXT: v_subrev_nc_u32_e32 v8, 64, v19
; GFX10-NEXT: v_lshlrev_b64 v[16:17], v16, v[12:13]
; GFX10-NEXT: v_or_b32_e32 v11, v11, v9
; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v19
@@ -6258,34 +6258,34 @@ define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
; GFX11-NEXT: v_and_b32_e32 v19, 0x7f, v10
; GFX11-NEXT: v_lshlrev_b64 v[8:9], v18, v[2:3]
; GFX11-NEXT: v_lshl_or_b32 v5, v6, 31, v5
-; GFX11-NEXT: v_lshlrev_b64 v[6:7], v18, v[0:1]
+; GFX11-NEXT: v_subrev_nc_u32_e32 v20, 64, v18
; GFX11-NEXT: v_lshrrev_b64 v[10:11], v11, v[0:1]
; GFX11-NEXT: v_sub_nc_u32_e32 v16, 64, v19
-; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v18
-; GFX11-NEXT: v_add_nc_u32_e32 v20, 0xffffffc0, v18
+; GFX11-NEXT: v_lshlrev_b64 v[6:7], v18, v[0:1]
; GFX11-NEXT: v_lshrrev_b64 v[14:15], v19, v[4:5]
-; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v19
+; GFX11-NEXT: v_lshlrev_b64 v[0:1], v20, v[0:1]
+; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v18
; GFX11-NEXT: v_or_b32_e32 v10, v10, v8
-; GFX11-NEXT: v_cndmask_b32_e32 v7, 0, v7, vcc_lo
-; GFX11-NEXT: v_add_nc_u32_e32 v8, 0xffffffc0, v19
+; GFX11-NEXT: v_subrev_nc_u32_e32 v8, 64, v19
; GFX11-NEXT: v_lshlrev_b64 v[16:17], v16, v[12:13]
-; GFX11-NEXT: v_lshlrev_b64 v[0:1], v20, v[0:1]
; GFX11-NEXT: v_or_b32_e32 v11, v11, v9
-; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v19
+; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v19
+; GFX11-NEXT: v_cndmask_b32_e32 v10, v0, v10, vcc_lo
; GFX11-NEXT: v_lshrrev_b64 v[8:9], v8, v[12:13]
-; GFX11-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v19
+; GFX11-NEXT: v_cndmask_b32_e32 v11, v1, v11, vcc_lo
; GFX11-NEXT: v_or_b32_e32 v14, v14, v16
; GFX11-NEXT: v_or_b32_e32 v15, v15, v17
-; GFX11-NEXT: v_dual_cndmask_b32 v10, v0, v10 :: v_dual_cndmask_b32 v11, v1, v11
; GFX11-NEXT: v_lshrrev_b64 v[0:1], v19, v[12:13]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v14, s0
; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v18
+; GFX11-NEXT: v_dual_cndmask_b32 v6, 0, v6 :: v_dual_cndmask_b32 v7, 0, v7
+; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v14, s0
; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v15, s0
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v8, v4, s1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, v2, s2
; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v4, v8, v4, s1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
; GFX11-NEXT: v_cndmask_b32_e64 v5, v9, v5, s1
; GFX11-NEXT: v_cndmask_b32_e64 v8, 0, v0, s0
; GFX11-NEXT: v_cndmask_b32_e64 v9, 0, v1, s0
@@ -6307,15 +6307,15 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 64, v7
; GFX6-NEXT: v_lshr_b64 v[1:2], s[0:1], v1
; GFX6-NEXT: v_lshl_b64 v[3:4], s[2:3], v7
-; GFX6-NEXT: v_add_i32_e32 v9, vcc, 0xffffffc0, v7
+; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, 64, v7
; GFX6-NEXT: v_lshl_b64 v[5:6], s[0:1], v7
; GFX6-NEXT: v_or_b32_e32 v3, v1, v3
; GFX6-NEXT: v_or_b32_e32 v4, v2, v4
-; GFX6-NEXT: v_lshl_b64 v[1:2], s[0:1], v9
+; GFX6-NEXT: v_lshl_b64 v[1:2], s[0:1], v8
; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7
; GFX6-NEXT: v_not_b32_e32 v0, v0
; GFX6-NEXT: s_mov_b32 s8, 0
-; GFX6-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v8, 0, v5, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
@@ -6324,34 +6324,33 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
; GFX6-NEXT: s_lshr_b64 s[0:1], s[4:5], 1
; GFX6-NEXT: s_lshl_b32 s9, s6, 31
-; GFX6-NEXT: v_and_b32_e32 v11, 0x7f, v0
+; GFX6-NEXT: v_and_b32_e32 v10, 0x7f, v0
; GFX6-NEXT: v_cndmask_b32_e32 v7, v1, v3, vcc
-; GFX6-NEXT: v_cndmask_b32_e32 v10, v2, v4, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v9, v2, v4, vcc
; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[8:9]
; GFX6-NEXT: s_lshr_b64 s[2:3], s[6:7], 1
-; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 64, v11
-; GFX6-NEXT: v_not_b32_e32 v8, 63
-; GFX6-NEXT: v_lshr_b64 v[0:1], s[0:1], v11
+; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 64, v10
+; GFX6-NEXT: v_lshr_b64 v[0:1], s[0:1], v10
; GFX6-NEXT: v_lshl_b64 v[2:3], s[2:3], v2
-; GFX6-NEXT: v_add_i32_e32 v8, vcc, v11, v8
+; GFX6-NEXT: v_subrev_i32_e32 v11, vcc, 64, v10
; GFX6-NEXT: v_or_b32_e32 v2, v0, v2
; GFX6-NEXT: v_or_b32_e32 v3, v1, v3
-; GFX6-NEXT: v_lshr_b64 v[0:1], s[2:3], v8
-; GFX6-NEXT: v_lshr_b64 v[4:5], s[2:3], v11
-; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v11
+; GFX6-NEXT: v_lshr_b64 v[0:1], s[2:3], v11
+; GFX6-NEXT: v_lshr_b64 v[4:5], s[2:3], v10
+; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v10
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX6-NEXT: v_mov_b32_e32 v2, s0
; GFX6-NEXT: v_mov_b32_e32 v3, s1
-; GFX6-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v11
+; GFX6-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v10
; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1]
; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1]
; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
-; GFX6-NEXT: v_or_b32_e32 v0, v9, v0
+; GFX6-NEXT: v_or_b32_e32 v0, v8, v0
; GFX6-NEXT: v_or_b32_e32 v1, v6, v1
; GFX6-NEXT: v_or_b32_e32 v2, v7, v2
-; GFX6-NEXT: v_or_b32_e32 v3, v10, v3
+; GFX6-NEXT: v_or_b32_e32 v3, v9, v3
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_fshl_i128_ssv:
@@ -6360,15 +6359,15 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
; GFX8-NEXT: v_sub_u32_e32 v1, vcc, 64, v7
; GFX8-NEXT: v_lshrrev_b64 v[1:2], v1, s[0:1]
; GFX8-NEXT: v_lshlrev_b64 v[3:4], v7, s[2:3]
-; GFX8-NEXT: v_add_u32_e32 v9, vcc, 0xffffffc0, v7
+; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, 64, v7
; GFX8-NEXT: v_lshlrev_b64 v[5:6], v7, s[0:1]
; GFX8-NEXT: v_or_b32_e32 v3, v1, v3
; GFX8-NEXT: v_or_b32_e32 v4, v2, v4
-; GFX8-NEXT: v_lshlrev_b64 v[1:2], v9, s[0:1]
+; GFX8-NEXT: v_lshlrev_b64 v[1:2], v8, s[0:1]
; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7
; GFX8-NEXT: v_not_b32_e32 v0, v0
; GFX8-NEXT: s_mov_b32 s8, 0
-; GFX8-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v8, 0, v5, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
@@ -6377,34 +6376,33 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
; GFX8-NEXT: s_lshr_b64 s[0:1], s[4:5], 1
; GFX8-NEXT: s_lshl_b32 s9, s6, 31
-; GFX8-NEXT: v_and_b32_e32 v11, 0x7f, v0
+; GFX8-NEXT: v_and_b32_e32 v10, 0x7f, v0
; GFX8-NEXT: v_cndmask_b32_e32 v7, v1, v3, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v2, v4, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v9, v2, v4, vcc
; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[8:9]
; GFX8-NEXT: s_lshr_b64 s[2:3], s[6:7], 1
-; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 64, v11
-; GFX8-NEXT: v_not_b32_e32 v8, 63
-; GFX8-NEXT: v_lshrrev_b64 v[0:1], v11, s[0:1]
+; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 64, v10
+; GFX8-NEXT: v_lshrrev_b64 v[0:1], v10, s[0:1]
; GFX8-NEXT: v_lshlrev_b64 v[2:3], v2, s[2:3]
-; GFX8-NEXT: v_add_u32_e32 v8, vcc, v11, v8
+; GFX8-NEXT: v_subrev_u32_e32 v11, vcc, 64, v10
; GFX8-NEXT: v_or_b32_e32 v2, v0, v2
; GFX8-NEXT: v_or_b32_e32 v3, v1, v3
-; GFX8-NEXT: v_lshrrev_b64 v[0:1], v8, s[2:3]
-; GFX8-NEXT: v_lshrrev_b64 v[4:5], v11, s[2:3]
-; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v11
+; GFX8-NEXT: v_lshrrev_b64 v[0:1], v11, s[2:3]
+; GFX8-NEXT: v_lshrrev_b64 v[4:5], v10, s[2:3]
+; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v10
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX8-NEXT: v_mov_b32_e32 v2, s0
; GFX8-NEXT: v_mov_b32_e32 v3, s1
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v11
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v10
; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1]
; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1]
; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
-; GFX8-NEXT: v_or_b32_e32 v0, v9, v0
+; GFX8-NEXT: v_or_b32_e32 v0, v8, v0
; GFX8-NEXT: v_or_b32_e32 v1, v6, v1
; GFX8-NEXT: v_or_b32_e32 v2, v7, v2
-; GFX8-NEXT: v_or_b32_e32 v3, v10, v3
+; GFX8-NEXT: v_or_b32_e32 v3, v9, v3
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: v_fshl_i128_ssv:
@@ -6413,7 +6411,7 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
; GFX9-NEXT: v_sub_u32_e32 v1, 64, v7
; GFX9-NEXT: v_lshrrev_b64 v[1:2], v1, s[0:1]
; GFX9-NEXT: v_lshlrev_b64 v[3:4], v7, s[2:3]
-; GFX9-NEXT: v_add_u32_e32 v8, 0xffffffc0, v7
+; GFX9-NEXT: v_subrev_u32_e32 v8, 64, v7
; GFX9-NEXT: v_lshlrev_b64 v[5:6], v7, s[0:1]
; GFX9-NEXT: v_or_b32_e32 v3, v1, v3
; GFX9-NEXT: v_or_b32_e32 v4, v2, v4
@@ -6438,7 +6436,7 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
; GFX9-NEXT: v_cndmask_b32_e32 v7, v1, v3, vcc
; GFX9-NEXT: v_lshrrev_b64 v[0:1], v10, s[0:1]
; GFX9-NEXT: v_lshlrev_b64 v[2:3], v2, s[2:3]
-; GFX9-NEXT: v_add_u32_e32 v11, 0xffffffc0, v10
+; GFX9-NEXT: v_subrev_u32_e32 v11, 64, v10
; GFX9-NEXT: v_or_b32_e32 v2, v0, v2
; GFX9-NEXT: v_or_b32_e32 v3, v1, v3
; GFX9-NEXT: v_lshrrev_b64 v[0:1], v11, s[2:3]
@@ -6473,12 +6471,12 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
; GFX10-NEXT: s_lshr_b64 s[6:7], s[6:7], 1
; GFX10-NEXT: v_lshrrev_b64 v[2:3], v3, s[0:1]
; GFX10-NEXT: v_sub_nc_u32_e32 v8, 64, v13
-; GFX10-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v12
+; GFX10-NEXT: v_subrev_nc_u32_e32 v10, 64, v12
; GFX10-NEXT: v_lshrrev_b64 v[6:7], v13, s[8:9]
; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v12
; GFX10-NEXT: v_lshlrev_b64 v[4:5], v12, s[0:1]
; GFX10-NEXT: v_or_b32_e32 v2, v2, v0
-; GFX10-NEXT: v_add_nc_u32_e32 v0, 0xffffffc0, v13
+; GFX10-NEXT: v_subrev_nc_u32_e32 v0, 64, v13
; GFX10-NEXT: v_lshlrev_b64 v[8:9], v8, s[6:7]
; GFX10-NEXT: v_lshlrev_b64 v[10:11], v10, s[0:1]
; GFX10-NEXT: v_or_b32_e32 v3, v3, v1
@@ -6524,7 +6522,7 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
; GFX11-NEXT: v_sub_nc_u32_e32 v3, 64, v12
; GFX11-NEXT: v_lshlrev_b64 v[0:1], v12, s[2:3]
; GFX11-NEXT: v_sub_nc_u32_e32 v8, 64, v13
-; GFX11-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v12
+; GFX11-NEXT: v_subrev_nc_u32_e32 v10, 64, v12
; GFX11-NEXT: v_lshrrev_b64 v[6:7], v13, s[8:9]
; GFX11-NEXT: v_lshrrev_b64 v[2:3], v3, s[0:1]
; GFX11-NEXT: v_cmp_eq_u32_e64 s4, 0, v12
@@ -6533,7 +6531,7 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v13
; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v13
; GFX11-NEXT: v_or_b32_e32 v2, v2, v0
-; GFX11-NEXT: v_add_nc_u32_e32 v0, 0xffffffc0, v13
+; GFX11-NEXT: v_subrev_nc_u32_e32 v0, 64, v13
; GFX11-NEXT: v_or_b32_e32 v3, v3, v1
; GFX11-NEXT: v_or_b32_e32 v6, v6, v8
; GFX11-NEXT: v_or_b32_e32 v7, v7, v9
@@ -7679,13 +7677,12 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_and_b32_e32 v19, 0x7f, v16
-; GFX6-NEXT: v_not_b32_e32 v25, 63
; GFX6-NEXT: v_sub_i32_e32 v17, vcc, 64, v19
-; GFX6-NEXT: v_add_i32_e32 v26, vcc, v19, v25
+; GFX6-NEXT: v_subrev_i32_e32 v25, vcc, 64, v19
; GFX6-NEXT: v_lshr_b64 v[17:18], v[0:1], v17
; GFX6-NEXT: v_lshl_b64 v[21:22], v[2:3], v19
; GFX6-NEXT: v_lshl_b64 v[23:24], v[0:1], v19
-; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v26
+; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v25
; GFX6-NEXT: v_or_b32_e32 v17, v17, v21
; GFX6-NEXT: v_or_b32_e32 v18, v18, v22
; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v19
@@ -7703,7 +7700,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX6-NEXT: v_lshr_b64 v[2:3], v[10:11], 1
; GFX6-NEXT: v_and_b32_e32 v23, 0x7f, v8
; GFX6-NEXT: v_sub_i32_e32 v10, vcc, 64, v23
-; GFX6-NEXT: v_add_i32_e32 v24, vcc, v23, v25
+; GFX6-NEXT: v_subrev_i32_e32 v24, vcc, 64, v23
; GFX6-NEXT: v_lshr_b64 v[8:9], v[0:1], v23
; GFX6-NEXT: v_lshl_b64 v[10:11], v[2:3], v10
; GFX6-NEXT: v_lshr_b64 v[16:17], v[2:3], v23
@@ -7722,7 +7719,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v17, vcc
; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 64, v18
; GFX6-NEXT: v_or_b32_e32 v3, v19, v3
-; GFX6-NEXT: v_add_i32_e32 v19, vcc, v18, v25
+; GFX6-NEXT: v_subrev_i32_e32 v19, vcc, 64, v18
; GFX6-NEXT: v_lshr_b64 v[8:9], v[4:5], v8
; GFX6-NEXT: v_lshl_b64 v[10:11], v[6:7], v18
; GFX6-NEXT: v_lshl_b64 v[16:17], v[4:5], v18
@@ -7744,7 +7741,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX6-NEXT: v_lshr_b64 v[6:7], v[14:15], 1
; GFX6-NEXT: v_and_b32_e32 v14, 0x7f, v8
; GFX6-NEXT: v_sub_i32_e32 v10, vcc, 64, v14
-; GFX6-NEXT: v_add_i32_e32 v15, vcc, v14, v25
+; GFX6-NEXT: v_subrev_i32_e32 v15, vcc, 64, v14
; GFX6-NEXT: v_lshr_b64 v[8:9], v[4:5], v14
; GFX6-NEXT: v_lshl_b64 v[10:11], v[6:7], v10
; GFX6-NEXT: v_lshr_b64 v[12:13], v[6:7], v14
@@ -7771,13 +7768,12 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v19, 0x7f, v16
-; GFX8-NEXT: v_not_b32_e32 v25, 63
; GFX8-NEXT: v_sub_u32_e32 v17, vcc, 64, v19
-; GFX8-NEXT: v_add_u32_e32 v26, vcc, v19, v25
+; GFX8-NEXT: v_subrev_u32_e32 v25, vcc, 64, v19
; GFX8-NEXT: v_lshrrev_b64 v[17:18], v17, v[0:1]
; GFX8-NEXT: v_lshlrev_b64 v[21:22], v19, v[2:3]
; GFX8-NEXT: v_lshlrev_b64 v[23:24], v19, v[0:1]
-; GFX8-NEXT: v_lshlrev_b64 v[0:1], v26, v[0:1]
+; GFX8-NEXT: v_lshlrev_b64 v[0:1], v25, v[0:1]
; GFX8-NEXT: v_or_b32_e32 v17, v17, v21
; GFX8-NEXT: v_or_b32_e32 v18, v18, v22
; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v19
@@ -7795,7 +7791,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX8-NEXT: v_lshrrev_b64 v[2:3], 1, v[10:11]
; GFX8-NEXT: v_and_b32_e32 v23, 0x7f, v8
; GFX8-NEXT: v_sub_u32_e32 v10, vcc, 64, v23
-; GFX8-NEXT: v_add_u32_e32 v24, vcc, v23, v25
+; GFX8-NEXT: v_subrev_u32_e32 v24, vcc, 64, v23
; GFX8-NEXT: v_lshrrev_b64 v[8:9], v23, v[0:1]
; GFX8-NEXT: v_lshlrev_b64 v[10:11], v10, v[2:3]
; GFX8-NEXT: v_lshrrev_b64 v[16:17], v23, v[2:3]
@@ -7814,7 +7810,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v17, vcc
; GFX8-NEXT: v_sub_u32_e32 v8, vcc, 64, v18
; GFX8-NEXT: v_or_b32_e32 v3, v19, v3
-; GFX8-NEXT: v_add_u32_e32 v19, vcc, v18, v25
+; GFX8-NEXT: v_subrev_u32_e32 v19, vcc, 64, v18
; GFX8-NEXT: v_lshrrev_b64 v[8:9], v8, v[4:5]
; GFX8-NEXT: v_lshlrev_b64 v[10:11], v18, v[6:7]
; GFX8-NEXT: v_lshlrev_b64 v[16:17], v18, v[4:5]
@@ -7836,7 +7832,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX8-NEXT: v_lshrrev_b64 v[6:7], 1, v[14:15]
; GFX8-NEXT: v_and_b32_e32 v14, 0x7f, v8
; GFX8-NEXT: v_sub_u32_e32 v10, vcc, 64, v14
-; GFX8-NEXT: v_add_u32_e32 v15, vcc, v14, v25
+; GFX8-NEXT: v_subrev_u32_e32 v15, vcc, 64, v14
; GFX8-NEXT: v_lshrrev_b64 v[8:9], v14, v[4:5]
; GFX8-NEXT: v_lshlrev_b64 v[10:11], v10, v[6:7]
; GFX8-NEXT: v_lshrrev_b64 v[12:13], v14, v[6:7]
@@ -7864,7 +7860,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v19, 0x7f, v16
; GFX9-NEXT: v_sub_u32_e32 v17, 64, v19
-; GFX9-NEXT: v_add_u32_e32 v25, 0xffffffc0, v19
+; GFX9-NEXT: v_subrev_u32_e32 v25, 64, v19
; GFX9-NEXT: v_lshrrev_b64 v[17:18], v17, v[0:1]
; GFX9-NEXT: v_lshlrev_b64 v[21:22], v19, v[2:3]
; GFX9-NEXT: v_lshlrev_b64 v[23:24], v19, v[0:1]
@@ -7885,7 +7881,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX9-NEXT: v_and_b32_e32 v23, 0x7f, v8
; GFX9-NEXT: v_lshl_or_b32 v1, v10, 31, v1
; GFX9-NEXT: v_sub_u32_e32 v10, 64, v23
-; GFX9-NEXT: v_add_u32_e32 v24, 0xffffffc0, v23
+; GFX9-NEXT: v_subrev_u32_e32 v24, 64, v23
; GFX9-NEXT: v_lshrrev_b64 v[8:9], v23, v[0:1]
; GFX9-NEXT: v_lshlrev_b64 v[10:11], v10, v[2:3]
; GFX9-NEXT: v_lshrrev_b64 v[16:17], v23, v[2:3]
@@ -7904,7 +7900,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v17, vcc
; GFX9-NEXT: v_sub_u32_e32 v8, 64, v18
; GFX9-NEXT: v_or_b32_e32 v3, v19, v3
-; GFX9-NEXT: v_add_u32_e32 v19, 0xffffffc0, v18
+; GFX9-NEXT: v_subrev_u32_e32 v19, 64, v18
; GFX9-NEXT: v_lshrrev_b64 v[8:9], v8, v[4:5]
; GFX9-NEXT: v_lshlrev_b64 v[10:11], v18, v[6:7]
; GFX9-NEXT: v_lshlrev_b64 v[16:17], v18, v[4:5]
@@ -7925,7 +7921,7 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX9-NEXT: v_lshrrev_b64 v[6:7], 1, v[14:15]
; GFX9-NEXT: v_and_b32_e32 v14, 0x7f, v8
; GFX9-NEXT: v_sub_u32_e32 v10, 64, v14
-; GFX9-NEXT: v_add_u32_e32 v15, 0xffffffc0, v14
+; GFX9-NEXT: v_subrev_u32_e32 v15, 64, v14
; GFX9-NEXT: v_lshrrev_b64 v[8:9], v14, v[4:5]
; GFX9-NEXT: v_lshlrev_b64 v[10:11], v10, v[6:7]
; GFX9-NEXT: v_lshrrev_b64 v[12:13], v14, v[6:7]
@@ -7960,13 +7956,13 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX10-NEXT: v_lshl_or_b32 v9, v10, 31, v9
; GFX10-NEXT: v_lshrrev_b64 v[10:11], 1, v[10:11]
; GFX10-NEXT: v_lshrrev_b64 v[16:17], v17, v[0:1]
-; GFX10-NEXT: v_add_nc_u32_e32 v29, 0xffffffc0, v27
+; GFX10-NEXT: v_subrev_nc_u32_e32 v29, 64, v27
; GFX10-NEXT: v_sub_nc_u32_e32 v25, 64, v28
; GFX10-NEXT: v_lshlrev_b64 v[21:22], v27, v[0:1]
; GFX10-NEXT: v_lshrrev_b64 v[23:24], v28, v[8:9]
; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v27
; GFX10-NEXT: v_or_b32_e32 v18, v16, v18
-; GFX10-NEXT: v_add_nc_u32_e32 v16, 0xffffffc0, v28
+; GFX10-NEXT: v_subrev_nc_u32_e32 v16, 64, v28
; GFX10-NEXT: v_lshlrev_b64 v[25:26], v25, v[10:11]
; GFX10-NEXT: v_lshlrev_b64 v[0:1], v29, v[0:1]
; GFX10-NEXT: v_or_b32_e32 v19, v17, v19
@@ -8003,10 +7999,10 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX10-NEXT: v_lshl_or_b32 v9, v14, 31, v9
; GFX10-NEXT: v_lshrrev_b64 v[14:15], 1, v[14:15]
; GFX10-NEXT: v_sub_nc_u32_e32 v20, 64, v22
-; GFX10-NEXT: v_add_nc_u32_e32 v3, 0xffffffc0, v24
+; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 64, v24
; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v24
; GFX10-NEXT: v_or_b32_e32 v12, v10, v12
-; GFX10-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v22
+; GFX10-NEXT: v_subrev_nc_u32_e32 v10, 64, v22
; GFX10-NEXT: v_lshrrev_b64 v[18:19], v22, v[8:9]
; GFX10-NEXT: v_lshlrev_b64 v[20:21], v20, v[14:15]
; GFX10-NEXT: v_lshlrev_b64 v[3:4], v3, v[4:5]
@@ -8053,19 +8049,19 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX11-NEXT: v_cndmask_b32_e32 v22, 0, v22, vcc_lo
; GFX11-NEXT: v_sub_nc_u32_e32 v17, 64, v27
; GFX11-NEXT: v_lshlrev_b64 v[18:19], v27, v[2:3]
+; GFX11-NEXT: v_subrev_nc_u32_e32 v29, 64, v27
; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 0, v27
-; GFX11-NEXT: v_cndmask_b32_e32 v21, 0, v21, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_lshrrev_b64 v[16:17], v17, v[0:1]
-; GFX11-NEXT: v_or_b32_e32 v18, v16, v18
-; GFX11-NEXT: v_add_nc_u32_e32 v29, 0xffffffc0, v27
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_or_b32_e32 v19, v17, v19
; GFX11-NEXT: v_lshlrev_b64 v[0:1], v29, v[0:1]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_dual_cndmask_b32 v18, v0, v18 :: v_dual_cndmask_b32 v19, v1, v19
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_or_b32_e32 v19, v17, v19
+; GFX11-NEXT: v_or_b32_e32 v18, v16, v18
+; GFX11-NEXT: v_cndmask_b32_e32 v21, 0, v21, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_dual_cndmask_b32 v19, v1, v19 :: v_dual_cndmask_b32 v18, v0, v18
; GFX11-NEXT: v_sub_nc_u32_e32 v25, 64, v28
-; GFX11-NEXT: v_add_nc_u32_e32 v16, 0xffffffc0, v28
+; GFX11-NEXT: v_subrev_nc_u32_e32 v16, 64, v28
; GFX11-NEXT: v_lshrrev_b64 v[23:24], v28, v[8:9]
; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v28
; GFX11-NEXT: v_lshrrev_b64 v[0:1], v28, v[10:11]
@@ -8099,26 +8095,26 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX11-NEXT: v_lshl_or_b32 v9, v14, 31, v9
; GFX11-NEXT: v_lshrrev_b64 v[14:15], 1, v[14:15]
; GFX11-NEXT: v_sub_nc_u32_e32 v20, 64, v22
-; GFX11-NEXT: v_add_nc_u32_e32 v3, 0xffffffc0, v24
+; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 64, v24
; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v22
; GFX11-NEXT: v_or_b32_e32 v12, v10, v12
-; GFX11-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v22
+; GFX11-NEXT: v_subrev_nc_u32_e32 v10, 64, v22
; GFX11-NEXT: v_lshrrev_b64 v[18:19], v22, v[8:9]
; GFX11-NEXT: v_lshlrev_b64 v[20:21], v20, v[14:15]
; GFX11-NEXT: v_lshlrev_b64 v[3:4], v3, v[4:5]
; GFX11-NEXT: v_or_b32_e32 v5, v11, v13
-; GFX11-NEXT: v_cndmask_b32_e32 v13, 0, v16, vcc_lo
; GFX11-NEXT: v_lshrrev_b64 v[10:11], v10, v[14:15]
+; GFX11-NEXT: v_cndmask_b32_e32 v13, 0, v16, vcc_lo
; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v22
; GFX11-NEXT: v_or_b32_e32 v16, v18, v20
; GFX11-NEXT: v_or_b32_e32 v18, v19, v21
; GFX11-NEXT: v_dual_cndmask_b32 v12, v3, v12 :: v_dual_cndmask_b32 v5, v4, v5
; GFX11-NEXT: v_lshrrev_b64 v[3:4], v22, v[14:15]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_cndmask_b32_e32 v14, 0, v17, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, v16, s0
; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v24
; GFX11-NEXT: v_cndmask_b32_e64 v11, v11, v18, s0
-; GFX11-NEXT: v_cndmask_b32_e32 v14, 0, v17, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
; GFX11-NEXT: v_cndmask_b32_e64 v6, v12, v6, s2
; GFX11-NEXT: v_cndmask_b32_e64 v7, v5, v7, s2
; GFX11-NEXT: v_cndmask_b32_e64 v5, v10, v8, s1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll
index dbc8f12c2c25c48..58304d2072d7f65 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll
@@ -21,10 +21,10 @@ define amdgpu_ps i7 @s_fshr_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX6-NEXT: v_mul_lo_u32 v0, v0, 7
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
-; GFX6-NEXT: v_add_i32_e32 v1, vcc, -7, v0
+; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 7, v0
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX6-NEXT: v_add_i32_e32 v1, vcc, -7, v0
+; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 7, v0
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 6, v0
@@ -51,10 +51,10 @@ define amdgpu_ps i7 @s_fshr_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
; GFX8-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX8-NEXT: v_mul_lo_u32 v0, v0, 7
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s2, v0
-; GFX8-NEXT: v_add_u32_e32 v1, vcc, -7, v0
+; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 7, v0
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX8-NEXT: v_add_u32_e32 v1, vcc, -7, v0
+; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 7, v0
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX8-NEXT: v_sub_u16_e32 v1, 6, v0
@@ -81,10 +81,10 @@ define amdgpu_ps i7 @s_fshr_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
; GFX9-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX9-NEXT: v_mul_lo_u32 v0, v0, 7
; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0
-; GFX9-NEXT: v_add_u32_e32 v1, -7, v0
+; GFX9-NEXT: v_subrev_u32_e32 v1, 7, v0
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX9-NEXT: v_add_u32_e32 v1, -7, v0
+; GFX9-NEXT: v_subrev_u32_e32 v1, 7, v0
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX9-NEXT: v_sub_u16_e32 v1, 6, v0
@@ -111,10 +111,10 @@ define amdgpu_ps i7 @s_fshr_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
; GFX10-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX10-NEXT: v_mul_lo_u32 v0, v0, 7
; GFX10-NEXT: v_sub_nc_u32_e32 v0, s2, v0
-; GFX10-NEXT: v_add_nc_u32_e32 v1, -7, v0
+; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
-; GFX10-NEXT: v_add_nc_u32_e32 v1, -7, v0
+; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10-NEXT: v_sub_nc_u16 v1, 6, v0
@@ -147,11 +147,11 @@ define amdgpu_ps i7 @s_fshr_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
; GFX11-NEXT: v_mul_lo_u32 v0, v0, 7
; GFX11-NEXT: v_sub_nc_u32_e32 v0, s2, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_add_nc_u32_e32 v1, -7, v0
+; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_add_nc_u32_e32 v1, -7, v0
+; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
@@ -186,10 +186,10 @@ define i7 @v_fshr_i7(i7 %lhs, i7 %rhs, i7 %amt) {
; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX6-NEXT: v_mul_lo_u32 v3, v3, 7
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3
-; GFX6-NEXT: v_add_i32_e32 v3, vcc, -7, v2
+; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 7, v2
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX6-NEXT: v_add_i32_e32 v3, vcc, -7, v2
+; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 7, v2
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 6, v2
@@ -216,10 +216,10 @@ define i7 @v_fshr_i7(i7 %lhs, i7 %rhs, i7 %amt) {
; GFX8-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX8-NEXT: v_mul_lo_u32 v3, v3, 7
; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, -7, v2
+; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 7, v2
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, -7, v2
+; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 7, v2
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX8-NEXT: v_sub_u16_e32 v3, 6, v2
@@ -246,10 +246,10 @@ define i7 @v_fshr_i7(i7 %lhs, i7 %rhs, i7 %amt) {
; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX9-NEXT: v_mul_lo_u32 v3, v3, 7
; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3
-; GFX9-NEXT: v_add_u32_e32 v3, -7, v2
+; GFX9-NEXT: v_subrev_u32_e32 v3, 7, v2
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX9-NEXT: v_add_u32_e32 v3, -7, v2
+; GFX9-NEXT: v_subrev_u32_e32 v3, 7, v2
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX9-NEXT: v_sub_u16_e32 v3, 6, v2
@@ -276,10 +276,10 @@ define i7 @v_fshr_i7(i7 %lhs, i7 %rhs, i7 %amt) {
; GFX10-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX10-NEXT: v_mul_lo_u32 v3, v3, 7
; GFX10-NEXT: v_sub_nc_u32_e32 v2, v2, v3
-; GFX10-NEXT: v_add_nc_u32_e32 v3, -7, v2
+; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
-; GFX10-NEXT: v_add_nc_u32_e32 v3, -7, v2
+; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
; GFX10-NEXT: v_sub_nc_u16 v3, 6, v2
@@ -312,11 +312,11 @@ define i7 @v_fshr_i7(i7 %lhs, i7 %rhs, i7 %amt) {
; GFX11-NEXT: v_mul_lo_u32 v3, v3, 7
; GFX11-NEXT: v_sub_nc_u32_e32 v2, v2, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_add_nc_u32_e32 v3, -7, v2
+; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_add_nc_u32_e32 v3, -7, v2
+; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
@@ -1552,16 +1552,16 @@ define amdgpu_ps i24 @s_fshr_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX6-NEXT: s_and_b32 s1, s1, 0xffffff
-; GFX6-NEXT: v_mul_lo_u32 v2, v0, v1
-; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2
-; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+; GFX6-NEXT: v_mul_lo_u32 v1, v0, v1
+; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1
+; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
-; GFX6-NEXT: v_add_i32_e32 v2, vcc, v0, v1
+; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 24, v0
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
-; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX6-NEXT: v_add_i32_e32 v1, vcc, v0, v1
+; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 24, v0
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 23, v0
@@ -1583,16 +1583,16 @@ define amdgpu_ps i24 @s_fshr_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX8-NEXT: s_and_b32 s1, s1, 0xffffff
-; GFX8-NEXT: v_mul_lo_u32 v2, v0, v1
-; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
+; GFX8-NEXT: v_mul_lo_u32 v1, v0, v1
+; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s2, v0
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, v0, v1
+; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 24, v0
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX8-NEXT: v_add_u32_e32 v1, vcc, v0, v1
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 24, v0
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX8-NEXT: v_sub_u32_e32 v1, vcc, 23, v0
@@ -1620,10 +1620,10 @@ define amdgpu_ps i24 @s_fshr_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
; GFX9-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24
; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0
-; GFX9-NEXT: v_add_u32_e32 v1, 0xffffffe8, v0
+; GFX9-NEXT: v_subrev_u32_e32 v1, 24, v0
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX9-NEXT: v_add_u32_e32 v1, 0xffffffe8, v0
+; GFX9-NEXT: v_subrev_u32_e32 v1, 24, v0
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX9-NEXT: v_sub_u32_e32 v1, 23, v0
@@ -1649,10 +1649,10 @@ define amdgpu_ps i24 @s_fshr_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
; GFX10-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24
; GFX10-NEXT: v_sub_nc_u32_e32 v0, s2, v0
-; GFX10-NEXT: v_add_nc_u32_e32 v1, 0xffffffe8, v0
+; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 24, v0
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
-; GFX10-NEXT: v_add_nc_u32_e32 v1, 0xffffffe8, v0
+; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 24, v0
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10-NEXT: v_sub_nc_u32_e32 v1, 23, v0
@@ -1684,11 +1684,11 @@ define amdgpu_ps i24 @s_fshr_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
; GFX11-NEXT: v_mul_lo_u32 v0, v0, 24
; GFX11-NEXT: v_sub_nc_u32_e32 v0, s2, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_add_nc_u32_e32 v1, 0xffffffe8, v0
+; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 24, v0
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_add_nc_u32_e32 v1, 0xffffffe8, v0
+; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 24, v0
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
@@ -1717,16 +1717,16 @@ define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) {
; GFX6-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; GFX6-NEXT: v_cvt_u32_f32_e32 v3, v3
; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1
-; GFX6-NEXT: v_mul_lo_u32 v5, v3, v4
-; GFX6-NEXT: v_mul_hi_u32 v5, v3, v5
-; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v5
+; GFX6-NEXT: v_mul_lo_u32 v4, v3, v4
+; GFX6-NEXT: v_mul_hi_u32 v4, v3, v4
+; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v4
; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX6-NEXT: v_mul_lo_u32 v3, v3, 24
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3
-; GFX6-NEXT: v_add_i32_e32 v3, vcc, v2, v4
+; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v2
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX6-NEXT: v_add_i32_e32 v3, vcc, 0xffffffe8, v2
+; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v2
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v2
@@ -1748,16 +1748,16 @@ define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) {
; GFX8-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v3
; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1
-; GFX8-NEXT: v_mul_lo_u32 v5, v3, v4
-; GFX8-NEXT: v_mul_hi_u32 v5, v3, v5
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v5
+; GFX8-NEXT: v_mul_lo_u32 v4, v3, v4
+; GFX8-NEXT: v_mul_hi_u32 v4, v3, v4
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v4
; GFX8-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX8-NEXT: v_mul_lo_u32 v3, v3, 24
; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, v2, v4
+; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v2
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0xffffffe8, v2
+; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v2
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v2
@@ -1785,10 +1785,10 @@ define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) {
; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX9-NEXT: v_mul_lo_u32 v3, v3, 24
; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3
-; GFX9-NEXT: v_add_u32_e32 v3, 0xffffffe8, v2
+; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v2
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX9-NEXT: v_add_u32_e32 v3, 0xffffffe8, v2
+; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v2
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX9-NEXT: v_sub_u32_e32 v3, 23, v2
@@ -1814,10 +1814,10 @@ define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) {
; GFX10-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX10-NEXT: v_mul_lo_u32 v3, v3, 24
; GFX10-NEXT: v_sub_nc_u32_e32 v2, v2, v3
-; GFX10-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v2
+; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v2
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
-; GFX10-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v2
+; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v2
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v2
@@ -1849,11 +1849,11 @@ define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) {
; GFX11-NEXT: v_mul_lo_u32 v3, v3, 24
; GFX11-NEXT: v_sub_nc_u32_e32 v2, v2, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v2
+; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v2
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2
; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v2
+; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v2
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2
; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
@@ -1888,7 +1888,7 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX6-NEXT: s_lshr_b32 s1, s2, 16
; GFX6-NEXT: s_lshr_b32 s7, s3, 8
; GFX6-NEXT: s_bfe_u32 s10, s2, 0x80008
-; GFX6-NEXT: v_mul_lo_u32 v4, v2, v3
+; GFX6-NEXT: v_mul_lo_u32 v3, v2, v3
; GFX6-NEXT: s_or_b32 s8, s8, s9
; GFX6-NEXT: s_and_b32 s9, s2, 0xff
; GFX6-NEXT: s_lshl_b32 s10, s10, 8
@@ -1908,7 +1908,7 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX6-NEXT: v_or_b32_e32 v1, s2, v1
; GFX6-NEXT: s_lshr_b32 s2, s4, 16
; GFX6-NEXT: s_bfe_u32 s9, s4, 0x80008
-; GFX6-NEXT: v_mul_hi_u32 v4, v2, v4
+; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX6-NEXT: s_and_b32 s7, s4, 0xff
; GFX6-NEXT: s_lshl_b32 s9, s9, 8
; GFX6-NEXT: s_and_b32 s2, s2, 0xff
@@ -1917,62 +1917,62 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX6-NEXT: s_and_b32 s7, 0xffff, s7
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_or_b32 s2, s7, s2
-; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v4
-; GFX6-NEXT: v_mul_hi_u32 v4, s2, v2
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; GFX6-NEXT: v_mul_hi_u32 v3, s2, v2
; GFX6-NEXT: s_lshr_b32 s3, s5, 8
; GFX6-NEXT: s_and_b32 s5, s5, 0xff
-; GFX6-NEXT: v_mov_b32_e32 v5, s4
+; GFX6-NEXT: v_mov_b32_e32 v4, s4
; GFX6-NEXT: s_and_b32 s3, s3, 0xff
-; GFX6-NEXT: v_alignbit_b32 v5, s5, v5, 24
+; GFX6-NEXT: v_alignbit_b32 v4, s5, v4, 24
; GFX6-NEXT: s_and_b32 s3, 0xffff, s3
-; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v5
-; GFX6-NEXT: v_mul_lo_u32 v4, v4, 24
+; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
+; GFX6-NEXT: v_mul_lo_u32 v3, v3, 24
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
-; GFX6-NEXT: v_or_b32_e32 v5, s3, v5
-; GFX6-NEXT: v_mul_hi_u32 v2, v5, v2
-; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s2, v4
-; GFX6-NEXT: v_add_i32_e32 v6, vcc, v4, v3
-; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
+; GFX6-NEXT: v_or_b32_e32 v4, s3, v4
+; GFX6-NEXT: v_mul_hi_u32 v2, v4, v2
+; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s2, v3
+; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 24, v3
+; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v3
; GFX6-NEXT: v_mul_lo_u32 v2, v2, 24
-; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
-; GFX6-NEXT: v_add_i32_e32 v6, vcc, v4, v3
-; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
-; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
-; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v5, v2
-; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 23, v4
-; GFX6-NEXT: v_add_i32_e32 v5, vcc, v2, v3
+; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
+; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 24, v3
+; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v3
+; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
+; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v4, v2
+; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 23, v3
+; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX6-NEXT: s_and_b32 s6, s6, 0xff
-; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GFX6-NEXT: s_and_b32 s8, 0xffff, s8
; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
-; GFX6-NEXT: v_add_i32_e32 v3, vcc, v2, v3
+; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
; GFX6-NEXT: s_lshl_b32 s2, s6, 17
; GFX6-NEXT: s_lshl_b32 s3, s8, 1
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
; GFX6-NEXT: s_or_b32 s2, s2, s3
-; GFX6-NEXT: v_and_b32_e32 v6, 0xffffff, v6
-; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4
-; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX6-NEXT: v_lshl_b32_e32 v6, s2, v6
-; GFX6-NEXT: v_lshr_b32_e32 v4, s1, v4
-; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v2
+; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v5
+; GFX6-NEXT: v_and_b32_e32 v3, 0xffffff, v3
+; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX6-NEXT: v_lshl_b32_e32 v5, s2, v5
+; GFX6-NEXT: v_lshr_b32_e32 v3, s1, v3
+; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 23, v2
; GFX6-NEXT: s_lshl_b32 s0, s0, 17
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
-; GFX6-NEXT: v_or_b32_e32 v4, v6, v4
+; GFX6-NEXT: v_or_b32_e32 v3, v5, v3
; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
-; GFX6-NEXT: v_and_b32_e32 v3, 0xffffff, v3
+; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4
; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
-; GFX6-NEXT: v_lshlrev_b32_e32 v0, v3, v0
+; GFX6-NEXT: v_lshlrev_b32_e32 v0, v4, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1
-; GFX6-NEXT: v_bfe_u32 v2, v4, 8, 8
+; GFX6-NEXT: v_bfe_u32 v2, v3, 8, 8
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
-; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v4
+; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
-; GFX6-NEXT: v_bfe_u32 v2, v4, 16, 8
+; GFX6-NEXT: v_bfe_u32 v2, v3, 16, 8
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v0
@@ -2024,7 +2024,7 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX8-NEXT: v_not_b32_e32 v1, 23
; GFX8-NEXT: s_or_b32 s3, s10, s3
; GFX8-NEXT: s_and_b32 s8, 0xffff, s8
-; GFX8-NEXT: v_mul_lo_u32 v2, v0, v1
+; GFX8-NEXT: v_mul_lo_u32 v1, v0, v1
; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
; GFX8-NEXT: s_lshl_b32 s8, s8, 16
; GFX8-NEXT: s_or_b32 s3, s3, s8
@@ -2034,75 +2034,75 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX8-NEXT: s_lshr_b32 s10, s4, 24
; GFX8-NEXT: s_and_b32 s4, s4, 0xff
; GFX8-NEXT: s_lshl_b32 s8, s8, 8
-; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2
+; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1
; GFX8-NEXT: s_or_b32 s4, s4, s8
; GFX8-NEXT: s_and_b32 s8, s9, 0xff
; GFX8-NEXT: s_and_b32 s8, 0xffff, s8
; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
; GFX8-NEXT: s_lshl_b32 s8, s8, 16
; GFX8-NEXT: s_or_b32 s4, s4, s8
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
-; GFX8-NEXT: v_mul_hi_u32 v2, s4, v0
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
+; GFX8-NEXT: v_mul_hi_u32 v1, s4, v0
; GFX8-NEXT: s_lshr_b32 s11, s5, 8
; GFX8-NEXT: s_and_b32 s5, s5, 0xff
; GFX8-NEXT: s_lshl_b32 s5, s5, 8
-; GFX8-NEXT: v_mul_lo_u32 v2, v2, 24
+; GFX8-NEXT: v_mul_lo_u32 v1, v1, 24
; GFX8-NEXT: s_and_b32 s8, s11, 0xff
; GFX8-NEXT: s_or_b32 s5, s10, s5
; GFX8-NEXT: s_and_b32 s8, 0xffff, s8
; GFX8-NEXT: s_and_b32 s5, 0xffff, s5
; GFX8-NEXT: s_lshl_b32 s8, s8, 16
; GFX8-NEXT: s_or_b32 s5, s5, s8
-; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s4, v2
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, v2, v1
+; GFX8-NEXT: v_sub_u32_e32 v1, vcc, s4, v1
+; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1
; GFX8-NEXT: v_mul_hi_u32 v0, s5, v0
-; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, v2, v1
-; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
+; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1
+; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
-; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v2
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 23, v1
; GFX8-NEXT: s_lshl_b32 s4, s6, 17
; GFX8-NEXT: s_lshl_b32 s0, s0, 1
; GFX8-NEXT: s_or_b32 s0, s4, s0
-; GFX8-NEXT: v_and_b32_e32 v3, 0xffffff, v3
; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2
-; GFX8-NEXT: v_lshlrev_b32_e64 v3, v3, s0
-; GFX8-NEXT: v_lshrrev_b32_e64 v2, v2, s2
+; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1
+; GFX8-NEXT: v_lshlrev_b32_e64 v2, v2, s0
+; GFX8-NEXT: v_lshrrev_b32_e64 v1, v1, s2
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s5, v0
-; GFX8-NEXT: v_or_b32_e32 v2, v3, v2
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, v0, v1
+; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
+; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v0
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX8-NEXT: v_add_u32_e32 v1, vcc, v0, v1
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v0
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: s_and_b32 s7, 0xffff, s7
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX8-NEXT: v_sub_u32_e32 v1, vcc, 23, v0
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 23, v0
; GFX8-NEXT: s_lshl_b32 s0, s7, 17
; GFX8-NEXT: s_lshl_b32 s1, s1, 1
; GFX8-NEXT: s_or_b32 s0, s0, s1
-; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1
+; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2
; GFX8-NEXT: v_and_b32_e32 v0, 0xffffff, v0
-; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s0
+; GFX8-NEXT: v_lshlrev_b32_e64 v2, v2, s0
; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s3
-; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
-; GFX8-NEXT: v_mov_b32_e32 v1, 8
-; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
+; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX8-NEXT: v_mov_b32_e32 v2, 8
+; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; GFX8-NEXT: v_mov_b32_e32 v4, 16
-; GFX8-NEXT: v_or_b32_sdwa v3, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
-; GFX8-NEXT: v_or_b32_e32 v2, v3, v2
+; GFX8-NEXT: v_or_b32_sdwa v3, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
+; GFX8-NEXT: v_or_b32_e32 v1, v3, v1
; GFX8-NEXT: v_and_b32_e32 v3, 0xff, v0
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v3
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
-; GFX8-NEXT: v_or_b32_e32 v2, v2, v3
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
-; GFX8-NEXT: v_readfirstlane_b32 s0, v2
+; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
+; GFX8-NEXT: v_or_b32_e32 v1, v1, v3
+; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
+; GFX8-NEXT: v_readfirstlane_b32 s0, v1
; GFX8-NEXT: v_readfirstlane_b32 s1, v0
; GFX8-NEXT: ; return to shader part epilog
;
@@ -2175,11 +2175,11 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX9-NEXT: s_or_b32 s5, s5, s8
; GFX9-NEXT: v_mul_hi_u32 v0, s5, v0
; GFX9-NEXT: v_sub_u32_e32 v1, s4, v1
-; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffe8, v1
+; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v1
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24
-; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffe8, v1
+; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v1
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
; GFX9-NEXT: s_and_b32 s0, 0xffff, s0
; GFX9-NEXT: s_and_b32 s6, 0xffff, s6
@@ -2193,10 +2193,10 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX9-NEXT: v_lshrrev_b32_e64 v1, v1, s2
; GFX9-NEXT: v_sub_u32_e32 v0, s5, v0
; GFX9-NEXT: v_lshl_or_b32 v1, s0, v2, v1
-; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffe8, v0
+; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v0
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffe8, v0
+; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v0
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
; GFX9-NEXT: s_and_b32 s7, 0xffff, s7
@@ -2294,23 +2294,23 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX10-NEXT: s_and_b32 s4, s11, 0xff
; GFX10-NEXT: s_lshl_b32 s0, s0, 1
; GFX10-NEXT: s_and_b32 s4, 0xffff, s4
-; GFX10-NEXT: v_add_nc_u32_e32 v2, 0xffffffe8, v1
+; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
; GFX10-NEXT: s_lshl_b32 s4, s4, 16
; GFX10-NEXT: s_lshl_b32 s1, s1, 1
; GFX10-NEXT: s_or_b32 s2, s2, s4
; GFX10-NEXT: s_and_b32 s4, s13, 0xff
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo
-; GFX10-NEXT: v_add_nc_u32_e32 v2, 0xffffffe8, v0
+; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
; GFX10-NEXT: s_and_b32 s4, 0xffff, s4
-; GFX10-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v1
+; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v1
; GFX10-NEXT: s_lshl_b32 s4, s4, 16
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
; GFX10-NEXT: s_or_b32 s3, s3, s4
; GFX10-NEXT: s_lshl_b32 s4, s7, 17
-; GFX10-NEXT: v_add_nc_u32_e32 v2, 0xffffffe8, v0
+; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
; GFX10-NEXT: s_or_b32 s0, s4, s0
@@ -2393,67 +2393,69 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX11-NEXT: s_and_b32 s0, s0, 0xff
; GFX11-NEXT: v_mul_hi_u32 v1, s4, v0
; GFX11-NEXT: v_mul_hi_u32 v0, s5, v0
-; GFX11-NEXT: s_lshr_b32 s13, s3, 8
-; GFX11-NEXT: s_and_b32 s3, s3, 0xff
; GFX11-NEXT: s_lshl_b32 s6, s6, 8
; GFX11-NEXT: s_and_b32 s7, s7, 0xff
-; GFX11-NEXT: s_lshl_b32 s3, s3, 8
-; GFX11-NEXT: s_and_b32 s13, s13, 0xff
-; GFX11-NEXT: v_mul_lo_u32 v1, v1, 24
-; GFX11-NEXT: v_mul_lo_u32 v0, v0, 24
; GFX11-NEXT: s_or_b32 s0, s0, s6
; GFX11-NEXT: s_and_b32 s7, 0xffff, s7
-; GFX11-NEXT: s_or_b32 s3, s12, s3
; GFX11-NEXT: s_and_b32 s0, 0xffff, s0
-; GFX11-NEXT: s_and_b32 s3, 0xffff, s3
+; GFX11-NEXT: s_lshr_b32 s13, s3, 8
+; GFX11-NEXT: v_mul_lo_u32 v1, v1, 24
+; GFX11-NEXT: v_mul_lo_u32 v0, v0, 24
; GFX11-NEXT: s_lshl_b32 s0, s0, 1
+; GFX11-NEXT: s_and_b32 s3, s3, 0xff
+; GFX11-NEXT: s_and_b32 s13, s13, 0xff
+; GFX11-NEXT: s_lshl_b32 s3, s3, 8
+; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX11-NEXT: s_or_b32 s3, s12, s3
; GFX11-NEXT: v_sub_nc_u32_e32 v1, s4, v1
; GFX11-NEXT: v_sub_nc_u32_e32 v0, s5, v0
; GFX11-NEXT: s_lshl_b32 s4, s10, 8
; GFX11-NEXT: s_and_b32 s10, 0xffff, s13
; GFX11-NEXT: s_or_b32 s2, s2, s4
-; GFX11-NEXT: v_add_nc_u32_e32 v2, 0xffffffe8, v1
+; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v1
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
; GFX11-NEXT: s_lshl_b32 s4, s9, 16
; GFX11-NEXT: s_and_b32 s2, 0xffff, s2
-; GFX11-NEXT: s_lshl_b32 s5, s10, 16
+; GFX11-NEXT: s_and_b32 s3, 0xffff, s3
; GFX11-NEXT: s_or_b32 s2, s2, s4
-; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v2 :: v_dual_add_nc_u32 v2, 0xffffffe8, v0
+; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo
+; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v0
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
; GFX11-NEXT: s_lshl_b32 s4, s7, 17
-; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v1
+; GFX11-NEXT: s_lshl_b32 s5, s10, 16
+; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v1
; GFX11-NEXT: s_or_b32 s0, s4, s0
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
; GFX11-NEXT: s_lshl_b32 s1, s1, 1
-; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v3 :: v_dual_add_nc_u32 v2, 0xffffffe8, v0
-; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v1
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_and_b32 v1, 0xffffff, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v3
+; GFX11-NEXT: v_and_b32_e32 v1, 0xffffff, v1
+; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v0
+; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
; GFX11-NEXT: v_lshrrev_b32_e64 v1, v1, s2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v0
-; GFX11-NEXT: v_and_b32_e32 v0, 0xffffff, v0
; GFX11-NEXT: s_or_b32 s2, s3, s5
-; GFX11-NEXT: v_lshl_or_b32 v1, s0, v2, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v3
-; GFX11-NEXT: v_lshrrev_b32_e64 v0, v0, s2
+; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT: v_lshl_or_b32 v1, s0, v2, v1
; GFX11-NEXT: s_lshl_b32 s0, s8, 17
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: s_or_b32 s0, s0, s1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v3
; GFX11-NEXT: v_bfe_u32 v3, v1, 16, 8
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; GFX11-NEXT: v_and_b32_e32 v0, 0xffffff, v0
+; GFX11-NEXT: v_lshrrev_b32_e64 v0, v0, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_lshl_or_b32 v0, s0, v2, v0
; GFX11-NEXT: v_bfe_u32 v2, v1, 8, 8
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GFX11-NEXT: v_lshlrev_b32_e32 v4, 24, v4
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
@@ -2489,32 +2491,32 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1
-; GFX6-NEXT: v_mul_lo_u32 v8, v6, v7
+; GFX6-NEXT: v_mul_lo_u32 v7, v6, v7
; GFX6-NEXT: v_and_b32_e32 v3, 0xffffff, v3
-; GFX6-NEXT: v_mul_hi_u32 v8, v6, v8
-; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v8
-; GFX6-NEXT: v_mul_hi_u32 v8, v4, v6
+; GFX6-NEXT: v_mul_hi_u32 v7, v6, v7
+; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v7
+; GFX6-NEXT: v_mul_hi_u32 v7, v4, v6
; GFX6-NEXT: v_mul_hi_u32 v6, v5, v6
-; GFX6-NEXT: v_mul_lo_u32 v8, v8, 24
+; GFX6-NEXT: v_mul_lo_u32 v7, v7, 24
; GFX6-NEXT: v_mul_lo_u32 v6, v6, 24
-; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v8
-; GFX6-NEXT: v_add_i32_e32 v8, vcc, v4, v7
+; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v7
+; GFX6-NEXT: v_subrev_i32_e32 v7, vcc, 24, v4
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
-; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc
-; GFX6-NEXT: v_add_i32_e32 v8, vcc, v4, v7
+; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
+; GFX6-NEXT: v_subrev_i32_e32 v7, vcc, 24, v4
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
-; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc
-; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 23, v4
-; GFX6-NEXT: v_and_b32_e32 v8, 0xffffff, v8
+; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
+; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 23, v4
+; GFX6-NEXT: v_and_b32_e32 v7, 0xffffff, v7
; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4
-; GFX6-NEXT: v_lshlrev_b32_e32 v0, v8, v0
+; GFX6-NEXT: v_lshlrev_b32_e32 v0, v7, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2
; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v5, v6
-; GFX6-NEXT: v_add_i32_e32 v4, vcc, v2, v7
+; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
-; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0xffffffe8, v2
+; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 23, v2
@@ -2538,32 +2540,32 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 1, v1
-; GFX8-NEXT: v_mul_lo_u32 v8, v6, v7
+; GFX8-NEXT: v_mul_lo_u32 v7, v6, v7
; GFX8-NEXT: v_and_b32_e32 v3, 0xffffff, v3
-; GFX8-NEXT: v_mul_hi_u32 v8, v6, v8
-; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v8
-; GFX8-NEXT: v_mul_hi_u32 v8, v4, v6
+; GFX8-NEXT: v_mul_hi_u32 v7, v6, v7
+; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v7
+; GFX8-NEXT: v_mul_hi_u32 v7, v4, v6
; GFX8-NEXT: v_mul_hi_u32 v6, v5, v6
-; GFX8-NEXT: v_mul_lo_u32 v8, v8, 24
+; GFX8-NEXT: v_mul_lo_u32 v7, v7, 24
; GFX8-NEXT: v_mul_lo_u32 v6, v6, 24
-; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v8
-; GFX8-NEXT: v_add_u32_e32 v8, vcc, v4, v7
+; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v7
+; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, 24, v4
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc
-; GFX8-NEXT: v_add_u32_e32 v8, vcc, v4, v7
+; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
+; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, 24, v4
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc
-; GFX8-NEXT: v_sub_u32_e32 v8, vcc, 23, v4
-; GFX8-NEXT: v_and_b32_e32 v8, 0xffffff, v8
+; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
+; GFX8-NEXT: v_sub_u32_e32 v7, vcc, 23, v4
+; GFX8-NEXT: v_and_b32_e32 v7, 0xffffff, v7
; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4
-; GFX8-NEXT: v_lshlrev_b32_e32 v0, v8, v0
+; GFX8-NEXT: v_lshlrev_b32_e32 v0, v7, v0
; GFX8-NEXT: v_lshrrev_b32_e32 v2, v4, v2
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v5, v6
-; GFX8-NEXT: v_add_u32_e32 v4, vcc, v2, v7
+; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 24, v2
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
-; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0xffffffe8, v2
+; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 24, v2
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 23, v2
@@ -2597,10 +2599,10 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24
; GFX9-NEXT: v_sub_u32_e32 v4, v4, v7
; GFX9-NEXT: v_sub_u32_e32 v5, v5, v6
-; GFX9-NEXT: v_add_u32_e32 v6, 0xffffffe8, v4
+; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
-; GFX9-NEXT: v_add_u32_e32 v6, 0xffffffe8, v4
+; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
; GFX9-NEXT: v_sub_u32_e32 v6, 23, v4
@@ -2608,10 +2610,10 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
; GFX9-NEXT: v_and_b32_e32 v6, 0xffffff, v6
; GFX9-NEXT: v_lshrrev_b32_e32 v2, v4, v2
; GFX9-NEXT: v_lshl_or_b32 v0, v0, v6, v2
-; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffe8, v5
+; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v5
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v5
; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
-; GFX9-NEXT: v_add_u32_e32 v4, 0xffffffe8, v2
+; GFX9-NEXT: v_subrev_u32_e32 v4, 24, v2
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GFX9-NEXT: v_sub_u32_e32 v4, 23, v2
@@ -2643,15 +2645,15 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
; GFX10-NEXT: v_mul_lo_u32 v6, v6, 24
; GFX10-NEXT: v_sub_nc_u32_e32 v4, v4, v7
; GFX10-NEXT: v_sub_nc_u32_e32 v5, v5, v6
-; GFX10-NEXT: v_add_nc_u32_e32 v6, 0xffffffe8, v4
+; GFX10-NEXT: v_subrev_nc_u32_e32 v6, 24, v4
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4
-; GFX10-NEXT: v_add_nc_u32_e32 v7, 0xffffffe8, v5
+; GFX10-NEXT: v_subrev_nc_u32_e32 v7, 24, v5
; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5
-; GFX10-NEXT: v_add_nc_u32_e32 v6, 0xffffffe8, v4
+; GFX10-NEXT: v_subrev_nc_u32_e32 v6, 24, v4
; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4
-; GFX10-NEXT: v_add_nc_u32_e32 v7, 0xffffffe8, v5
+; GFX10-NEXT: v_subrev_nc_u32_e32 v7, 24, v5
; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5
; GFX10-NEXT: v_sub_nc_u32_e32 v6, 23, v4
@@ -2673,11 +2675,12 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v6, 24
; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v4
; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2
-; GFX11-NEXT: v_and_b32_e32 v3, 0xffffff, v3
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX11-NEXT: v_lshlrev_b32_e32 v1, 1, v1
; GFX11-NEXT: v_rcp_iflag_f32_e32 v6, v6
+; GFX11-NEXT: v_and_b32_e32 v3, 0xffffff, v3
; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_dual_mul_f32 v6, 0x4f7ffffe, v6 :: v_dual_lshlrev_b32 v1, 1, v1
+; GFX11-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cvt_u32_f32_e32 v6, v6
; GFX11-NEXT: v_mul_lo_u32 v7, 0xffffffe8, v6
@@ -2694,33 +2697,34 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_hi_u32 v6, v5, v6
; GFX11-NEXT: v_mul_lo_u32 v6, v6, 24
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_sub_nc_u32_e32 v5, v5, v6
-; GFX11-NEXT: v_add_nc_u32_e32 v6, 0xffffffe8, v4
+; GFX11-NEXT: v_subrev_nc_u32_e32 v6, 24, v4
+; GFX11-NEXT: v_subrev_nc_u32_e32 v7, 24, v5
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5
-; GFX11-NEXT: v_add_nc_u32_e32 v6, 0xffffffe8, v4
-; GFX11-NEXT: v_add_nc_u32_e32 v7, 0xffffffe8, v5
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_subrev_nc_u32_e32 v6, 24, v4
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4
-; GFX11-NEXT: v_dual_cndmask_b32 v4, v4, v6 :: v_dual_add_nc_u32 v7, 0xffffffe8, v5
+; GFX11-NEXT: v_subrev_nc_u32_e32 v7, 24, v5
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_sub_nc_u32_e32 v6, 23, v4
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_dual_cndmask_b32 v5, v5, v7 :: v_dual_and_b32 v4, 0xffffff, v4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_and_b32_e32 v6, 0xffffff, v6
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-NEXT: v_sub_nc_u32_e32 v7, 23, v5
; GFX11-NEXT: v_and_b32_e32 v5, 0xffffff, v5
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_lshrrev_b32_e32 v2, v4, v2
-; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v7
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v7
; GFX11-NEXT: v_lshrrev_b32_e32 v3, v5, v3
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_lshl_or_b32 v0, v0, v6, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_lshl_or_b32 v1, v1, v4, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call <2 x i24> @llvm.fshr.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt)
@@ -6083,14 +6087,13 @@ define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
; GFX6-NEXT: v_not_b32_e32 v0, v8
; GFX6-NEXT: v_and_b32_e32 v15, 0x7f, v0
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, 64, v15
-; GFX6-NEXT: v_not_b32_e32 v16, 63
; GFX6-NEXT: v_lshr_b64 v[0:1], v[9:10], v0
; GFX6-NEXT: v_lshl_b64 v[11:12], v[2:3], v15
-; GFX6-NEXT: v_add_i32_e32 v17, vcc, v15, v16
+; GFX6-NEXT: v_subrev_i32_e32 v16, vcc, 64, v15
; GFX6-NEXT: v_lshl_b64 v[13:14], v[9:10], v15
; GFX6-NEXT: v_or_b32_e32 v11, v0, v11
; GFX6-NEXT: v_or_b32_e32 v12, v1, v12
-; GFX6-NEXT: v_lshl_b64 v[0:1], v[9:10], v17
+; GFX6-NEXT: v_lshl_b64 v[0:1], v[9:10], v16
; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15
; GFX6-NEXT: v_cndmask_b32_e32 v10, 0, v13, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v13, 0, v14, vcc
@@ -6103,7 +6106,7 @@ define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 64, v14
; GFX6-NEXT: v_lshr_b64 v[0:1], v[4:5], v14
; GFX6-NEXT: v_lshl_b64 v[2:3], v[6:7], v2
-; GFX6-NEXT: v_add_i32_e32 v15, vcc, v14, v16
+; GFX6-NEXT: v_subrev_i32_e32 v15, vcc, 64, v14
; GFX6-NEXT: v_or_b32_e32 v2, v0, v2
; GFX6-NEXT: v_or_b32_e32 v3, v1, v3
; GFX6-NEXT: v_lshr_b64 v[0:1], v[6:7], v15
@@ -6132,14 +6135,13 @@ define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
; GFX8-NEXT: v_not_b32_e32 v0, v8
; GFX8-NEXT: v_and_b32_e32 v15, 0x7f, v0
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, 64, v15
-; GFX8-NEXT: v_not_b32_e32 v16, 63
; GFX8-NEXT: v_lshrrev_b64 v[0:1], v0, v[9:10]
; GFX8-NEXT: v_lshlrev_b64 v[11:12], v15, v[2:3]
-; GFX8-NEXT: v_add_u32_e32 v17, vcc, v15, v16
+; GFX8-NEXT: v_subrev_u32_e32 v16, vcc, 64, v15
; GFX8-NEXT: v_lshlrev_b64 v[13:14], v15, v[9:10]
; GFX8-NEXT: v_or_b32_e32 v11, v0, v11
; GFX8-NEXT: v_or_b32_e32 v12, v1, v12
-; GFX8-NEXT: v_lshlrev_b64 v[0:1], v17, v[9:10]
+; GFX8-NEXT: v_lshlrev_b64 v[0:1], v16, v[9:10]
; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15
; GFX8-NEXT: v_cndmask_b32_e32 v10, 0, v13, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v13, 0, v14, vcc
@@ -6152,7 +6154,7 @@ define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 64, v14
; GFX8-NEXT: v_lshrrev_b64 v[0:1], v14, v[4:5]
; GFX8-NEXT: v_lshlrev_b64 v[2:3], v2, v[6:7]
-; GFX8-NEXT: v_add_u32_e32 v15, vcc, v14, v16
+; GFX8-NEXT: v_subrev_u32_e32 v15, vcc, 64, v14
; GFX8-NEXT: v_or_b32_e32 v2, v0, v2
; GFX8-NEXT: v_or_b32_e32 v3, v1, v3
; GFX8-NEXT: v_lshrrev_b64 v[0:1], v15, v[6:7]
@@ -6183,7 +6185,7 @@ define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
; GFX9-NEXT: v_sub_u32_e32 v0, 64, v15
; GFX9-NEXT: v_lshrrev_b64 v[0:1], v0, v[9:10]
; GFX9-NEXT: v_lshlrev_b64 v[11:12], v15, v[2:3]
-; GFX9-NEXT: v_add_u32_e32 v16, 0xffffffc0, v15
+; GFX9-NEXT: v_subrev_u32_e32 v16, 64, v15
; GFX9-NEXT: v_lshlrev_b64 v[13:14], v15, v[9:10]
; GFX9-NEXT: v_or_b32_e32 v11, v0, v11
; GFX9-NEXT: v_or_b32_e32 v12, v1, v12
@@ -6200,7 +6202,7 @@ define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
; GFX9-NEXT: v_cndmask_b32_e32 v12, v1, v3, vcc
; GFX9-NEXT: v_lshrrev_b64 v[0:1], v14, v[4:5]
; GFX9-NEXT: v_lshlrev_b64 v[2:3], v2, v[6:7]
-; GFX9-NEXT: v_add_u32_e32 v15, 0xffffffc0, v14
+; GFX9-NEXT: v_subrev_u32_e32 v15, 64, v14
; GFX9-NEXT: v_or_b32_e32 v2, v0, v2
; GFX9-NEXT: v_or_b32_e32 v3, v1, v3
; GFX9-NEXT: v_lshrrev_b64 v[0:1], v15, v[6:7]
@@ -6230,9 +6232,9 @@ define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
; GFX10-NEXT: v_and_b32_e32 v18, 0x7f, v9
; GFX10-NEXT: v_or_b32_e32 v2, v2, v10
; GFX10-NEXT: v_sub_nc_u32_e32 v16, 64, v19
-; GFX10-NEXT: v_add_nc_u32_e32 v21, 0xffffffc0, v19
+; GFX10-NEXT: v_subrev_nc_u32_e32 v21, 64, v19
; GFX10-NEXT: v_sub_nc_u32_e32 v10, 64, v18
-; GFX10-NEXT: v_add_nc_u32_e32 v20, 0xffffffc0, v18
+; GFX10-NEXT: v_subrev_nc_u32_e32 v20, 64, v18
; GFX10-NEXT: v_lshlrev_b64 v[8:9], v18, v[2:3]
; GFX10-NEXT: v_lshrrev_b64 v[12:13], v19, v[4:5]
; GFX10-NEXT: v_lshlrev_b64 v[16:17], v16, v[6:7]
@@ -6271,48 +6273,47 @@ define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_not_b32_e32 v9, v8
-; GFX11-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
; GFX11-NEXT: v_lshrrev_b32_e32 v10, 31, v1
; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_and_b32_e32 v18, 0x7f, v9
; GFX11-NEXT: v_or_b32_e32 v2, v2, v10
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_sub_nc_u32_e32 v10, 64, v18
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_lshlrev_b64 v[14:15], v18, v[0:1]
; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v18
; GFX11-NEXT: v_and_b32_e32 v19, 0x7f, v8
-; GFX11-NEXT: v_add_nc_u32_e32 v20, 0xffffffc0, v18
+; GFX11-NEXT: v_sub_nc_u32_e32 v10, 64, v18
+; GFX11-NEXT: v_subrev_nc_u32_e32 v20, 64, v18
; GFX11-NEXT: v_lshlrev_b64 v[8:9], v18, v[2:3]
-; GFX11-NEXT: v_lshrrev_b64 v[10:11], v10, v[0:1]
; GFX11-NEXT: v_cndmask_b32_e32 v14, 0, v14, vcc_lo
; GFX11-NEXT: v_sub_nc_u32_e32 v16, 64, v19
-; GFX11-NEXT: v_lshlrev_b64 v[0:1], v20, v[0:1]
+; GFX11-NEXT: v_lshrrev_b64 v[10:11], v10, v[0:1]
+; GFX11-NEXT: v_subrev_nc_u32_e32 v21, 64, v19
; GFX11-NEXT: v_lshrrev_b64 v[12:13], v19, v[4:5]
+; GFX11-NEXT: v_lshlrev_b64 v[0:1], v20, v[0:1]
+; GFX11-NEXT: v_lshlrev_b64 v[16:17], v16, v[6:7]
; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v19
; GFX11-NEXT: v_or_b32_e32 v10, v10, v8
-; GFX11-NEXT: v_add_nc_u32_e32 v21, 0xffffffc0, v19
-; GFX11-NEXT: v_lshlrev_b64 v[16:17], v16, v[6:7]
; GFX11-NEXT: v_or_b32_e32 v11, v11, v9
-; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v19
-; GFX11-NEXT: v_cndmask_b32_e32 v10, v0, v10, vcc_lo
; GFX11-NEXT: v_lshrrev_b64 v[8:9], v21, v[6:7]
+; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v19
; GFX11-NEXT: v_or_b32_e32 v12, v12, v16
; GFX11-NEXT: v_or_b32_e32 v13, v13, v17
-; GFX11-NEXT: v_cndmask_b32_e32 v11, v1, v11, vcc_lo
+; GFX11-NEXT: v_dual_cndmask_b32 v10, v0, v10 :: v_dual_cndmask_b32 v11, v1, v11
; GFX11-NEXT: v_lshrrev_b64 v[0:1], v19, v[6:7]
-; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v18
+; GFX11-NEXT: v_cndmask_b32_e32 v7, 0, v15, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v12, s0
+; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v18
; GFX11-NEXT: v_cndmask_b32_e64 v6, v9, v13, s0
-; GFX11-NEXT: v_cndmask_b32_e32 v7, 0, v15, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_cndmask_b32_e64 v4, v8, v4, s1
; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, v2, s2
; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v4, v8, v4, s1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX11-NEXT: v_cndmask_b32_e64 v5, v6, v5, s1
; GFX11-NEXT: v_cndmask_b32_e64 v6, 0, v0, s0
; GFX11-NEXT: v_cndmask_b32_e64 v8, 0, v1, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-NEXT: v_or_b32_e32 v0, v14, v4
; GFX11-NEXT: v_or_b32_e32 v1, v7, v5
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
@@ -6334,47 +6335,46 @@ define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
; GFX6-NEXT: v_and_b32_e32 v7, 0x7f, v1
; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 64, v7
-; GFX6-NEXT: v_not_b32_e32 v8, 63
; GFX6-NEXT: v_lshr_b64 v[1:2], s[8:9], v1
; GFX6-NEXT: v_lshl_b64 v[3:4], s[0:1], v7
-; GFX6-NEXT: v_add_i32_e32 v9, vcc, v7, v8
+; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, 64, v7
; GFX6-NEXT: v_lshl_b64 v[5:6], s[8:9], v7
; GFX6-NEXT: v_or_b32_e32 v3, v1, v3
; GFX6-NEXT: v_or_b32_e32 v4, v2, v4
-; GFX6-NEXT: v_lshl_b64 v[1:2], s[8:9], v9
+; GFX6-NEXT: v_lshl_b64 v[1:2], s[8:9], v8
; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7
-; GFX6-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v8, 0, v5, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GFX6-NEXT: v_mov_b32_e32 v3, s0
; GFX6-NEXT: v_mov_b32_e32 v4, s1
; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
-; GFX6-NEXT: v_and_b32_e32 v11, 0x7f, v0
+; GFX6-NEXT: v_and_b32_e32 v10, 0x7f, v0
; GFX6-NEXT: v_cndmask_b32_e32 v7, v1, v3, vcc
-; GFX6-NEXT: v_cndmask_b32_e32 v10, v2, v4, vcc
-; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 64, v11
-; GFX6-NEXT: v_lshr_b64 v[0:1], s[4:5], v11
+; GFX6-NEXT: v_cndmask_b32_e32 v9, v2, v4, vcc
+; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 64, v10
+; GFX6-NEXT: v_lshr_b64 v[0:1], s[4:5], v10
; GFX6-NEXT: v_lshl_b64 v[2:3], s[6:7], v2
-; GFX6-NEXT: v_add_i32_e32 v8, vcc, v11, v8
+; GFX6-NEXT: v_subrev_i32_e32 v11, vcc, 64, v10
; GFX6-NEXT: v_or_b32_e32 v2, v0, v2
; GFX6-NEXT: v_or_b32_e32 v3, v1, v3
-; GFX6-NEXT: v_lshr_b64 v[0:1], s[6:7], v8
-; GFX6-NEXT: v_lshr_b64 v[4:5], s[6:7], v11
-; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v11
+; GFX6-NEXT: v_lshr_b64 v[0:1], s[6:7], v11
+; GFX6-NEXT: v_lshr_b64 v[4:5], s[6:7], v10
+; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v10
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX6-NEXT: v_mov_b32_e32 v2, s4
; GFX6-NEXT: v_mov_b32_e32 v3, s5
-; GFX6-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v11
+; GFX6-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v10
; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1]
; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1]
; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
-; GFX6-NEXT: v_or_b32_e32 v0, v9, v0
+; GFX6-NEXT: v_or_b32_e32 v0, v8, v0
; GFX6-NEXT: v_or_b32_e32 v1, v6, v1
; GFX6-NEXT: v_or_b32_e32 v2, v7, v2
-; GFX6-NEXT: v_or_b32_e32 v3, v10, v3
+; GFX6-NEXT: v_or_b32_e32 v3, v9, v3
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_fshr_i128_ssv:
@@ -6387,47 +6387,46 @@ define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
; GFX8-NEXT: v_and_b32_e32 v7, 0x7f, v1
; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
; GFX8-NEXT: v_sub_u32_e32 v1, vcc, 64, v7
-; GFX8-NEXT: v_not_b32_e32 v8, 63
; GFX8-NEXT: v_lshrrev_b64 v[1:2], v1, s[8:9]
; GFX8-NEXT: v_lshlrev_b64 v[3:4], v7, s[0:1]
-; GFX8-NEXT: v_add_u32_e32 v9, vcc, v7, v8
+; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, 64, v7
; GFX8-NEXT: v_lshlrev_b64 v[5:6], v7, s[8:9]
; GFX8-NEXT: v_or_b32_e32 v3, v1, v3
; GFX8-NEXT: v_or_b32_e32 v4, v2, v4
-; GFX8-NEXT: v_lshlrev_b64 v[1:2], v9, s[8:9]
+; GFX8-NEXT: v_lshlrev_b64 v[1:2], v8, s[8:9]
; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7
-; GFX8-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v8, 0, v5, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GFX8-NEXT: v_mov_b32_e32 v3, s0
; GFX8-NEXT: v_mov_b32_e32 v4, s1
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
-; GFX8-NEXT: v_and_b32_e32 v11, 0x7f, v0
+; GFX8-NEXT: v_and_b32_e32 v10, 0x7f, v0
; GFX8-NEXT: v_cndmask_b32_e32 v7, v1, v3, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v10, v2, v4, vcc
-; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 64, v11
-; GFX8-NEXT: v_lshrrev_b64 v[0:1], v11, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e32 v9, v2, v4, vcc
+; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 64, v10
+; GFX8-NEXT: v_lshrrev_b64 v[0:1], v10, s[4:5]
; GFX8-NEXT: v_lshlrev_b64 v[2:3], v2, s[6:7]
-; GFX8-NEXT: v_add_u32_e32 v8, vcc, v11, v8
+; GFX8-NEXT: v_subrev_u32_e32 v11, vcc, 64, v10
; GFX8-NEXT: v_or_b32_e32 v2, v0, v2
; GFX8-NEXT: v_or_b32_e32 v3, v1, v3
-; GFX8-NEXT: v_lshrrev_b64 v[0:1], v8, s[6:7]
-; GFX8-NEXT: v_lshrrev_b64 v[4:5], v11, s[6:7]
-; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v11
+; GFX8-NEXT: v_lshrrev_b64 v[0:1], v11, s[6:7]
+; GFX8-NEXT: v_lshrrev_b64 v[4:5], v10, s[6:7]
+; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v10
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX8-NEXT: v_mov_b32_e32 v2, s4
; GFX8-NEXT: v_mov_b32_e32 v3, s5
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v11
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v10
; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1]
; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1]
; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
-; GFX8-NEXT: v_or_b32_e32 v0, v9, v0
+; GFX8-NEXT: v_or_b32_e32 v0, v8, v0
; GFX8-NEXT: v_or_b32_e32 v1, v6, v1
; GFX8-NEXT: v_or_b32_e32 v2, v7, v2
-; GFX8-NEXT: v_or_b32_e32 v3, v10, v3
+; GFX8-NEXT: v_or_b32_e32 v3, v9, v3
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: v_fshr_i128_ssv:
@@ -6442,7 +6441,7 @@ define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
; GFX9-NEXT: v_sub_u32_e32 v1, 64, v7
; GFX9-NEXT: v_lshrrev_b64 v[1:2], v1, s[8:9]
; GFX9-NEXT: v_lshlrev_b64 v[3:4], v7, s[0:1]
-; GFX9-NEXT: v_add_u32_e32 v8, 0xffffffc0, v7
+; GFX9-NEXT: v_subrev_u32_e32 v8, 64, v7
; GFX9-NEXT: v_lshlrev_b64 v[5:6], v7, s[8:9]
; GFX9-NEXT: v_or_b32_e32 v3, v1, v3
; GFX9-NEXT: v_or_b32_e32 v4, v2, v4
@@ -6461,7 +6460,7 @@ define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
; GFX9-NEXT: v_cndmask_b32_e32 v7, v1, v3, vcc
; GFX9-NEXT: v_lshrrev_b64 v[0:1], v10, s[4:5]
; GFX9-NEXT: v_lshlrev_b64 v[2:3], v2, s[6:7]
-; GFX9-NEXT: v_add_u32_e32 v11, 0xffffffc0, v10
+; GFX9-NEXT: v_subrev_u32_e32 v11, 64, v10
; GFX9-NEXT: v_or_b32_e32 v2, v0, v2
; GFX9-NEXT: v_or_b32_e32 v3, v1, v3
; GFX9-NEXT: v_lshrrev_b64 v[0:1], v11, s[6:7]
@@ -6493,10 +6492,10 @@ define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
; GFX10-NEXT: v_sub_nc_u32_e32 v8, 64, v13
; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
; GFX10-NEXT: s_or_b64 s[8:9], s[2:3], s[8:9]
-; GFX10-NEXT: v_add_nc_u32_e32 v14, 0xffffffc0, v13
+; GFX10-NEXT: v_subrev_nc_u32_e32 v14, 64, v13
; GFX10-NEXT: v_sub_nc_u32_e32 v2, 64, v12
; GFX10-NEXT: v_lshlrev_b64 v[0:1], v12, s[8:9]
-; GFX10-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v12
+; GFX10-NEXT: v_subrev_nc_u32_e32 v10, 64, v12
; GFX10-NEXT: v_lshrrev_b64 v[4:5], v13, s[4:5]
; GFX10-NEXT: v_lshlrev_b64 v[8:9], v8, s[6:7]
; GFX10-NEXT: v_lshrrev_b64 v[2:3], v2, s[0:1]
@@ -6545,11 +6544,11 @@ define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs,
; GFX11-NEXT: v_and_b32_e32 v13, 0x7f, v0
; GFX11-NEXT: v_sub_nc_u32_e32 v2, 64, v12
; GFX11-NEXT: v_lshlrev_b64 v[0:1], v12, s[8:9]
-; GFX11-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v12
+; GFX11-NEXT: v_subrev_nc_u32_e32 v10, 64, v12
; GFX11-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc_lo
; GFX11-NEXT: v_sub_nc_u32_e32 v8, 64, v13
; GFX11-NEXT: v_lshrrev_b64 v[2:3], v2, s[0:1]
-; GFX11-NEXT: v_add_nc_u32_e32 v14, 0xffffffc0, v13
+; GFX11-NEXT: v_subrev_nc_u32_e32 v14, 64, v13
; GFX11-NEXT: v_lshrrev_b64 v[4:5], v13, s[4:5]
; GFX11-NEXT: v_lshlrev_b64 v[10:11], v10, s[0:1]
; GFX11-NEXT: v_lshlrev_b64 v[8:9], v8, s[6:7]
@@ -7719,14 +7718,13 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX6-NEXT: v_not_b32_e32 v0, v16
; GFX6-NEXT: v_and_b32_e32 v19, 0x7f, v0
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, 64, v19
-; GFX6-NEXT: v_not_b32_e32 v25, 63
; GFX6-NEXT: v_lshr_b64 v[0:1], v[17:18], v0
; GFX6-NEXT: v_lshl_b64 v[21:22], v[2:3], v19
-; GFX6-NEXT: v_add_i32_e32 v26, vcc, v19, v25
+; GFX6-NEXT: v_subrev_i32_e32 v25, vcc, 64, v19
; GFX6-NEXT: v_lshl_b64 v[23:24], v[17:18], v19
; GFX6-NEXT: v_or_b32_e32 v21, v0, v21
; GFX6-NEXT: v_or_b32_e32 v22, v1, v22
-; GFX6-NEXT: v_lshl_b64 v[0:1], v[17:18], v26
+; GFX6-NEXT: v_lshl_b64 v[0:1], v[17:18], v25
; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v19
; GFX6-NEXT: v_cndmask_b32_e32 v18, 0, v23, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v23, 0, v24, vcc
@@ -7739,7 +7737,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 64, v22
; GFX6-NEXT: v_lshr_b64 v[0:1], v[8:9], v22
; GFX6-NEXT: v_lshl_b64 v[2:3], v[10:11], v2
-; GFX6-NEXT: v_add_i32_e32 v24, vcc, v22, v25
+; GFX6-NEXT: v_subrev_i32_e32 v24, vcc, 64, v22
; GFX6-NEXT: v_or_b32_e32 v2, v0, v2
; GFX6-NEXT: v_or_b32_e32 v3, v1, v3
; GFX6-NEXT: v_lshr_b64 v[0:1], v[10:11], v24
@@ -7763,7 +7761,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX6-NEXT: v_lshr_b64 v[4:5], v[8:9], v4
; GFX6-NEXT: v_lshl_b64 v[10:11], v[6:7], v18
; GFX6-NEXT: v_or_b32_e32 v2, v19, v2
-; GFX6-NEXT: v_add_i32_e32 v19, vcc, v18, v25
+; GFX6-NEXT: v_subrev_i32_e32 v19, vcc, 64, v18
; GFX6-NEXT: v_lshl_b64 v[16:17], v[8:9], v18
; GFX6-NEXT: v_or_b32_e32 v10, v4, v10
; GFX6-NEXT: v_or_b32_e32 v11, v5, v11
@@ -7780,7 +7778,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 64, v18
; GFX6-NEXT: v_lshr_b64 v[4:5], v[12:13], v18
; GFX6-NEXT: v_lshl_b64 v[6:7], v[14:15], v6
-; GFX6-NEXT: v_add_i32_e32 v19, vcc, v18, v25
+; GFX6-NEXT: v_subrev_i32_e32 v19, vcc, 64, v18
; GFX6-NEXT: v_or_b32_e32 v6, v4, v6
; GFX6-NEXT: v_or_b32_e32 v7, v5, v7
; GFX6-NEXT: v_lshr_b64 v[4:5], v[14:15], v19
@@ -7811,14 +7809,13 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX8-NEXT: v_not_b32_e32 v0, v16
; GFX8-NEXT: v_and_b32_e32 v19, 0x7f, v0
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, 64, v19
-; GFX8-NEXT: v_not_b32_e32 v25, 63
; GFX8-NEXT: v_lshrrev_b64 v[0:1], v0, v[17:18]
; GFX8-NEXT: v_lshlrev_b64 v[21:22], v19, v[2:3]
-; GFX8-NEXT: v_add_u32_e32 v26, vcc, v19, v25
+; GFX8-NEXT: v_subrev_u32_e32 v25, vcc, 64, v19
; GFX8-NEXT: v_lshlrev_b64 v[23:24], v19, v[17:18]
; GFX8-NEXT: v_or_b32_e32 v21, v0, v21
; GFX8-NEXT: v_or_b32_e32 v22, v1, v22
-; GFX8-NEXT: v_lshlrev_b64 v[0:1], v26, v[17:18]
+; GFX8-NEXT: v_lshlrev_b64 v[0:1], v25, v[17:18]
; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v19
; GFX8-NEXT: v_cndmask_b32_e32 v18, 0, v23, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v23, 0, v24, vcc
@@ -7831,7 +7828,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 64, v22
; GFX8-NEXT: v_lshrrev_b64 v[0:1], v22, v[8:9]
; GFX8-NEXT: v_lshlrev_b64 v[2:3], v2, v[10:11]
-; GFX8-NEXT: v_add_u32_e32 v24, vcc, v22, v25
+; GFX8-NEXT: v_subrev_u32_e32 v24, vcc, 64, v22
; GFX8-NEXT: v_or_b32_e32 v2, v0, v2
; GFX8-NEXT: v_or_b32_e32 v3, v1, v3
; GFX8-NEXT: v_lshrrev_b64 v[0:1], v24, v[10:11]
@@ -7855,7 +7852,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX8-NEXT: v_lshrrev_b64 v[4:5], v4, v[8:9]
; GFX8-NEXT: v_lshlrev_b64 v[10:11], v18, v[6:7]
; GFX8-NEXT: v_or_b32_e32 v2, v19, v2
-; GFX8-NEXT: v_add_u32_e32 v19, vcc, v18, v25
+; GFX8-NEXT: v_subrev_u32_e32 v19, vcc, 64, v18
; GFX8-NEXT: v_lshlrev_b64 v[16:17], v18, v[8:9]
; GFX8-NEXT: v_or_b32_e32 v10, v4, v10
; GFX8-NEXT: v_or_b32_e32 v11, v5, v11
@@ -7872,7 +7869,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX8-NEXT: v_sub_u32_e32 v6, vcc, 64, v18
; GFX8-NEXT: v_lshrrev_b64 v[4:5], v18, v[12:13]
; GFX8-NEXT: v_lshlrev_b64 v[6:7], v6, v[14:15]
-; GFX8-NEXT: v_add_u32_e32 v19, vcc, v18, v25
+; GFX8-NEXT: v_subrev_u32_e32 v19, vcc, 64, v18
; GFX8-NEXT: v_or_b32_e32 v6, v4, v6
; GFX8-NEXT: v_or_b32_e32 v7, v5, v7
; GFX8-NEXT: v_lshrrev_b64 v[4:5], v19, v[14:15]
@@ -7905,7 +7902,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX9-NEXT: v_sub_u32_e32 v0, 64, v19
; GFX9-NEXT: v_lshrrev_b64 v[0:1], v0, v[17:18]
; GFX9-NEXT: v_lshlrev_b64 v[21:22], v19, v[2:3]
-; GFX9-NEXT: v_add_u32_e32 v25, 0xffffffc0, v19
+; GFX9-NEXT: v_subrev_u32_e32 v25, 64, v19
; GFX9-NEXT: v_lshlrev_b64 v[23:24], v19, v[17:18]
; GFX9-NEXT: v_or_b32_e32 v21, v0, v21
; GFX9-NEXT: v_or_b32_e32 v22, v1, v22
@@ -7922,7 +7919,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX9-NEXT: v_cndmask_b32_e32 v21, v1, v3, vcc
; GFX9-NEXT: v_lshrrev_b64 v[0:1], v22, v[8:9]
; GFX9-NEXT: v_lshlrev_b64 v[2:3], v2, v[10:11]
-; GFX9-NEXT: v_add_u32_e32 v24, 0xffffffc0, v22
+; GFX9-NEXT: v_subrev_u32_e32 v24, 64, v22
; GFX9-NEXT: v_or_b32_e32 v2, v0, v2
; GFX9-NEXT: v_or_b32_e32 v3, v1, v3
; GFX9-NEXT: v_lshrrev_b64 v[0:1], v24, v[10:11]
@@ -7945,7 +7942,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX9-NEXT: v_lshrrev_b64 v[4:5], v4, v[8:9]
; GFX9-NEXT: v_lshlrev_b64 v[10:11], v18, v[6:7]
; GFX9-NEXT: v_or_b32_e32 v2, v19, v2
-; GFX9-NEXT: v_add_u32_e32 v19, 0xffffffc0, v18
+; GFX9-NEXT: v_subrev_u32_e32 v19, 64, v18
; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v17, vcc
; GFX9-NEXT: v_lshlrev_b64 v[16:17], v18, v[8:9]
; GFX9-NEXT: v_or_b32_e32 v10, v4, v10
@@ -7963,7 +7960,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX9-NEXT: v_cndmask_b32_e32 v11, v5, v7, vcc
; GFX9-NEXT: v_lshrrev_b64 v[4:5], v18, v[12:13]
; GFX9-NEXT: v_lshlrev_b64 v[6:7], v6, v[14:15]
-; GFX9-NEXT: v_add_u32_e32 v19, 0xffffffc0, v18
+; GFX9-NEXT: v_subrev_u32_e32 v19, 64, v18
; GFX9-NEXT: v_or_b32_e32 v6, v4, v6
; GFX9-NEXT: v_or_b32_e32 v7, v5, v7
; GFX9-NEXT: v_lshrrev_b64 v[4:5], v19, v[14:15]
@@ -7994,11 +7991,11 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX10-NEXT: v_and_b32_e32 v25, 0x7f, v17
; GFX10-NEXT: v_lshrrev_b32_e32 v17, 31, v1
; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
-; GFX10-NEXT: v_add_nc_u32_e32 v27, 0xffffffc0, v26
+; GFX10-NEXT: v_subrev_nc_u32_e32 v27, 64, v26
; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v26
; GFX10-NEXT: v_sub_nc_u32_e32 v18, 64, v25
; GFX10-NEXT: v_or_b32_e32 v2, v2, v17
-; GFX10-NEXT: v_add_nc_u32_e32 v19, 0xffffffc0, v25
+; GFX10-NEXT: v_subrev_nc_u32_e32 v19, 64, v25
; GFX10-NEXT: v_lshlrev_b64 v[23:24], v25, v[0:1]
; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v25
; GFX10-NEXT: v_lshrrev_b64 v[17:18], v18, v[0:1]
@@ -8038,12 +8035,12 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX10-NEXT: v_lshrrev_b64 v[8:9], v9, v[4:5]
; GFX10-NEXT: v_lshlrev_b64 v[10:11], v25, v[6:7]
; GFX10-NEXT: v_sub_nc_u32_e32 v20, 64, v23
-; GFX10-NEXT: v_add_nc_u32_e32 v3, 0xffffffc0, v25
+; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 64, v25
; GFX10-NEXT: v_or_b32_e32 v2, v18, v2
; GFX10-NEXT: v_lshlrev_b64 v[16:17], v25, v[4:5]
; GFX10-NEXT: v_lshrrev_b64 v[18:19], v23, v[12:13]
; GFX10-NEXT: v_or_b32_e32 v10, v8, v10
-; GFX10-NEXT: v_add_nc_u32_e32 v8, 0xffffffc0, v23
+; GFX10-NEXT: v_subrev_nc_u32_e32 v8, 64, v23
; GFX10-NEXT: v_lshlrev_b64 v[20:21], v20, v[14:15]
; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v25
; GFX10-NEXT: v_lshlrev_b64 v[3:4], v3, v[4:5]
@@ -8094,41 +8091,41 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX11-NEXT: v_cndmask_b32_e32 v24, 0, v24, vcc_lo
; GFX11-NEXT: v_sub_nc_u32_e32 v18, 64, v25
; GFX11-NEXT: v_lshlrev_b64 v[21:22], v25, v[2:3]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_subrev_nc_u32_e32 v19, 64, v25
+; GFX11-NEXT: v_subrev_nc_u32_e32 v27, 64, v26
; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v26
; GFX11-NEXT: v_lshrrev_b64 v[17:18], v18, v[0:1]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_lshlrev_b64 v[0:1], v19, v[0:1]
; GFX11-NEXT: v_or_b32_e32 v22, v18, v22
-; GFX11-NEXT: v_add_nc_u32_e32 v19, 0xffffffc0, v25
-; GFX11-NEXT: v_or_b32_e32 v21, v17, v21
; GFX11-NEXT: v_sub_nc_u32_e32 v18, 64, v26
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT: v_or_b32_e32 v21, v17, v21
; GFX11-NEXT: v_lshrrev_b64 v[16:17], v26, v[8:9]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_lshlrev_b64 v[0:1], v19, v[0:1]
+; GFX11-NEXT: v_cndmask_b32_e32 v22, v1, v22, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-NEXT: v_lshlrev_b64 v[18:19], v18, v[10:11]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_dual_cndmask_b32 v22, v1, v22 :: v_dual_cndmask_b32 v21, v0, v21
+; GFX11-NEXT: v_cndmask_b32_e32 v21, v0, v21, vcc_lo
+; GFX11-NEXT: v_lshrrev_b64 v[0:1], v27, v[10:11]
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v25
-; GFX11-NEXT: v_add_nc_u32_e32 v27, 0xffffffc0, v26
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_or_b32_e32 v16, v16, v18
; GFX11-NEXT: v_or_b32_e32 v17, v17, v19
-; GFX11-NEXT: v_cndmask_b32_e32 v22, v22, v3, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_lshrrev_b64 v[0:1], v27, v[10:11]
; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v16, s0
; GFX11-NEXT: v_not_b32_e32 v16, v20
; GFX11-NEXT: v_cndmask_b32_e32 v18, v21, v2, vcc_lo
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v26
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v17, s0
-; GFX11-NEXT: v_lshrrev_b64 v[2:3], v26, v[10:11]
+; GFX11-NEXT: v_cndmask_b32_e32 v22, v22, v3, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v26
; GFX11-NEXT: v_and_b32_e32 v25, 0x7f, v16
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v8 :: v_dual_cndmask_b32 v1, v1, v9
+; GFX11-NEXT: v_lshrrev_b64 v[2:3], v26, v[10:11]
+; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v9 :: v_dual_cndmask_b32 v0, v0, v8
; GFX11-NEXT: v_lshrrev_b32_e32 v8, 31, v5
; GFX11-NEXT: v_lshlrev_b64 v[4:5], 1, v[4:5]
; GFX11-NEXT: v_sub_nc_u32_e32 v9, 64, v25
; GFX11-NEXT: v_cndmask_b32_e64 v26, 0, v3, s0
-; GFX11-NEXT: v_add_nc_u32_e32 v3, 0xffffffc0, v25
+; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 64, v25
; GFX11-NEXT: v_or_b32_e32 v6, v6, v8
; GFX11-NEXT: v_or_b32_e32 v0, v23, v0
; GFX11-NEXT: v_lshrrev_b64 v[8:9], v9, v[4:5]
@@ -8146,7 +8143,7 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-NEXT: v_dual_cndmask_b32 v11, 0, v16 :: v_dual_cndmask_b32 v10, v3, v10
; GFX11-NEXT: v_sub_nc_u32_e32 v20, 64, v23
-; GFX11-NEXT: v_add_nc_u32_e32 v8, 0xffffffc0, v23
+; GFX11-NEXT: v_subrev_nc_u32_e32 v8, 64, v23
; GFX11-NEXT: v_lshrrev_b64 v[18:19], v23, v[12:13]
; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v23
; GFX11-NEXT: v_cndmask_b32_e32 v5, v4, v5, vcc_lo
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
index cc185aff9eff220..5dd4fa0809131fb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
@@ -1577,7 +1577,7 @@ define i65 @v_lshr_i65(i65 %value, i65 %amount) {
; GFX6-NEXT: v_and_b32_e32 v4, 1, v2
; GFX6-NEXT: v_mov_b32_e32 v5, 0
; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 64, v3
-; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0xffffffc0, v3
+; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 64, v3
; GFX6-NEXT: v_lshr_b64 v[6:7], v[0:1], v3
; GFX6-NEXT: v_lshl_b64 v[8:9], v[4:5], v8
; GFX6-NEXT: v_lshr_b64 v[10:11], v[4:5], v3
@@ -1599,7 +1599,7 @@ define i65 @v_lshr_i65(i65 %value, i65 %amount) {
; GFX8-NEXT: v_and_b32_e32 v4, 1, v2
; GFX8-NEXT: v_mov_b32_e32 v5, 0
; GFX8-NEXT: v_sub_u32_e32 v8, vcc, 64, v3
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0xffffffc0, v3
+; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 64, v3
; GFX8-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX8-NEXT: v_lshlrev_b64 v[8:9], v8, v[4:5]
; GFX8-NEXT: v_lshrrev_b64 v[10:11], v3, v[4:5]
@@ -1621,7 +1621,7 @@ define i65 @v_lshr_i65(i65 %value, i65 %amount) {
; GFX9-NEXT: v_and_b32_e32 v4, 1, v2
; GFX9-NEXT: v_mov_b32_e32 v5, 0
; GFX9-NEXT: v_sub_u32_e32 v8, 64, v3
-; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffc0, v3
+; GFX9-NEXT: v_subrev_u32_e32 v2, 64, v3
; GFX9-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX9-NEXT: v_lshlrev_b64 v[8:9], v8, v[4:5]
; GFX9-NEXT: v_lshrrev_b64 v[10:11], v3, v[4:5]
@@ -1643,7 +1643,7 @@ define i65 @v_lshr_i65(i65 %value, i65 %amount) {
; GFX10-NEXT: v_mov_b32_e32 v5, 0
; GFX10-NEXT: v_and_b32_e32 v4, 1, v2
; GFX10-NEXT: v_sub_nc_u32_e32 v2, 64, v3
-; GFX10-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v3
+; GFX10-NEXT: v_subrev_nc_u32_e32 v10, 64, v3
; GFX10-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v3
; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 0, v3
@@ -1664,20 +1664,20 @@ define i65 @v_lshr_i65(i65 %value, i65 %amount) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_and_b32 v4, 1, v2
; GFX11-NEXT: v_sub_nc_u32_e32 v2, 64, v3
+; GFX11-NEXT: v_subrev_nc_u32_e32 v10, 64, v3
; GFX11-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v3
; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 0, v3
; GFX11-NEXT: v_lshlrev_b64 v[8:9], v2, v[4:5]
-; GFX11-NEXT: v_or_b32_e32 v2, v6, v8
-; GFX11-NEXT: v_or_b32_e32 v6, v7, v9
-; GFX11-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v3
; GFX11-NEXT: v_lshrrev_b64 v[10:11], v10, v[4:5]
; GFX11-NEXT: v_lshrrev_b64 v[4:5], v3, v[4:5]
-; GFX11-NEXT: v_cndmask_b32_e32 v5, v11, v6, vcc_lo
+; GFX11-NEXT: v_or_b32_e32 v2, v6, v8
+; GFX11-NEXT: v_or_b32_e32 v6, v7, v9
; GFX11-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, v1, s0
+; GFX11-NEXT: v_cndmask_b32_e32 v5, v11, v6, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, v0, s0
; GFX11-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, v1, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = lshr i65 %value, %amount
ret i65 %result
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
index 88eb0e4b848c956..2c2f8e914447d1d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
@@ -325,7 +325,7 @@ define i32 @v_sdiv_i32_oddk_denom(i32 %num) {
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4
; CHECK-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[4:5]
-; CHECK-NEXT: v_add_i32_e32 v3, vcc, 0xffed2705, v0
+; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, 0x12d8fb, v0
; CHECK-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5]
; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v2
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
@@ -353,29 +353,29 @@ define <2 x i32> @v_sdiv_v2i32_oddk_denom(<2 x i32> %num) {
; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT: v_mul_lo_u32 v7, v4, v5
-; GISEL-NEXT: v_mul_hi_u32 v7, v4, v7
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7
-; GISEL-NEXT: v_mul_hi_u32 v7, v0, v4
+; GISEL-NEXT: v_mul_lo_u32 v5, v4, v5
+; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
+; GISEL-NEXT: v_mul_hi_u32 v5, v0, v4
; GISEL-NEXT: v_mul_hi_u32 v4, v1, v4
-; GISEL-NEXT: v_mul_lo_u32 v8, v7, v3
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v7
-; GISEL-NEXT: v_mul_lo_u32 v10, v4, v3
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, 1, v4
-; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v8
-; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v10
+; GISEL-NEXT: v_mul_lo_u32 v7, v5, v3
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v5
+; GISEL-NEXT: v_mul_lo_u32 v9, v4, v3
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, 1, v4
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v7
+; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v9
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3
-; GISEL-NEXT: v_cndmask_b32_e64 v7, v7, v9, s[4:5]
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v0, v5
+; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v8, s[4:5]
+; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v0, v3
; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v1, v3
-; GISEL-NEXT: v_cndmask_b32_e64 v4, v4, v11, s[6:7]
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, 0xffed2705, v1
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v5, s[4:5]
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v7
+; GISEL-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[6:7]
+; GISEL-NEXT: v_subrev_i32_e32 v8, vcc, 0x12d8fb, v1
+; GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v7, s[4:5]
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v5
; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[6:7]
; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v4
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v7, v5, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v7, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v8, vcc
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
@@ -398,29 +398,29 @@ define <2 x i32> @v_sdiv_v2i32_oddk_denom(<2 x i32> %num) {
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
-; CGP-NEXT: v_mul_lo_u32 v7, v3, v4
-; CGP-NEXT: v_mul_hi_u32 v7, v3, v7
-; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v7
-; CGP-NEXT: v_mul_hi_u32 v7, v0, v3
+; CGP-NEXT: v_mul_lo_u32 v4, v3, v4
+; CGP-NEXT: v_mul_hi_u32 v4, v3, v4
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
+; CGP-NEXT: v_mul_hi_u32 v4, v0, v3
; CGP-NEXT: v_mul_hi_u32 v3, v1, v3
-; CGP-NEXT: v_mul_lo_u32 v8, v7, v5
-; CGP-NEXT: v_add_i32_e32 v9, vcc, 1, v7
-; CGP-NEXT: v_mul_lo_u32 v10, v3, v5
-; CGP-NEXT: v_add_i32_e32 v11, vcc, 1, v3
-; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v8
-; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v10
+; CGP-NEXT: v_mul_lo_u32 v7, v4, v5
+; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v4
+; CGP-NEXT: v_mul_lo_u32 v9, v3, v5
+; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v3
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v7
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v9
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5
-; CGP-NEXT: v_cndmask_b32_e64 v7, v7, v9, s[4:5]
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v0, v4
+; CGP-NEXT: v_cndmask_b32_e64 v4, v4, v8, s[4:5]
+; CGP-NEXT: v_sub_i32_e32 v7, vcc, v0, v5
; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v1, v5
-; CGP-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[6:7]
-; CGP-NEXT: v_add_i32_e32 v8, vcc, 0xffed2705, v1
-; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5]
-; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v7
+; CGP-NEXT: v_cndmask_b32_e64 v3, v3, v10, s[6:7]
+; CGP-NEXT: v_subrev_i32_e32 v8, vcc, 0x12d8fb, v1
+; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v7, s[4:5]
+; CGP-NEXT: v_add_i32_e32 v7, vcc, 1, v4
; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[6:7]
; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v3
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
-; CGP-NEXT: v_cndmask_b32_e32 v0, v7, v4, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v0, v4, v7, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
; CGP-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
index b12e915c7d21b1c..4cf1c92539c36f6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
@@ -1583,7 +1583,7 @@ define i65 @v_shl_i65(i65 %value, i65 %amount) {
; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 64, v3
; GFX6-NEXT: v_lshr_b64 v[4:5], v[0:1], v4
; GFX6-NEXT: v_lshl_b64 v[5:6], v[2:3], v3
-; GFX6-NEXT: v_add_i32_e32 v8, vcc, 0xffffffc0, v3
+; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, 64, v3
; GFX6-NEXT: v_lshl_b64 v[6:7], v[0:1], v3
; GFX6-NEXT: v_or_b32_e32 v9, v4, v5
; GFX6-NEXT: v_lshl_b64 v[4:5], v[0:1], v8
@@ -1601,7 +1601,7 @@ define i65 @v_shl_i65(i65 %value, i65 %amount) {
; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 64, v3
; GFX8-NEXT: v_lshrrev_b64 v[4:5], v4, v[0:1]
; GFX8-NEXT: v_lshlrev_b64 v[5:6], v3, v[2:3]
-; GFX8-NEXT: v_add_u32_e32 v8, vcc, 0xffffffc0, v3
+; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, 64, v3
; GFX8-NEXT: v_lshlrev_b64 v[6:7], v3, v[0:1]
; GFX8-NEXT: v_or_b32_e32 v9, v4, v5
; GFX8-NEXT: v_lshlrev_b64 v[4:5], v8, v[0:1]
@@ -1619,7 +1619,7 @@ define i65 @v_shl_i65(i65 %value, i65 %amount) {
; GFX9-NEXT: v_sub_u32_e32 v4, 64, v3
; GFX9-NEXT: v_lshrrev_b64 v[4:5], v4, v[0:1]
; GFX9-NEXT: v_lshlrev_b64 v[5:6], v3, v[2:3]
-; GFX9-NEXT: v_add_u32_e32 v8, 0xffffffc0, v3
+; GFX9-NEXT: v_subrev_u32_e32 v8, 64, v3
; GFX9-NEXT: v_lshlrev_b64 v[6:7], v3, v[0:1]
; GFX9-NEXT: v_or_b32_e32 v9, v4, v5
; GFX9-NEXT: v_lshlrev_b64 v[4:5], v8, v[0:1]
@@ -1636,7 +1636,7 @@ define i65 @v_shl_i65(i65 %value, i65 %amount) {
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_sub_nc_u32_e32 v6, 64, v3
; GFX10-NEXT: v_lshlrev_b64 v[4:5], v3, v[2:3]
-; GFX10-NEXT: v_add_nc_u32_e32 v8, 0xffffffc0, v3
+; GFX10-NEXT: v_subrev_nc_u32_e32 v8, 64, v3
; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v3
; GFX10-NEXT: v_lshrrev_b64 v[5:6], v6, v[0:1]
; GFX10-NEXT: v_lshlrev_b64 v[6:7], v3, v[0:1]
@@ -1654,7 +1654,7 @@ define i65 @v_shl_i65(i65 %value, i65 %amount) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_sub_nc_u32_e32 v6, 64, v3
; GFX11-NEXT: v_lshlrev_b64 v[4:5], v3, v[2:3]
-; GFX11-NEXT: v_add_nc_u32_e32 v8, 0xffffffc0, v3
+; GFX11-NEXT: v_subrev_nc_u32_e32 v8, 64, v3
; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v3
; GFX11-NEXT: v_lshrrev_b64 v[5:6], v6, v[0:1]
; GFX11-NEXT: v_lshlrev_b64 v[6:7], v3, v[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
index 2b12e4b973acb2e..1bb606f36e48d2c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
@@ -268,10 +268,10 @@ define i32 @v_srem_i32_pow2k_denom(i32 %num) {
; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 12, v2
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, 0xfffff000, v0
+; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, 0x1000, v0
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, 0xfffff000, v0
+; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, 0x1000, v0
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
@@ -297,23 +297,23 @@ define <2 x i32> @v_srem_v2i32_pow2k_denom(<2 x i32> %num) {
; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT: v_mul_lo_u32 v7, v4, v5
-; GISEL-NEXT: v_mul_hi_u32 v7, v4, v7
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7
-; GISEL-NEXT: v_mul_hi_u32 v7, v0, v4
+; GISEL-NEXT: v_mul_lo_u32 v5, v4, v5
+; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
+; GISEL-NEXT: v_mul_hi_u32 v5, v0, v4
; GISEL-NEXT: v_mul_hi_u32 v4, v1, v4
-; GISEL-NEXT: v_lshlrev_b32_e32 v7, 12, v7
+; GISEL-NEXT: v_lshlrev_b32_e32 v5, 12, v5
; GISEL-NEXT: v_lshlrev_b32_e32 v4, 12, v4
-; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v7
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v0, v5
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, 0xfffff000, v1
+; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v3
+; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, 0x1000, v1
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v0, v5
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, 0xfffff000, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
+; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v3
+; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, 0x1000, v1
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
@@ -338,23 +338,23 @@ define <2 x i32> @v_srem_v2i32_pow2k_denom(<2 x i32> %num) {
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
-; CGP-NEXT: v_mul_lo_u32 v7, v3, v4
-; CGP-NEXT: v_mul_hi_u32 v7, v3, v7
-; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v7
-; CGP-NEXT: v_mul_hi_u32 v7, v0, v3
+; CGP-NEXT: v_mul_lo_u32 v4, v3, v4
+; CGP-NEXT: v_mul_hi_u32 v4, v3, v4
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
+; CGP-NEXT: v_mul_hi_u32 v4, v0, v3
; CGP-NEXT: v_mul_hi_u32 v3, v1, v3
-; CGP-NEXT: v_lshlrev_b32_e32 v7, 12, v7
+; CGP-NEXT: v_lshlrev_b32_e32 v4, 12, v4
; CGP-NEXT: v_lshlrev_b32_e32 v3, 12, v3
-; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v7
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
-; CGP-NEXT: v_add_i32_e32 v3, vcc, v0, v4
-; CGP-NEXT: v_add_i32_e32 v7, vcc, 0xfffff000, v1
+; CGP-NEXT: v_sub_i32_e32 v3, vcc, v0, v5
+; CGP-NEXT: v_subrev_i32_e32 v4, vcc, 0x1000, v1
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
-; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
-; CGP-NEXT: v_add_i32_e32 v3, vcc, v0, v4
-; CGP-NEXT: v_add_i32_e32 v4, vcc, 0xfffff000, v1
+; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
+; CGP-NEXT: v_sub_i32_e32 v3, vcc, v0, v5
+; CGP-NEXT: v_subrev_i32_e32 v4, vcc, 0x1000, v1
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
@@ -386,10 +386,10 @@ define i32 @v_srem_i32_oddk_denom(i32 %num) {
; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2
; CHECK-NEXT: v_mul_lo_u32 v2, v2, v4
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, 0xffed2705, v0
+; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, 0x12d8fb, v0
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, 0xffed2705, v0
+; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, 0x12d8fb, v0
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
@@ -415,23 +415,23 @@ define <2 x i32> @v_srem_v2i32_oddk_denom(<2 x i32> %num) {
; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT: v_mul_lo_u32 v7, v4, v5
-; GISEL-NEXT: v_mul_hi_u32 v7, v4, v7
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7
-; GISEL-NEXT: v_mul_hi_u32 v7, v0, v4
+; GISEL-NEXT: v_mul_lo_u32 v5, v4, v5
+; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
+; GISEL-NEXT: v_mul_hi_u32 v5, v0, v4
; GISEL-NEXT: v_mul_hi_u32 v4, v1, v4
-; GISEL-NEXT: v_mul_lo_u32 v7, v7, v3
+; GISEL-NEXT: v_mul_lo_u32 v5, v5, v3
; GISEL-NEXT: v_mul_lo_u32 v4, v4, v3
-; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v7
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v0, v5
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, 0xffed2705, v1
+; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v3
+; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, 0x12d8fb, v1
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v0, v5
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, 0xffed2705, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
+; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v3
+; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, 0x12d8fb, v1
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
@@ -456,23 +456,23 @@ define <2 x i32> @v_srem_v2i32_oddk_denom(<2 x i32> %num) {
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
-; CGP-NEXT: v_mul_lo_u32 v7, v3, v4
-; CGP-NEXT: v_mul_hi_u32 v7, v3, v7
-; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v7
-; CGP-NEXT: v_mul_hi_u32 v7, v0, v3
+; CGP-NEXT: v_mul_lo_u32 v4, v3, v4
+; CGP-NEXT: v_mul_hi_u32 v4, v3, v4
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
+; CGP-NEXT: v_mul_hi_u32 v4, v0, v3
; CGP-NEXT: v_mul_hi_u32 v3, v1, v3
-; CGP-NEXT: v_mul_lo_u32 v7, v7, v5
+; CGP-NEXT: v_mul_lo_u32 v4, v4, v5
; CGP-NEXT: v_mul_lo_u32 v3, v3, v5
-; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v7
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
-; CGP-NEXT: v_add_i32_e32 v3, vcc, v0, v4
-; CGP-NEXT: v_add_i32_e32 v7, vcc, 0xffed2705, v1
+; CGP-NEXT: v_sub_i32_e32 v3, vcc, v0, v5
+; CGP-NEXT: v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
-; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
-; CGP-NEXT: v_add_i32_e32 v3, vcc, v0, v4
-; CGP-NEXT: v_add_i32_e32 v4, vcc, 0xffed2705, v1
+; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
+; CGP-NEXT: v_sub_i32_e32 v3, vcc, v0, v5
+; CGP-NEXT: v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
index 7214f4ab581d5d0..2572f8581f0edf6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
@@ -12,9 +12,9 @@ define i7 @v_ssubsat_i7(i7 %lhs, i7 %rhs) {
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 25, v0
; GFX6-NEXT: v_max_i32_e32 v2, -1, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 25, v1
-; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0x80000001, v2
+; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2
; GFX6-NEXT: v_min_i32_e32 v3, -1, v0
-; GFX6-NEXT: v_add_i32_e32 v3, vcc, 0x80000000, v3
+; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x80000000, v3
; GFX6-NEXT: v_max_i32_e32 v1, v2, v1
; GFX6-NEXT: v_min_i32_e32 v1, v1, v3
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
@@ -27,9 +27,9 @@ define i7 @v_ssubsat_i7(i7 %lhs, i7 %rhs) {
; GFX8-NEXT: v_lshlrev_b16_e32 v0, 9, v0
; GFX8-NEXT: v_max_i16_e32 v2, -1, v0
; GFX8-NEXT: v_lshlrev_b16_e32 v1, 9, v1
-; GFX8-NEXT: v_add_u16_e32 v2, 0x8001, v2
+; GFX8-NEXT: v_subrev_u16_e32 v2, 0x7fff, v2
; GFX8-NEXT: v_min_i16_e32 v3, -1, v0
-; GFX8-NEXT: v_add_u16_e32 v3, 0x8000, v3
+; GFX8-NEXT: v_subrev_u16_e32 v3, 0x8000, v3
; GFX8-NEXT: v_max_i16_e32 v1, v2, v1
; GFX8-NEXT: v_min_i16_e32 v1, v1, v3
; GFX8-NEXT: v_sub_u16_e32 v0, v0, v1
@@ -63,9 +63,9 @@ define amdgpu_ps i7 @s_ssubsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
; GFX6-NEXT: s_lshl_b32 s0, s0, 25
; GFX6-NEXT: s_max_i32 s2, s0, -1
; GFX6-NEXT: s_lshl_b32 s1, s1, 25
-; GFX6-NEXT: s_add_i32 s2, s2, 0x80000001
+; GFX6-NEXT: s_sub_i32 s2, s2, 0x7fffffff
; GFX6-NEXT: s_min_i32 s3, s0, -1
-; GFX6-NEXT: s_add_i32 s3, s3, 0x80000000
+; GFX6-NEXT: s_sub_i32 s3, s3, 0x80000000
; GFX6-NEXT: s_max_i32 s1, s2, s1
; GFX6-NEXT: s_min_i32 s1, s1, s3
; GFX6-NEXT: s_sub_i32 s0, s0, s1
@@ -79,11 +79,11 @@ define amdgpu_ps i7 @s_ssubsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
; GFX8-NEXT: s_sext_i32_i16 s3, -1
; GFX8-NEXT: s_max_i32 s4, s2, s3
; GFX8-NEXT: s_lshl_b32 s1, s1, 9
-; GFX8-NEXT: s_addk_i32 s4, 0x8001
+; GFX8-NEXT: s_sub_i32 s4, s4, 0x7fff
; GFX8-NEXT: s_min_i32 s2, s2, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s4
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_addk_i32 s2, 0x8000
+; GFX8-NEXT: s_sub_i32 s2, s2, 0xffff8000
; GFX8-NEXT: s_max_i32 s1, s3, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s2, s2
@@ -122,9 +122,9 @@ define i8 @v_ssubsat_i8(i8 %lhs, i8 %rhs) {
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 24, v0
; GFX6-NEXT: v_max_i32_e32 v2, -1, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1
-; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0x80000001, v2
+; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2
; GFX6-NEXT: v_min_i32_e32 v3, -1, v0
-; GFX6-NEXT: v_add_i32_e32 v3, vcc, 0x80000000, v3
+; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x80000000, v3
; GFX6-NEXT: v_max_i32_e32 v1, v2, v1
; GFX6-NEXT: v_min_i32_e32 v1, v1, v3
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
@@ -137,9 +137,9 @@ define i8 @v_ssubsat_i8(i8 %lhs, i8 %rhs) {
; GFX8-NEXT: v_lshlrev_b16_e32 v0, 8, v0
; GFX8-NEXT: v_max_i16_e32 v2, -1, v0
; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1
-; GFX8-NEXT: v_add_u16_e32 v2, 0x8001, v2
+; GFX8-NEXT: v_subrev_u16_e32 v2, 0x7fff, v2
; GFX8-NEXT: v_min_i16_e32 v3, -1, v0
-; GFX8-NEXT: v_add_u16_e32 v3, 0x8000, v3
+; GFX8-NEXT: v_subrev_u16_e32 v3, 0x8000, v3
; GFX8-NEXT: v_max_i16_e32 v1, v2, v1
; GFX8-NEXT: v_min_i16_e32 v1, v1, v3
; GFX8-NEXT: v_sub_u16_e32 v0, v0, v1
@@ -173,9 +173,9 @@ define amdgpu_ps i8 @s_ssubsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
; GFX6-NEXT: s_lshl_b32 s0, s0, 24
; GFX6-NEXT: s_max_i32 s2, s0, -1
; GFX6-NEXT: s_lshl_b32 s1, s1, 24
-; GFX6-NEXT: s_add_i32 s2, s2, 0x80000001
+; GFX6-NEXT: s_sub_i32 s2, s2, 0x7fffffff
; GFX6-NEXT: s_min_i32 s3, s0, -1
-; GFX6-NEXT: s_add_i32 s3, s3, 0x80000000
+; GFX6-NEXT: s_sub_i32 s3, s3, 0x80000000
; GFX6-NEXT: s_max_i32 s1, s2, s1
; GFX6-NEXT: s_min_i32 s1, s1, s3
; GFX6-NEXT: s_sub_i32 s0, s0, s1
@@ -189,11 +189,11 @@ define amdgpu_ps i8 @s_ssubsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
; GFX8-NEXT: s_sext_i32_i16 s3, -1
; GFX8-NEXT: s_max_i32 s4, s2, s3
; GFX8-NEXT: s_lshl_b32 s1, s1, 8
-; GFX8-NEXT: s_addk_i32 s4, 0x8001
+; GFX8-NEXT: s_sub_i32 s4, s4, 0x7fff
; GFX8-NEXT: s_min_i32 s2, s2, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s4
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_addk_i32 s2, 0x8000
+; GFX8-NEXT: s_sub_i32 s2, s2, 0xffff8000
; GFX8-NEXT: s_max_i32 s1, s3, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s2, s2
@@ -234,19 +234,18 @@ define i16 @v_ssubsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
; GFX6-NEXT: v_max_i32_e32 v4, -1, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1
-; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0x80000001, v4
+; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 0x7fffffff, v4
; GFX6-NEXT: v_min_i32_e32 v5, -1, v0
-; GFX6-NEXT: v_bfrev_b32_e32 v6, 1
-; GFX6-NEXT: v_add_i32_e32 v5, vcc, v5, v6
+; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 0x80000000, v5
; GFX6-NEXT: v_max_i32_e32 v1, v4, v1
; GFX6-NEXT: v_min_i32_e32 v1, v1, v5
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v3
; GFX6-NEXT: v_max_i32_e32 v3, -1, v1
-; GFX6-NEXT: v_add_i32_e32 v3, vcc, 0x80000001, v3
+; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x7fffffff, v3
; GFX6-NEXT: v_min_i32_e32 v4, -1, v1
-; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0x80000000, v4
+; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 0x80000000, v4
; GFX6-NEXT: v_max_i32_e32 v2, v3, v2
; GFX6-NEXT: v_min_i32_e32 v2, v2, v4
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v2
@@ -267,16 +266,16 @@ define i16 @v_ssubsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
; GFX8-NEXT: v_max_i16_e32 v4, -1, v0
; GFX8-NEXT: v_lshrrev_b32_sdwa v2, v2, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1
-; GFX8-NEXT: v_add_u16_e32 v4, 0x8001, v4
+; GFX8-NEXT: v_subrev_u16_e32 v4, 0x7fff, v4
; GFX8-NEXT: v_min_i16_e32 v5, -1, v0
-; GFX8-NEXT: v_add_u16_e32 v5, 0x8000, v5
+; GFX8-NEXT: v_subrev_u16_e32 v5, 0x8000, v5
; GFX8-NEXT: v_max_i16_e32 v1, v4, v1
; GFX8-NEXT: v_min_i16_e32 v1, v1, v5
; GFX8-NEXT: v_sub_u16_e32 v0, v0, v1
; GFX8-NEXT: v_max_i16_e32 v1, -1, v3
-; GFX8-NEXT: v_add_u16_e32 v1, 0x8001, v1
+; GFX8-NEXT: v_subrev_u16_e32 v1, 0x7fff, v1
; GFX8-NEXT: v_min_i16_e32 v4, -1, v3
-; GFX8-NEXT: v_add_u16_e32 v4, 0x8000, v4
+; GFX8-NEXT: v_subrev_u16_e32 v4, 0x8000, v4
; GFX8-NEXT: v_max_i16_e32 v1, v1, v2
; GFX8-NEXT: v_min_i16_e32 v1, v1, v4
; GFX8-NEXT: v_sub_u16_e32 v1, v3, v1
@@ -356,18 +355,18 @@ define amdgpu_ps i16 @s_ssubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
; GFX6-NEXT: s_max_i32 s4, s0, -1
; GFX6-NEXT: s_lshr_b32 s3, s1, 8
; GFX6-NEXT: s_lshl_b32 s1, s1, 24
-; GFX6-NEXT: s_add_i32 s4, s4, 0x80000001
+; GFX6-NEXT: s_sub_i32 s4, s4, 0x7fffffff
; GFX6-NEXT: s_min_i32 s5, s0, -1
-; GFX6-NEXT: s_add_i32 s5, s5, 0x80000000
+; GFX6-NEXT: s_sub_i32 s5, s5, 0x80000000
; GFX6-NEXT: s_max_i32 s1, s4, s1
; GFX6-NEXT: s_min_i32 s1, s1, s5
; GFX6-NEXT: s_sub_i32 s0, s0, s1
; GFX6-NEXT: s_lshl_b32 s1, s2, 24
; GFX6-NEXT: s_lshl_b32 s2, s3, 24
; GFX6-NEXT: s_max_i32 s3, s1, -1
-; GFX6-NEXT: s_add_i32 s3, s3, 0x80000001
+; GFX6-NEXT: s_sub_i32 s3, s3, 0x7fffffff
; GFX6-NEXT: s_min_i32 s4, s1, -1
-; GFX6-NEXT: s_add_i32 s4, s4, 0x80000000
+; GFX6-NEXT: s_sub_i32 s4, s4, 0x80000000
; GFX6-NEXT: s_max_i32 s2, s3, s2
; GFX6-NEXT: s_min_i32 s2, s2, s4
; GFX6-NEXT: s_sub_i32 s1, s1, s2
@@ -388,11 +387,11 @@ define amdgpu_ps i16 @s_ssubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
; GFX8-NEXT: s_max_i32 s6, s4, s5
; GFX8-NEXT: s_lshr_b32 s3, s1, 8
; GFX8-NEXT: s_lshl_b32 s1, s1, 8
-; GFX8-NEXT: s_addk_i32 s6, 0x8001
+; GFX8-NEXT: s_sub_i32 s6, s6, 0x7fff
; GFX8-NEXT: s_min_i32 s4, s4, s5
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_addk_i32 s4, 0x8000
+; GFX8-NEXT: s_sub_i32 s4, s4, 0xffff8000
; GFX8-NEXT: s_max_i32 s1, s6, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s4, s4
@@ -402,11 +401,11 @@ define amdgpu_ps i16 @s_ssubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
; GFX8-NEXT: s_lshl_b32 s2, s3, 8
; GFX8-NEXT: s_sext_i32_i16 s3, s1
; GFX8-NEXT: s_max_i32 s4, s3, s5
-; GFX8-NEXT: s_addk_i32 s4, 0x8001
+; GFX8-NEXT: s_sub_i32 s4, s4, 0x7fff
; GFX8-NEXT: s_min_i32 s3, s3, s5
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s2, s2
-; GFX8-NEXT: s_addk_i32 s3, 0x8000
+; GFX8-NEXT: s_sub_i32 s3, s3, 0xffff8000
; GFX8-NEXT: s_max_i32 s2, s4, s2
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s3, s3
@@ -510,29 +509,29 @@ define i32 @v_ssubsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
; GFX6-NEXT: v_lshrrev_b32_e32 v6, 16, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v7, 24, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1
-; GFX6-NEXT: v_add_i32_e32 v8, vcc, 0x80000001, v8
+; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, 0x7fffffff, v8
; GFX6-NEXT: v_min_i32_e32 v10, -1, v0
; GFX6-NEXT: v_bfrev_b32_e32 v11, 1
-; GFX6-NEXT: v_add_i32_e32 v10, vcc, v10, v11
+; GFX6-NEXT: v_sub_i32_e32 v10, vcc, v10, v11
; GFX6-NEXT: v_max_i32_e32 v1, v8, v1
; GFX6-NEXT: v_min_i32_e32 v1, v1, v10
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v2
-; GFX6-NEXT: v_mov_b32_e32 v9, 0x80000001
+; GFX6-NEXT: v_bfrev_b32_e32 v9, -2
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v5
; GFX6-NEXT: v_max_i32_e32 v5, -1, v1
-; GFX6-NEXT: v_add_i32_e32 v5, vcc, v5, v9
+; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v9
; GFX6-NEXT: v_min_i32_e32 v8, -1, v1
-; GFX6-NEXT: v_add_i32_e32 v8, vcc, v8, v11
+; GFX6-NEXT: v_sub_i32_e32 v8, vcc, v8, v11
; GFX6-NEXT: v_max_i32_e32 v2, v5, v2
; GFX6-NEXT: v_min_i32_e32 v2, v2, v8
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v3
; GFX6-NEXT: v_max_i32_e32 v5, -1, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 24, v6
-; GFX6-NEXT: v_add_i32_e32 v5, vcc, v5, v9
+; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v9
; GFX6-NEXT: v_min_i32_e32 v6, -1, v2
-; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v11
+; GFX6-NEXT: v_sub_i32_e32 v6, vcc, v6, v11
; GFX6-NEXT: v_max_i32_e32 v3, v5, v3
; GFX6-NEXT: v_min_i32_e32 v3, v3, v6
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3
@@ -540,10 +539,10 @@ define i32 @v_ssubsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
; GFX6-NEXT: v_max_i32_e32 v5, -1, v3
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 24, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 24, v7
-; GFX6-NEXT: v_add_i32_e32 v5, vcc, v5, v9
+; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v9
; GFX6-NEXT: v_min_i32_e32 v6, -1, v3
; GFX6-NEXT: v_ashrrev_i32_e32 v0, 24, v0
-; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v11
+; GFX6-NEXT: v_sub_i32_e32 v6, vcc, v6, v11
; GFX6-NEXT: v_max_i32_e32 v4, v5, v4
; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX6-NEXT: v_ashrrev_i32_e32 v2, 24, v2
@@ -574,34 +573,34 @@ define i32 @v_ssubsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v1
; GFX8-NEXT: v_lshrrev_b32_e32 v7, 24, v1
; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1
-; GFX8-NEXT: v_add_u16_e32 v8, 0x8001, v8
+; GFX8-NEXT: v_subrev_u16_e32 v8, 0x7fff, v8
; GFX8-NEXT: v_min_i16_e32 v9, -1, v0
-; GFX8-NEXT: v_add_u16_e32 v9, 0x8000, v9
+; GFX8-NEXT: v_subrev_u16_e32 v9, 0x8000, v9
; GFX8-NEXT: v_max_i16_e32 v1, v8, v1
; GFX8-NEXT: v_min_i16_e32 v1, v1, v9
; GFX8-NEXT: v_sub_u16_e32 v0, v0, v1
; GFX8-NEXT: v_max_i16_e32 v1, -1, v3
-; GFX8-NEXT: v_add_u16_e32 v1, 0x8001, v1
+; GFX8-NEXT: v_subrev_u16_e32 v1, 0x7fff, v1
; GFX8-NEXT: v_min_i16_e32 v8, -1, v3
-; GFX8-NEXT: v_add_u16_e32 v8, 0x8000, v8
+; GFX8-NEXT: v_subrev_u16_e32 v8, 0x8000, v8
; GFX8-NEXT: v_max_i16_e32 v1, v1, v2
; GFX8-NEXT: v_lshlrev_b16_e32 v2, 8, v4
; GFX8-NEXT: v_min_i16_e32 v1, v1, v8
; GFX8-NEXT: v_max_i16_e32 v4, -1, v2
; GFX8-NEXT: v_sub_u16_e32 v1, v3, v1
; GFX8-NEXT: v_lshlrev_b16_e32 v3, 8, v6
-; GFX8-NEXT: v_add_u16_e32 v4, 0x8001, v4
+; GFX8-NEXT: v_subrev_u16_e32 v4, 0x7fff, v4
; GFX8-NEXT: v_min_i16_e32 v6, -1, v2
-; GFX8-NEXT: v_add_u16_e32 v6, 0x8000, v6
+; GFX8-NEXT: v_subrev_u16_e32 v6, 0x8000, v6
; GFX8-NEXT: v_max_i16_e32 v3, v4, v3
; GFX8-NEXT: v_min_i16_e32 v3, v3, v6
; GFX8-NEXT: v_sub_u16_e32 v2, v2, v3
; GFX8-NEXT: v_lshlrev_b16_e32 v3, 8, v5
; GFX8-NEXT: v_max_i16_e32 v5, -1, v3
; GFX8-NEXT: v_lshlrev_b16_e32 v4, 8, v7
-; GFX8-NEXT: v_add_u16_e32 v5, 0x8001, v5
+; GFX8-NEXT: v_subrev_u16_e32 v5, 0x7fff, v5
; GFX8-NEXT: v_min_i16_e32 v6, -1, v3
-; GFX8-NEXT: v_add_u16_e32 v6, 0x8000, v6
+; GFX8-NEXT: v_subrev_u16_e32 v6, 0x8000, v6
; GFX8-NEXT: v_max_i16_e32 v4, v5, v4
; GFX8-NEXT: v_min_i16_e32 v4, v4, v6
; GFX8-NEXT: v_sub_u16_e32 v3, v3, v4
@@ -728,27 +727,27 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX6-NEXT: s_lshr_b32 s6, s1, 16
; GFX6-NEXT: s_lshr_b32 s7, s1, 24
; GFX6-NEXT: s_lshl_b32 s1, s1, 24
-; GFX6-NEXT: s_add_i32 s8, s8, 0x80000001
+; GFX6-NEXT: s_sub_i32 s8, s8, 0x7fffffff
; GFX6-NEXT: s_min_i32 s9, s0, -1
-; GFX6-NEXT: s_add_i32 s9, s9, 0x80000000
+; GFX6-NEXT: s_sub_i32 s9, s9, 0x80000000
; GFX6-NEXT: s_max_i32 s1, s8, s1
; GFX6-NEXT: s_min_i32 s1, s1, s9
; GFX6-NEXT: s_sub_i32 s0, s0, s1
; GFX6-NEXT: s_lshl_b32 s1, s2, 24
; GFX6-NEXT: s_lshl_b32 s2, s5, 24
; GFX6-NEXT: s_max_i32 s5, s1, -1
-; GFX6-NEXT: s_add_i32 s5, s5, 0x80000001
+; GFX6-NEXT: s_sub_i32 s5, s5, 0x7fffffff
; GFX6-NEXT: s_min_i32 s8, s1, -1
-; GFX6-NEXT: s_add_i32 s8, s8, 0x80000000
+; GFX6-NEXT: s_sub_i32 s8, s8, 0x80000000
; GFX6-NEXT: s_max_i32 s2, s5, s2
; GFX6-NEXT: s_min_i32 s2, s2, s8
; GFX6-NEXT: s_sub_i32 s1, s1, s2
; GFX6-NEXT: s_lshl_b32 s2, s3, 24
; GFX6-NEXT: s_max_i32 s5, s2, -1
; GFX6-NEXT: s_lshl_b32 s3, s6, 24
-; GFX6-NEXT: s_add_i32 s5, s5, 0x80000001
+; GFX6-NEXT: s_sub_i32 s5, s5, 0x7fffffff
; GFX6-NEXT: s_min_i32 s6, s2, -1
-; GFX6-NEXT: s_add_i32 s6, s6, 0x80000000
+; GFX6-NEXT: s_sub_i32 s6, s6, 0x80000000
; GFX6-NEXT: s_max_i32 s3, s5, s3
; GFX6-NEXT: s_min_i32 s3, s3, s6
; GFX6-NEXT: s_sub_i32 s2, s2, s3
@@ -756,10 +755,10 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX6-NEXT: s_max_i32 s5, s3, -1
; GFX6-NEXT: s_ashr_i32 s1, s1, 24
; GFX6-NEXT: s_lshl_b32 s4, s7, 24
-; GFX6-NEXT: s_add_i32 s5, s5, 0x80000001
+; GFX6-NEXT: s_sub_i32 s5, s5, 0x7fffffff
; GFX6-NEXT: s_min_i32 s6, s3, -1
; GFX6-NEXT: s_ashr_i32 s0, s0, 24
-; GFX6-NEXT: s_add_i32 s6, s6, 0x80000000
+; GFX6-NEXT: s_sub_i32 s6, s6, 0x80000000
; GFX6-NEXT: s_max_i32 s4, s5, s4
; GFX6-NEXT: s_and_b32 s1, s1, 0xff
; GFX6-NEXT: s_ashr_i32 s2, s2, 24
@@ -790,11 +789,11 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX8-NEXT: s_lshr_b32 s6, s1, 16
; GFX8-NEXT: s_lshr_b32 s7, s1, 24
; GFX8-NEXT: s_lshl_b32 s1, s1, 8
-; GFX8-NEXT: s_addk_i32 s10, 0x8001
+; GFX8-NEXT: s_sub_i32 s10, s10, 0x7fff
; GFX8-NEXT: s_min_i32 s8, s8, s9
; GFX8-NEXT: s_sext_i32_i16 s10, s10
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_addk_i32 s8, 0x8000
+; GFX8-NEXT: s_sub_i32 s8, s8, 0xffff8000
; GFX8-NEXT: s_max_i32 s1, s10, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s8, s8
@@ -804,11 +803,11 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX8-NEXT: s_lshl_b32 s2, s5, 8
; GFX8-NEXT: s_sext_i32_i16 s5, s1
; GFX8-NEXT: s_max_i32 s8, s5, s9
-; GFX8-NEXT: s_addk_i32 s8, 0x8001
+; GFX8-NEXT: s_sub_i32 s8, s8, 0x7fff
; GFX8-NEXT: s_min_i32 s5, s5, s9
; GFX8-NEXT: s_sext_i32_i16 s8, s8
; GFX8-NEXT: s_sext_i32_i16 s2, s2
-; GFX8-NEXT: s_addk_i32 s5, 0x8000
+; GFX8-NEXT: s_sub_i32 s5, s5, 0xffff8000
; GFX8-NEXT: s_max_i32 s2, s8, s2
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s5, s5
@@ -818,11 +817,11 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX8-NEXT: s_sext_i32_i16 s5, s2
; GFX8-NEXT: s_lshl_b32 s3, s6, 8
; GFX8-NEXT: s_max_i32 s6, s5, s9
-; GFX8-NEXT: s_addk_i32 s6, 0x8001
+; GFX8-NEXT: s_sub_i32 s6, s6, 0x7fff
; GFX8-NEXT: s_min_i32 s5, s5, s9
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s3, s3
-; GFX8-NEXT: s_addk_i32 s5, 0x8000
+; GFX8-NEXT: s_sub_i32 s5, s5, 0xffff8000
; GFX8-NEXT: s_max_i32 s3, s6, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s5, s5
@@ -832,12 +831,12 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX8-NEXT: s_sext_i32_i16 s5, s3
; GFX8-NEXT: s_max_i32 s6, s5, s9
; GFX8-NEXT: s_lshl_b32 s4, s7, 8
-; GFX8-NEXT: s_addk_i32 s6, 0x8001
+; GFX8-NEXT: s_sub_i32 s6, s6, 0x7fff
; GFX8-NEXT: s_min_i32 s5, s5, s9
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_addk_i32 s5, 0x8000
+; GFX8-NEXT: s_sub_i32 s5, s5, 0xffff8000
; GFX8-NEXT: s_max_i32 s4, s6, s4
; GFX8-NEXT: s_sext_i32_i16 s0, s0
; GFX8-NEXT: s_ashr_i32 s1, s1, 8
@@ -1005,9 +1004,9 @@ define i24 @v_ssubsat_i24(i24 %lhs, i24 %rhs) {
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 8, v0
; GFX6-NEXT: v_max_i32_e32 v2, -1, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1
-; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0x80000001, v2
+; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2
; GFX6-NEXT: v_min_i32_e32 v3, -1, v0
-; GFX6-NEXT: v_add_i32_e32 v3, vcc, 0x80000000, v3
+; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x80000000, v3
; GFX6-NEXT: v_max_i32_e32 v1, v2, v1
; GFX6-NEXT: v_min_i32_e32 v1, v1, v3
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
@@ -1056,9 +1055,9 @@ define amdgpu_ps i24 @s_ssubsat_i24(i24 inreg %lhs, i24 inreg %rhs) {
; GFX6-NEXT: s_lshl_b32 s0, s0, 8
; GFX6-NEXT: s_max_i32 s2, s0, -1
; GFX6-NEXT: s_lshl_b32 s1, s1, 8
-; GFX6-NEXT: s_add_i32 s2, s2, 0x80000001
+; GFX6-NEXT: s_sub_i32 s2, s2, 0x7fffffff
; GFX6-NEXT: s_min_i32 s3, s0, -1
-; GFX6-NEXT: s_add_i32 s3, s3, 0x80000000
+; GFX6-NEXT: s_sub_i32 s3, s3, 0x80000000
; GFX6-NEXT: s_max_i32 s1, s2, s1
; GFX6-NEXT: s_min_i32 s1, s1, s3
; GFX6-NEXT: s_sub_i32 s0, s0, s1
@@ -1110,9 +1109,9 @@ define i32 @v_ssubsat_i32(i32 %lhs, i32 %rhs) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_max_i32_e32 v2, -1, v0
-; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0x80000001, v2
+; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2
; GFX6-NEXT: v_min_i32_e32 v3, -1, v0
-; GFX6-NEXT: v_add_i32_e32 v3, vcc, 0x80000000, v3
+; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x80000000, v3
; GFX6-NEXT: v_max_i32_e32 v1, v2, v1
; GFX6-NEXT: v_min_i32_e32 v1, v1, v3
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
@@ -1122,9 +1121,9 @@ define i32 @v_ssubsat_i32(i32 %lhs, i32 %rhs) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_i32_e32 v2, -1, v0
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0x80000001, v2
+; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 0x7fffffff, v2
; GFX8-NEXT: v_min_i32_e32 v3, -1, v0
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0x80000000, v3
+; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 0x80000000, v3
; GFX8-NEXT: v_max_i32_e32 v1, v2, v1
; GFX8-NEXT: v_min_i32_e32 v1, v1, v3
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v1
@@ -1149,9 +1148,9 @@ define amdgpu_ps i32 @s_ssubsat_i32(i32 inreg %lhs, i32 inreg %rhs) {
; GFX6-LABEL: s_ssubsat_i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_max_i32 s2, s0, -1
-; GFX6-NEXT: s_add_i32 s2, s2, 0x80000001
+; GFX6-NEXT: s_sub_i32 s2, s2, 0x7fffffff
; GFX6-NEXT: s_min_i32 s3, s0, -1
-; GFX6-NEXT: s_add_i32 s3, s3, 0x80000000
+; GFX6-NEXT: s_sub_i32 s3, s3, 0x80000000
; GFX6-NEXT: s_max_i32 s1, s2, s1
; GFX6-NEXT: s_min_i32 s1, s1, s3
; GFX6-NEXT: s_sub_i32 s0, s0, s1
@@ -1160,9 +1159,9 @@ define amdgpu_ps i32 @s_ssubsat_i32(i32 inreg %lhs, i32 inreg %rhs) {
; GFX8-LABEL: s_ssubsat_i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_max_i32 s2, s0, -1
-; GFX8-NEXT: s_add_i32 s2, s2, 0x80000001
+; GFX8-NEXT: s_sub_i32 s2, s2, 0x7fffffff
; GFX8-NEXT: s_min_i32 s3, s0, -1
-; GFX8-NEXT: s_add_i32 s3, s3, 0x80000000
+; GFX8-NEXT: s_sub_i32 s3, s3, 0x80000000
; GFX8-NEXT: s_max_i32 s1, s2, s1
; GFX8-NEXT: s_min_i32 s1, s1, s3
; GFX8-NEXT: s_sub_i32 s0, s0, s1
@@ -1188,9 +1187,9 @@ define amdgpu_ps float @ssubsat_i32_sv(i32 inreg %lhs, i32 %rhs) {
; GFX6-LABEL: ssubsat_i32_sv:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_max_i32 s1, s0, -1
-; GFX6-NEXT: s_add_i32 s1, s1, 0x80000001
+; GFX6-NEXT: s_sub_i32 s1, s1, 0x7fffffff
; GFX6-NEXT: s_min_i32 s2, s0, -1
-; GFX6-NEXT: s_add_i32 s2, s2, 0x80000000
+; GFX6-NEXT: s_sub_i32 s2, s2, 0x80000000
; GFX6-NEXT: v_max_i32_e32 v0, s1, v0
; GFX6-NEXT: v_min_i32_e32 v0, s2, v0
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s0, v0
@@ -1199,9 +1198,9 @@ define amdgpu_ps float @ssubsat_i32_sv(i32 inreg %lhs, i32 %rhs) {
; GFX8-LABEL: ssubsat_i32_sv:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_max_i32 s1, s0, -1
-; GFX8-NEXT: s_add_i32 s1, s1, 0x80000001
+; GFX8-NEXT: s_sub_i32 s1, s1, 0x7fffffff
; GFX8-NEXT: s_min_i32 s2, s0, -1
-; GFX8-NEXT: s_add_i32 s2, s2, 0x80000000
+; GFX8-NEXT: s_sub_i32 s2, s2, 0x80000000
; GFX8-NEXT: v_max_i32_e32 v0, s1, v0
; GFX8-NEXT: v_min_i32_e32 v0, s2, v0
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s0, v0
@@ -1225,9 +1224,9 @@ define amdgpu_ps float @ssubsat_i32_vs(i32 %lhs, i32 inreg %rhs) {
; GFX6-LABEL: ssubsat_i32_vs:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_max_i32_e32 v1, -1, v0
-; GFX6-NEXT: v_add_i32_e32 v1, vcc, 0x80000001, v1
+; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 0x7fffffff, v1
; GFX6-NEXT: v_min_i32_e32 v2, -1, v0
-; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0x80000000, v2
+; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 0x80000000, v2
; GFX6-NEXT: v_max_i32_e32 v1, s0, v1
; GFX6-NEXT: v_min_i32_e32 v1, v1, v2
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
@@ -1236,9 +1235,9 @@ define amdgpu_ps float @ssubsat_i32_vs(i32 %lhs, i32 inreg %rhs) {
; GFX8-LABEL: ssubsat_i32_vs:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_max_i32_e32 v1, -1, v0
-; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x80000001, v1
+; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 0x7fffffff, v1
; GFX8-NEXT: v_min_i32_e32 v2, -1, v0
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0x80000000, v2
+; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 0x80000000, v2
; GFX8-NEXT: v_max_i32_e32 v1, s0, v1
; GFX8-NEXT: v_min_i32_e32 v1, v1, v2
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v1
@@ -1263,16 +1262,16 @@ define <2 x i32> @v_ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_max_i32_e32 v4, -1, v0
-; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0x80000001, v4
+; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 0x7fffffff, v4
; GFX6-NEXT: v_min_i32_e32 v5, -1, v0
-; GFX6-NEXT: v_add_i32_e32 v5, vcc, 0x80000000, v5
+; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 0x80000000, v5
; GFX6-NEXT: v_max_i32_e32 v2, v4, v2
; GFX6-NEXT: v_min_i32_e32 v2, v2, v5
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; GFX6-NEXT: v_max_i32_e32 v2, -1, v1
-; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0x80000001, v2
+; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2
; GFX6-NEXT: v_min_i32_e32 v4, -1, v1
-; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0x80000000, v4
+; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 0x80000000, v4
; GFX6-NEXT: v_max_i32_e32 v2, v2, v3
; GFX6-NEXT: v_min_i32_e32 v2, v2, v4
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v2
@@ -1282,16 +1281,16 @@ define <2 x i32> @v_ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_i32_e32 v4, -1, v0
-; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0x80000001, v4
+; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 0x7fffffff, v4
; GFX8-NEXT: v_min_i32_e32 v5, -1, v0
-; GFX8-NEXT: v_add_u32_e32 v5, vcc, 0x80000000, v5
+; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, 0x80000000, v5
; GFX8-NEXT: v_max_i32_e32 v2, v4, v2
; GFX8-NEXT: v_min_i32_e32 v2, v2, v5
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_max_i32_e32 v2, -1, v1
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0x80000001, v2
+; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 0x7fffffff, v2
; GFX8-NEXT: v_min_i32_e32 v4, -1, v1
-; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0x80000000, v4
+; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 0x80000000, v4
; GFX8-NEXT: v_max_i32_e32 v2, v2, v3
; GFX8-NEXT: v_min_i32_e32 v2, v2, v4
; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v1, v2
@@ -1318,16 +1317,16 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v2i32(<2 x i32> inreg %lhs, <2 x i32> inre
; GFX6-LABEL: s_ssubsat_v2i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_max_i32 s4, s0, -1
-; GFX6-NEXT: s_add_i32 s4, s4, 0x80000001
+; GFX6-NEXT: s_sub_i32 s4, s4, 0x7fffffff
; GFX6-NEXT: s_min_i32 s5, s0, -1
-; GFX6-NEXT: s_add_i32 s5, s5, 0x80000000
+; GFX6-NEXT: s_sub_i32 s5, s5, 0x80000000
; GFX6-NEXT: s_max_i32 s2, s4, s2
; GFX6-NEXT: s_min_i32 s2, s2, s5
; GFX6-NEXT: s_sub_i32 s0, s0, s2
; GFX6-NEXT: s_max_i32 s2, s1, -1
-; GFX6-NEXT: s_add_i32 s2, s2, 0x80000001
+; GFX6-NEXT: s_sub_i32 s2, s2, 0x7fffffff
; GFX6-NEXT: s_min_i32 s4, s1, -1
-; GFX6-NEXT: s_add_i32 s4, s4, 0x80000000
+; GFX6-NEXT: s_sub_i32 s4, s4, 0x80000000
; GFX6-NEXT: s_max_i32 s2, s2, s3
; GFX6-NEXT: s_min_i32 s2, s2, s4
; GFX6-NEXT: s_sub_i32 s1, s1, s2
@@ -1336,16 +1335,16 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v2i32(<2 x i32> inreg %lhs, <2 x i32> inre
; GFX8-LABEL: s_ssubsat_v2i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_max_i32 s4, s0, -1
-; GFX8-NEXT: s_add_i32 s4, s4, 0x80000001
+; GFX8-NEXT: s_sub_i32 s4, s4, 0x7fffffff
; GFX8-NEXT: s_min_i32 s5, s0, -1
-; GFX8-NEXT: s_add_i32 s5, s5, 0x80000000
+; GFX8-NEXT: s_sub_i32 s5, s5, 0x80000000
; GFX8-NEXT: s_max_i32 s2, s4, s2
; GFX8-NEXT: s_min_i32 s2, s2, s5
; GFX8-NEXT: s_sub_i32 s0, s0, s2
; GFX8-NEXT: s_max_i32 s2, s1, -1
-; GFX8-NEXT: s_add_i32 s2, s2, 0x80000001
+; GFX8-NEXT: s_sub_i32 s2, s2, 0x7fffffff
; GFX8-NEXT: s_min_i32 s4, s1, -1
-; GFX8-NEXT: s_add_i32 s4, s4, 0x80000000
+; GFX8-NEXT: s_sub_i32 s4, s4, 0x80000000
; GFX8-NEXT: s_max_i32 s2, s2, s3
; GFX8-NEXT: s_min_i32 s2, s2, s4
; GFX8-NEXT: s_sub_i32 s1, s1, s2
@@ -1377,25 +1376,24 @@ define <3 x i32> @v_ssubsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_max_i32_e32 v6, -1, v0
-; GFX6-NEXT: v_add_i32_e32 v6, vcc, 0x80000001, v6
+; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 0x7fffffff, v6
; GFX6-NEXT: v_min_i32_e32 v8, -1, v0
-; GFX6-NEXT: v_bfrev_b32_e32 v9, 1
-; GFX6-NEXT: v_add_i32_e32 v8, vcc, v8, v9
+; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, 0x80000000, v8
; GFX6-NEXT: v_max_i32_e32 v3, v6, v3
; GFX6-NEXT: v_min_i32_e32 v3, v3, v8
-; GFX6-NEXT: v_mov_b32_e32 v7, 0x80000001
+; GFX6-NEXT: v_bfrev_b32_e32 v7, -2
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
; GFX6-NEXT: v_max_i32_e32 v3, -1, v1
-; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v7
+; GFX6-NEXT: v_sub_i32_e32 v3, vcc, v3, v7
; GFX6-NEXT: v_min_i32_e32 v6, -1, v1
-; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v9
+; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 0x80000000, v6
; GFX6-NEXT: v_max_i32_e32 v3, v3, v4
; GFX6-NEXT: v_min_i32_e32 v3, v3, v6
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
; GFX6-NEXT: v_max_i32_e32 v3, -1, v2
-; GFX6-NEXT: v_add_i32_e32 v3, vcc, 0x80000001, v3
+; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x7fffffff, v3
; GFX6-NEXT: v_min_i32_e32 v4, -1, v2
-; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0x80000000, v4
+; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 0x80000000, v4
; GFX6-NEXT: v_max_i32_e32 v3, v3, v5
; GFX6-NEXT: v_min_i32_e32 v3, v3, v4
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3
@@ -1405,25 +1403,24 @@ define <3 x i32> @v_ssubsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_i32_e32 v6, -1, v0
-; GFX8-NEXT: v_add_u32_e32 v6, vcc, 0x80000001, v6
+; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 0x7fffffff, v6
; GFX8-NEXT: v_min_i32_e32 v8, -1, v0
-; GFX8-NEXT: v_bfrev_b32_e32 v9, 1
-; GFX8-NEXT: v_add_u32_e32 v8, vcc, v8, v9
+; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, 0x80000000, v8
; GFX8-NEXT: v_max_i32_e32 v3, v6, v3
; GFX8-NEXT: v_min_i32_e32 v3, v3, v8
-; GFX8-NEXT: v_mov_b32_e32 v7, 0x80000001
+; GFX8-NEXT: v_bfrev_b32_e32 v7, -2
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v3
; GFX8-NEXT: v_max_i32_e32 v3, -1, v1
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v7
+; GFX8-NEXT: v_sub_u32_e32 v3, vcc, v3, v7
; GFX8-NEXT: v_min_i32_e32 v6, -1, v1
-; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v9
+; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 0x80000000, v6
; GFX8-NEXT: v_max_i32_e32 v3, v3, v4
; GFX8-NEXT: v_min_i32_e32 v3, v3, v6
; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v1, v3
; GFX8-NEXT: v_max_i32_e32 v3, -1, v2
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0x80000001, v3
+; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 0x7fffffff, v3
; GFX8-NEXT: v_min_i32_e32 v4, -1, v2
-; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0x80000000, v4
+; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 0x80000000, v4
; GFX8-NEXT: v_max_i32_e32 v3, v3, v5
; GFX8-NEXT: v_min_i32_e32 v3, v3, v4
; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3
@@ -1452,23 +1449,23 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v3i32(<3 x i32> inreg %lhs, <3 x i32> inre
; GFX6-LABEL: s_ssubsat_v3i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_max_i32 s6, s0, -1
-; GFX6-NEXT: s_add_i32 s6, s6, 0x80000001
+; GFX6-NEXT: s_sub_i32 s6, s6, 0x7fffffff
; GFX6-NEXT: s_min_i32 s7, s0, -1
-; GFX6-NEXT: s_add_i32 s7, s7, 0x80000000
+; GFX6-NEXT: s_sub_i32 s7, s7, 0x80000000
; GFX6-NEXT: s_max_i32 s3, s6, s3
; GFX6-NEXT: s_min_i32 s3, s3, s7
; GFX6-NEXT: s_sub_i32 s0, s0, s3
; GFX6-NEXT: s_max_i32 s3, s1, -1
-; GFX6-NEXT: s_add_i32 s3, s3, 0x80000001
+; GFX6-NEXT: s_sub_i32 s3, s3, 0x7fffffff
; GFX6-NEXT: s_min_i32 s6, s1, -1
-; GFX6-NEXT: s_add_i32 s6, s6, 0x80000000
+; GFX6-NEXT: s_sub_i32 s6, s6, 0x80000000
; GFX6-NEXT: s_max_i32 s3, s3, s4
; GFX6-NEXT: s_min_i32 s3, s3, s6
; GFX6-NEXT: s_sub_i32 s1, s1, s3
; GFX6-NEXT: s_max_i32 s3, s2, -1
-; GFX6-NEXT: s_add_i32 s3, s3, 0x80000001
+; GFX6-NEXT: s_sub_i32 s3, s3, 0x7fffffff
; GFX6-NEXT: s_min_i32 s4, s2, -1
-; GFX6-NEXT: s_add_i32 s4, s4, 0x80000000
+; GFX6-NEXT: s_sub_i32 s4, s4, 0x80000000
; GFX6-NEXT: s_max_i32 s3, s3, s5
; GFX6-NEXT: s_min_i32 s3, s3, s4
; GFX6-NEXT: s_sub_i32 s2, s2, s3
@@ -1477,23 +1474,23 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v3i32(<3 x i32> inreg %lhs, <3 x i32> inre
; GFX8-LABEL: s_ssubsat_v3i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_max_i32 s6, s0, -1
-; GFX8-NEXT: s_add_i32 s6, s6, 0x80000001
+; GFX8-NEXT: s_sub_i32 s6, s6, 0x7fffffff
; GFX8-NEXT: s_min_i32 s7, s0, -1
-; GFX8-NEXT: s_add_i32 s7, s7, 0x80000000
+; GFX8-NEXT: s_sub_i32 s7, s7, 0x80000000
; GFX8-NEXT: s_max_i32 s3, s6, s3
; GFX8-NEXT: s_min_i32 s3, s3, s7
; GFX8-NEXT: s_sub_i32 s0, s0, s3
; GFX8-NEXT: s_max_i32 s3, s1, -1
-; GFX8-NEXT: s_add_i32 s3, s3, 0x80000001
+; GFX8-NEXT: s_sub_i32 s3, s3, 0x7fffffff
; GFX8-NEXT: s_min_i32 s6, s1, -1
-; GFX8-NEXT: s_add_i32 s6, s6, 0x80000000
+; GFX8-NEXT: s_sub_i32 s6, s6, 0x80000000
; GFX8-NEXT: s_max_i32 s3, s3, s4
; GFX8-NEXT: s_min_i32 s3, s3, s6
; GFX8-NEXT: s_sub_i32 s1, s1, s3
; GFX8-NEXT: s_max_i32 s3, s2, -1
-; GFX8-NEXT: s_add_i32 s3, s3, 0x80000001
+; GFX8-NEXT: s_sub_i32 s3, s3, 0x7fffffff
; GFX8-NEXT: s_min_i32 s4, s2, -1
-; GFX8-NEXT: s_add_i32 s4, s4, 0x80000000
+; GFX8-NEXT: s_sub_i32 s4, s4, 0x80000000
; GFX8-NEXT: s_max_i32 s3, s3, s5
; GFX8-NEXT: s_min_i32 s3, s3, s4
; GFX8-NEXT: s_sub_i32 s2, s2, s3
@@ -1530,32 +1527,32 @@ define <4 x i32> @v_ssubsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_max_i32_e32 v8, -1, v0
-; GFX6-NEXT: v_add_i32_e32 v8, vcc, 0x80000001, v8
+; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, 0x7fffffff, v8
; GFX6-NEXT: v_min_i32_e32 v10, -1, v0
; GFX6-NEXT: v_bfrev_b32_e32 v11, 1
-; GFX6-NEXT: v_add_i32_e32 v10, vcc, v10, v11
+; GFX6-NEXT: v_sub_i32_e32 v10, vcc, v10, v11
; GFX6-NEXT: v_max_i32_e32 v4, v8, v4
; GFX6-NEXT: v_min_i32_e32 v4, v4, v10
-; GFX6-NEXT: v_mov_b32_e32 v9, 0x80000001
+; GFX6-NEXT: v_bfrev_b32_e32 v9, -2
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
; GFX6-NEXT: v_max_i32_e32 v4, -1, v1
-; GFX6-NEXT: v_add_i32_e32 v4, vcc, v4, v9
+; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v9
; GFX6-NEXT: v_min_i32_e32 v8, -1, v1
-; GFX6-NEXT: v_add_i32_e32 v8, vcc, v8, v11
+; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, 0x80000000, v8
; GFX6-NEXT: v_max_i32_e32 v4, v4, v5
; GFX6-NEXT: v_min_i32_e32 v4, v4, v8
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
; GFX6-NEXT: v_max_i32_e32 v4, -1, v2
-; GFX6-NEXT: v_add_i32_e32 v4, vcc, v4, v9
+; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v9
; GFX6-NEXT: v_min_i32_e32 v5, -1, v2
-; GFX6-NEXT: v_add_i32_e32 v5, vcc, v5, v11
+; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 0x80000000, v5
; GFX6-NEXT: v_max_i32_e32 v4, v4, v6
; GFX6-NEXT: v_min_i32_e32 v4, v4, v5
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v4
; GFX6-NEXT: v_max_i32_e32 v4, -1, v3
-; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0x80000001, v4
+; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 0x7fffffff, v4
; GFX6-NEXT: v_min_i32_e32 v5, -1, v3
-; GFX6-NEXT: v_add_i32_e32 v5, vcc, 0x80000000, v5
+; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 0x80000000, v5
; GFX6-NEXT: v_max_i32_e32 v4, v4, v7
; GFX6-NEXT: v_min_i32_e32 v4, v4, v5
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, v3, v4
@@ -1565,32 +1562,32 @@ define <4 x i32> @v_ssubsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_i32_e32 v8, -1, v0
-; GFX8-NEXT: v_add_u32_e32 v8, vcc, 0x80000001, v8
+; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, 0x7fffffff, v8
; GFX8-NEXT: v_min_i32_e32 v10, -1, v0
; GFX8-NEXT: v_bfrev_b32_e32 v11, 1
-; GFX8-NEXT: v_add_u32_e32 v10, vcc, v10, v11
+; GFX8-NEXT: v_sub_u32_e32 v10, vcc, v10, v11
; GFX8-NEXT: v_max_i32_e32 v4, v8, v4
; GFX8-NEXT: v_min_i32_e32 v4, v4, v10
-; GFX8-NEXT: v_mov_b32_e32 v9, 0x80000001
+; GFX8-NEXT: v_bfrev_b32_e32 v9, -2
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v4
; GFX8-NEXT: v_max_i32_e32 v4, -1, v1
-; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v9
+; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v9
; GFX8-NEXT: v_min_i32_e32 v8, -1, v1
-; GFX8-NEXT: v_add_u32_e32 v8, vcc, v8, v11
+; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, 0x80000000, v8
; GFX8-NEXT: v_max_i32_e32 v4, v4, v5
; GFX8-NEXT: v_min_i32_e32 v4, v4, v8
; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v1, v4
; GFX8-NEXT: v_max_i32_e32 v4, -1, v2
-; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v9
+; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v9
; GFX8-NEXT: v_min_i32_e32 v5, -1, v2
-; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v11
+; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, 0x80000000, v5
; GFX8-NEXT: v_max_i32_e32 v4, v4, v6
; GFX8-NEXT: v_min_i32_e32 v4, v4, v5
; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v4
; GFX8-NEXT: v_max_i32_e32 v4, -1, v3
-; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0x80000001, v4
+; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 0x7fffffff, v4
; GFX8-NEXT: v_min_i32_e32 v5, -1, v3
-; GFX8-NEXT: v_add_u32_e32 v5, vcc, 0x80000000, v5
+; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, 0x80000000, v5
; GFX8-NEXT: v_max_i32_e32 v4, v4, v7
; GFX8-NEXT: v_min_i32_e32 v4, v4, v5
; GFX8-NEXT: v_sub_u32_e32 v3, vcc, v3, v4
@@ -1621,30 +1618,30 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v4i32(<4 x i32> inreg %lhs, <4 x i32> inre
; GFX6-LABEL: s_ssubsat_v4i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_max_i32 s8, s0, -1
-; GFX6-NEXT: s_add_i32 s8, s8, 0x80000001
+; GFX6-NEXT: s_sub_i32 s8, s8, 0x7fffffff
; GFX6-NEXT: s_min_i32 s9, s0, -1
-; GFX6-NEXT: s_add_i32 s9, s9, 0x80000000
+; GFX6-NEXT: s_sub_i32 s9, s9, 0x80000000
; GFX6-NEXT: s_max_i32 s4, s8, s4
; GFX6-NEXT: s_min_i32 s4, s4, s9
; GFX6-NEXT: s_sub_i32 s0, s0, s4
; GFX6-NEXT: s_max_i32 s4, s1, -1
-; GFX6-NEXT: s_add_i32 s4, s4, 0x80000001
+; GFX6-NEXT: s_sub_i32 s4, s4, 0x7fffffff
; GFX6-NEXT: s_min_i32 s8, s1, -1
-; GFX6-NEXT: s_add_i32 s8, s8, 0x80000000
+; GFX6-NEXT: s_sub_i32 s8, s8, 0x80000000
; GFX6-NEXT: s_max_i32 s4, s4, s5
; GFX6-NEXT: s_min_i32 s4, s4, s8
; GFX6-NEXT: s_sub_i32 s1, s1, s4
; GFX6-NEXT: s_max_i32 s4, s2, -1
-; GFX6-NEXT: s_add_i32 s4, s4, 0x80000001
+; GFX6-NEXT: s_sub_i32 s4, s4, 0x7fffffff
; GFX6-NEXT: s_min_i32 s5, s2, -1
-; GFX6-NEXT: s_add_i32 s5, s5, 0x80000000
+; GFX6-NEXT: s_sub_i32 s5, s5, 0x80000000
; GFX6-NEXT: s_max_i32 s4, s4, s6
; GFX6-NEXT: s_min_i32 s4, s4, s5
; GFX6-NEXT: s_sub_i32 s2, s2, s4
; GFX6-NEXT: s_max_i32 s4, s3, -1
-; GFX6-NEXT: s_add_i32 s4, s4, 0x80000001
+; GFX6-NEXT: s_sub_i32 s4, s4, 0x7fffffff
; GFX6-NEXT: s_min_i32 s5, s3, -1
-; GFX6-NEXT: s_add_i32 s5, s5, 0x80000000
+; GFX6-NEXT: s_sub_i32 s5, s5, 0x80000000
; GFX6-NEXT: s_max_i32 s4, s4, s7
; GFX6-NEXT: s_min_i32 s4, s4, s5
; GFX6-NEXT: s_sub_i32 s3, s3, s4
@@ -1653,30 +1650,30 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v4i32(<4 x i32> inreg %lhs, <4 x i32> inre
; GFX8-LABEL: s_ssubsat_v4i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_max_i32 s8, s0, -1
-; GFX8-NEXT: s_add_i32 s8, s8, 0x80000001
+; GFX8-NEXT: s_sub_i32 s8, s8, 0x7fffffff
; GFX8-NEXT: s_min_i32 s9, s0, -1
-; GFX8-NEXT: s_add_i32 s9, s9, 0x80000000
+; GFX8-NEXT: s_sub_i32 s9, s9, 0x80000000
; GFX8-NEXT: s_max_i32 s4, s8, s4
; GFX8-NEXT: s_min_i32 s4, s4, s9
; GFX8-NEXT: s_sub_i32 s0, s0, s4
; GFX8-NEXT: s_max_i32 s4, s1, -1
-; GFX8-NEXT: s_add_i32 s4, s4, 0x80000001
+; GFX8-NEXT: s_sub_i32 s4, s4, 0x7fffffff
; GFX8-NEXT: s_min_i32 s8, s1, -1
-; GFX8-NEXT: s_add_i32 s8, s8, 0x80000000
+; GFX8-NEXT: s_sub_i32 s8, s8, 0x80000000
; GFX8-NEXT: s_max_i32 s4, s4, s5
; GFX8-NEXT: s_min_i32 s4, s4, s8
; GFX8-NEXT: s_sub_i32 s1, s1, s4
; GFX8-NEXT: s_max_i32 s4, s2, -1
-; GFX8-NEXT: s_add_i32 s4, s4, 0x80000001
+; GFX8-NEXT: s_sub_i32 s4, s4, 0x7fffffff
; GFX8-NEXT: s_min_i32 s5, s2, -1
-; GFX8-NEXT: s_add_i32 s5, s5, 0x80000000
+; GFX8-NEXT: s_sub_i32 s5, s5, 0x80000000
; GFX8-NEXT: s_max_i32 s4, s4, s6
; GFX8-NEXT: s_min_i32 s4, s4, s5
; GFX8-NEXT: s_sub_i32 s2, s2, s4
; GFX8-NEXT: s_max_i32 s4, s3, -1
-; GFX8-NEXT: s_add_i32 s4, s4, 0x80000001
+; GFX8-NEXT: s_sub_i32 s4, s4, 0x7fffffff
; GFX8-NEXT: s_min_i32 s5, s3, -1
-; GFX8-NEXT: s_add_i32 s5, s5, 0x80000000
+; GFX8-NEXT: s_sub_i32 s5, s5, 0x80000000
; GFX8-NEXT: s_max_i32 s4, s4, s7
; GFX8-NEXT: s_min_i32 s4, s4, s5
; GFX8-NEXT: s_sub_i32 s3, s3, s4
@@ -1718,39 +1715,39 @@ define <5 x i32> @v_ssubsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_max_i32_e32 v10, -1, v0
-; GFX6-NEXT: v_add_i32_e32 v10, vcc, 0x80000001, v10
+; GFX6-NEXT: v_subrev_i32_e32 v10, vcc, 0x7fffffff, v10
; GFX6-NEXT: v_min_i32_e32 v12, -1, v0
; GFX6-NEXT: v_bfrev_b32_e32 v13, 1
-; GFX6-NEXT: v_add_i32_e32 v12, vcc, v12, v13
+; GFX6-NEXT: v_sub_i32_e32 v12, vcc, v12, v13
; GFX6-NEXT: v_max_i32_e32 v5, v10, v5
; GFX6-NEXT: v_min_i32_e32 v5, v5, v12
-; GFX6-NEXT: v_mov_b32_e32 v11, 0x80000001
+; GFX6-NEXT: v_bfrev_b32_e32 v11, -2
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
; GFX6-NEXT: v_max_i32_e32 v5, -1, v1
-; GFX6-NEXT: v_add_i32_e32 v5, vcc, v5, v11
+; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v11
; GFX6-NEXT: v_min_i32_e32 v10, -1, v1
-; GFX6-NEXT: v_add_i32_e32 v10, vcc, v10, v13
+; GFX6-NEXT: v_sub_i32_e32 v10, vcc, v10, v13
; GFX6-NEXT: v_max_i32_e32 v5, v5, v6
; GFX6-NEXT: v_min_i32_e32 v5, v5, v10
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
; GFX6-NEXT: v_max_i32_e32 v5, -1, v2
-; GFX6-NEXT: v_add_i32_e32 v5, vcc, v5, v11
+; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v11
; GFX6-NEXT: v_min_i32_e32 v6, -1, v2
-; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v13
+; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 0x80000000, v6
; GFX6-NEXT: v_max_i32_e32 v5, v5, v7
; GFX6-NEXT: v_min_i32_e32 v5, v5, v6
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v5
; GFX6-NEXT: v_max_i32_e32 v5, -1, v3
-; GFX6-NEXT: v_add_i32_e32 v5, vcc, v5, v11
+; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v11
; GFX6-NEXT: v_min_i32_e32 v6, -1, v3
-; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v13
+; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 0x80000000, v6
; GFX6-NEXT: v_max_i32_e32 v5, v5, v8
; GFX6-NEXT: v_min_i32_e32 v5, v5, v6
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, v3, v5
; GFX6-NEXT: v_max_i32_e32 v5, -1, v4
-; GFX6-NEXT: v_add_i32_e32 v5, vcc, 0x80000001, v5
+; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 0x7fffffff, v5
; GFX6-NEXT: v_min_i32_e32 v6, -1, v4
-; GFX6-NEXT: v_add_i32_e32 v6, vcc, 0x80000000, v6
+; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 0x80000000, v6
; GFX6-NEXT: v_max_i32_e32 v5, v5, v9
; GFX6-NEXT: v_min_i32_e32 v5, v5, v6
; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v5
@@ -1760,39 +1757,39 @@ define <5 x i32> @v_ssubsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_i32_e32 v10, -1, v0
-; GFX8-NEXT: v_add_u32_e32 v10, vcc, 0x80000001, v10
+; GFX8-NEXT: v_subrev_u32_e32 v10, vcc, 0x7fffffff, v10
; GFX8-NEXT: v_min_i32_e32 v12, -1, v0
; GFX8-NEXT: v_bfrev_b32_e32 v13, 1
-; GFX8-NEXT: v_add_u32_e32 v12, vcc, v12, v13
+; GFX8-NEXT: v_sub_u32_e32 v12, vcc, v12, v13
; GFX8-NEXT: v_max_i32_e32 v5, v10, v5
; GFX8-NEXT: v_min_i32_e32 v5, v5, v12
-; GFX8-NEXT: v_mov_b32_e32 v11, 0x80000001
+; GFX8-NEXT: v_bfrev_b32_e32 v11, -2
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v5
; GFX8-NEXT: v_max_i32_e32 v5, -1, v1
-; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v11
+; GFX8-NEXT: v_sub_u32_e32 v5, vcc, v5, v11
; GFX8-NEXT: v_min_i32_e32 v10, -1, v1
-; GFX8-NEXT: v_add_u32_e32 v10, vcc, v10, v13
+; GFX8-NEXT: v_sub_u32_e32 v10, vcc, v10, v13
; GFX8-NEXT: v_max_i32_e32 v5, v5, v6
; GFX8-NEXT: v_min_i32_e32 v5, v5, v10
; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v1, v5
; GFX8-NEXT: v_max_i32_e32 v5, -1, v2
-; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v11
+; GFX8-NEXT: v_sub_u32_e32 v5, vcc, v5, v11
; GFX8-NEXT: v_min_i32_e32 v6, -1, v2
-; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v13
+; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 0x80000000, v6
; GFX8-NEXT: v_max_i32_e32 v5, v5, v7
; GFX8-NEXT: v_min_i32_e32 v5, v5, v6
; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v5
; GFX8-NEXT: v_max_i32_e32 v5, -1, v3
-; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v11
+; GFX8-NEXT: v_sub_u32_e32 v5, vcc, v5, v11
; GFX8-NEXT: v_min_i32_e32 v6, -1, v3
-; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v13
+; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 0x80000000, v6
; GFX8-NEXT: v_max_i32_e32 v5, v5, v8
; GFX8-NEXT: v_min_i32_e32 v5, v5, v6
; GFX8-NEXT: v_sub_u32_e32 v3, vcc, v3, v5
; GFX8-NEXT: v_max_i32_e32 v5, -1, v4
-; GFX8-NEXT: v_add_u32_e32 v5, vcc, 0x80000001, v5
+; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, 0x7fffffff, v5
; GFX8-NEXT: v_min_i32_e32 v6, -1, v4
-; GFX8-NEXT: v_add_u32_e32 v6, vcc, 0x80000000, v6
+; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 0x80000000, v6
; GFX8-NEXT: v_max_i32_e32 v5, v5, v9
; GFX8-NEXT: v_min_i32_e32 v5, v5, v6
; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v5
@@ -1825,37 +1822,37 @@ define amdgpu_ps <5 x i32> @s_ssubsat_v5i32(<5 x i32> inreg %lhs, <5 x i32> inre
; GFX6-LABEL: s_ssubsat_v5i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_max_i32 s10, s0, -1
-; GFX6-NEXT: s_add_i32 s10, s10, 0x80000001
+; GFX6-NEXT: s_sub_i32 s10, s10, 0x7fffffff
; GFX6-NEXT: s_min_i32 s11, s0, -1
-; GFX6-NEXT: s_add_i32 s11, s11, 0x80000000
+; GFX6-NEXT: s_sub_i32 s11, s11, 0x80000000
; GFX6-NEXT: s_max_i32 s5, s10, s5
; GFX6-NEXT: s_min_i32 s5, s5, s11
; GFX6-NEXT: s_sub_i32 s0, s0, s5
; GFX6-NEXT: s_max_i32 s5, s1, -1
-; GFX6-NEXT: s_add_i32 s5, s5, 0x80000001
+; GFX6-NEXT: s_sub_i32 s5, s5, 0x7fffffff
; GFX6-NEXT: s_min_i32 s10, s1, -1
-; GFX6-NEXT: s_add_i32 s10, s10, 0x80000000
+; GFX6-NEXT: s_sub_i32 s10, s10, 0x80000000
; GFX6-NEXT: s_max_i32 s5, s5, s6
; GFX6-NEXT: s_min_i32 s5, s5, s10
; GFX6-NEXT: s_sub_i32 s1, s1, s5
; GFX6-NEXT: s_max_i32 s5, s2, -1
-; GFX6-NEXT: s_add_i32 s5, s5, 0x80000001
+; GFX6-NEXT: s_sub_i32 s5, s5, 0x7fffffff
; GFX6-NEXT: s_min_i32 s6, s2, -1
-; GFX6-NEXT: s_add_i32 s6, s6, 0x80000000
+; GFX6-NEXT: s_sub_i32 s6, s6, 0x80000000
; GFX6-NEXT: s_max_i32 s5, s5, s7
; GFX6-NEXT: s_min_i32 s5, s5, s6
; GFX6-NEXT: s_sub_i32 s2, s2, s5
; GFX6-NEXT: s_max_i32 s5, s3, -1
-; GFX6-NEXT: s_add_i32 s5, s5, 0x80000001
+; GFX6-NEXT: s_sub_i32 s5, s5, 0x7fffffff
; GFX6-NEXT: s_min_i32 s6, s3, -1
-; GFX6-NEXT: s_add_i32 s6, s6, 0x80000000
+; GFX6-NEXT: s_sub_i32 s6, s6, 0x80000000
; GFX6-NEXT: s_max_i32 s5, s5, s8
; GFX6-NEXT: s_min_i32 s5, s5, s6
; GFX6-NEXT: s_sub_i32 s3, s3, s5
; GFX6-NEXT: s_max_i32 s5, s4, -1
-; GFX6-NEXT: s_add_i32 s5, s5, 0x80000001
+; GFX6-NEXT: s_sub_i32 s5, s5, 0x7fffffff
; GFX6-NEXT: s_min_i32 s6, s4, -1
-; GFX6-NEXT: s_add_i32 s6, s6, 0x80000000
+; GFX6-NEXT: s_sub_i32 s6, s6, 0x80000000
; GFX6-NEXT: s_max_i32 s5, s5, s9
; GFX6-NEXT: s_min_i32 s5, s5, s6
; GFX6-NEXT: s_sub_i32 s4, s4, s5
@@ -1864,37 +1861,37 @@ define amdgpu_ps <5 x i32> @s_ssubsat_v5i32(<5 x i32> inreg %lhs, <5 x i32> inre
; GFX8-LABEL: s_ssubsat_v5i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_max_i32 s10, s0, -1
-; GFX8-NEXT: s_add_i32 s10, s10, 0x80000001
+; GFX8-NEXT: s_sub_i32 s10, s10, 0x7fffffff
; GFX8-NEXT: s_min_i32 s11, s0, -1
-; GFX8-NEXT: s_add_i32 s11, s11, 0x80000000
+; GFX8-NEXT: s_sub_i32 s11, s11, 0x80000000
; GFX8-NEXT: s_max_i32 s5, s10, s5
; GFX8-NEXT: s_min_i32 s5, s5, s11
; GFX8-NEXT: s_sub_i32 s0, s0, s5
; GFX8-NEXT: s_max_i32 s5, s1, -1
-; GFX8-NEXT: s_add_i32 s5, s5, 0x80000001
+; GFX8-NEXT: s_sub_i32 s5, s5, 0x7fffffff
; GFX8-NEXT: s_min_i32 s10, s1, -1
-; GFX8-NEXT: s_add_i32 s10, s10, 0x80000000
+; GFX8-NEXT: s_sub_i32 s10, s10, 0x80000000
; GFX8-NEXT: s_max_i32 s5, s5, s6
; GFX8-NEXT: s_min_i32 s5, s5, s10
; GFX8-NEXT: s_sub_i32 s1, s1, s5
; GFX8-NEXT: s_max_i32 s5, s2, -1
-; GFX8-NEXT: s_add_i32 s5, s5, 0x80000001
+; GFX8-NEXT: s_sub_i32 s5, s5, 0x7fffffff
; GFX8-NEXT: s_min_i32 s6, s2, -1
-; GFX8-NEXT: s_add_i32 s6, s6, 0x80000000
+; GFX8-NEXT: s_sub_i32 s6, s6, 0x80000000
; GFX8-NEXT: s_max_i32 s5, s5, s7
; GFX8-NEXT: s_min_i32 s5, s5, s6
; GFX8-NEXT: s_sub_i32 s2, s2, s5
; GFX8-NEXT: s_max_i32 s5, s3, -1
-; GFX8-NEXT: s_add_i32 s5, s5, 0x80000001
+; GFX8-NEXT: s_sub_i32 s5, s5, 0x7fffffff
; GFX8-NEXT: s_min_i32 s6, s3, -1
-; GFX8-NEXT: s_add_i32 s6, s6, 0x80000000
+; GFX8-NEXT: s_sub_i32 s6, s6, 0x80000000
; GFX8-NEXT: s_max_i32 s5, s5, s8
; GFX8-NEXT: s_min_i32 s5, s5, s6
; GFX8-NEXT: s_sub_i32 s3, s3, s5
; GFX8-NEXT: s_max_i32 s5, s4, -1
-; GFX8-NEXT: s_add_i32 s5, s5, 0x80000001
+; GFX8-NEXT: s_sub_i32 s5, s5, 0x7fffffff
; GFX8-NEXT: s_min_i32 s6, s4, -1
-; GFX8-NEXT: s_add_i32 s6, s6, 0x80000000
+; GFX8-NEXT: s_sub_i32 s6, s6, 0x80000000
; GFX8-NEXT: s_max_i32 s5, s5, s9
; GFX8-NEXT: s_min_i32 s5, s5, s6
; GFX8-NEXT: s_sub_i32 s4, s4, s5
@@ -1941,117 +1938,117 @@ define <16 x i32> @v_ssubsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_max_i32_e32 v32, -1, v0
-; GFX6-NEXT: v_mov_b32_e32 v31, 0x80000001
-; GFX6-NEXT: v_add_i32_e32 v32, vcc, v32, v31
+; GFX6-NEXT: v_bfrev_b32_e32 v31, -2
+; GFX6-NEXT: v_sub_i32_e32 v32, vcc, v32, v31
; GFX6-NEXT: v_max_i32_e32 v32, v32, v16
; GFX6-NEXT: v_min_i32_e32 v33, -1, v0
; GFX6-NEXT: v_bfrev_b32_e32 v16, 1
-; GFX6-NEXT: v_add_i32_e32 v33, vcc, v33, v16
+; GFX6-NEXT: v_sub_i32_e32 v33, vcc, v33, v16
; GFX6-NEXT: v_min_i32_e32 v32, v32, v33
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v32
; GFX6-NEXT: v_max_i32_e32 v32, -1, v1
-; GFX6-NEXT: v_add_i32_e32 v32, vcc, v32, v31
+; GFX6-NEXT: v_sub_i32_e32 v32, vcc, v32, v31
; GFX6-NEXT: v_max_i32_e32 v17, v32, v17
; GFX6-NEXT: v_min_i32_e32 v32, -1, v1
-; GFX6-NEXT: v_add_i32_e32 v32, vcc, v32, v16
+; GFX6-NEXT: v_sub_i32_e32 v32, vcc, v32, v16
; GFX6-NEXT: v_min_i32_e32 v17, v17, v32
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v17
; GFX6-NEXT: v_max_i32_e32 v17, -1, v2
-; GFX6-NEXT: v_add_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v31
; GFX6-NEXT: v_max_i32_e32 v17, v17, v18
; GFX6-NEXT: v_min_i32_e32 v18, -1, v2
-; GFX6-NEXT: v_add_i32_e32 v18, vcc, v18, v16
+; GFX6-NEXT: v_sub_i32_e32 v18, vcc, v18, v16
; GFX6-NEXT: v_min_i32_e32 v17, v17, v18
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v17
; GFX6-NEXT: v_max_i32_e32 v17, -1, v3
-; GFX6-NEXT: v_add_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v31
; GFX6-NEXT: v_min_i32_e32 v18, -1, v3
; GFX6-NEXT: v_max_i32_e32 v17, v17, v19
-; GFX6-NEXT: v_add_i32_e32 v18, vcc, v18, v16
+; GFX6-NEXT: v_sub_i32_e32 v18, vcc, v18, v16
; GFX6-NEXT: v_min_i32_e32 v17, v17, v18
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, v3, v17
; GFX6-NEXT: v_max_i32_e32 v17, -1, v4
-; GFX6-NEXT: v_add_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v31
; GFX6-NEXT: v_min_i32_e32 v18, -1, v4
; GFX6-NEXT: v_max_i32_e32 v17, v17, v20
-; GFX6-NEXT: v_add_i32_e32 v18, vcc, v18, v16
+; GFX6-NEXT: v_sub_i32_e32 v18, vcc, v18, v16
; GFX6-NEXT: v_min_i32_e32 v17, v17, v18
; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v17
; GFX6-NEXT: v_max_i32_e32 v17, -1, v5
-; GFX6-NEXT: v_add_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v31
; GFX6-NEXT: v_min_i32_e32 v18, -1, v5
; GFX6-NEXT: v_max_i32_e32 v17, v17, v21
-; GFX6-NEXT: v_add_i32_e32 v18, vcc, v18, v16
+; GFX6-NEXT: v_sub_i32_e32 v18, vcc, v18, v16
; GFX6-NEXT: v_min_i32_e32 v17, v17, v18
; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v17
; GFX6-NEXT: v_max_i32_e32 v17, -1, v6
-; GFX6-NEXT: v_add_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v31
; GFX6-NEXT: v_min_i32_e32 v18, -1, v6
; GFX6-NEXT: v_max_i32_e32 v17, v17, v22
-; GFX6-NEXT: v_add_i32_e32 v18, vcc, v18, v16
+; GFX6-NEXT: v_sub_i32_e32 v18, vcc, v18, v16
; GFX6-NEXT: v_min_i32_e32 v17, v17, v18
; GFX6-NEXT: buffer_load_dword v18, off, s[0:3], s32
; GFX6-NEXT: v_sub_i32_e32 v6, vcc, v6, v17
; GFX6-NEXT: v_max_i32_e32 v17, -1, v7
-; GFX6-NEXT: v_add_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v31
; GFX6-NEXT: v_min_i32_e32 v19, -1, v7
; GFX6-NEXT: v_max_i32_e32 v17, v17, v23
-; GFX6-NEXT: v_add_i32_e32 v19, vcc, v19, v16
+; GFX6-NEXT: v_sub_i32_e32 v19, vcc, v19, v16
; GFX6-NEXT: v_min_i32_e32 v17, v17, v19
; GFX6-NEXT: v_sub_i32_e32 v7, vcc, v7, v17
; GFX6-NEXT: v_max_i32_e32 v17, -1, v8
-; GFX6-NEXT: v_add_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v31
; GFX6-NEXT: v_min_i32_e32 v19, -1, v8
-; GFX6-NEXT: v_add_i32_e32 v19, vcc, v19, v16
+; GFX6-NEXT: v_sub_i32_e32 v19, vcc, v19, v16
; GFX6-NEXT: v_max_i32_e32 v17, v17, v24
; GFX6-NEXT: v_min_i32_e32 v17, v17, v19
; GFX6-NEXT: v_sub_i32_e32 v8, vcc, v8, v17
; GFX6-NEXT: v_max_i32_e32 v17, -1, v9
-; GFX6-NEXT: v_add_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v31
; GFX6-NEXT: v_min_i32_e32 v19, -1, v9
-; GFX6-NEXT: v_add_i32_e32 v19, vcc, v19, v16
+; GFX6-NEXT: v_sub_i32_e32 v19, vcc, v19, v16
; GFX6-NEXT: v_max_i32_e32 v17, v17, v25
; GFX6-NEXT: v_min_i32_e32 v17, v17, v19
; GFX6-NEXT: v_sub_i32_e32 v9, vcc, v9, v17
; GFX6-NEXT: v_max_i32_e32 v17, -1, v10
-; GFX6-NEXT: v_add_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v31
; GFX6-NEXT: v_min_i32_e32 v19, -1, v10
-; GFX6-NEXT: v_add_i32_e32 v19, vcc, v19, v16
+; GFX6-NEXT: v_sub_i32_e32 v19, vcc, v19, v16
; GFX6-NEXT: v_max_i32_e32 v17, v17, v26
; GFX6-NEXT: v_min_i32_e32 v17, v17, v19
; GFX6-NEXT: v_sub_i32_e32 v10, vcc, v10, v17
; GFX6-NEXT: v_max_i32_e32 v17, -1, v11
-; GFX6-NEXT: v_add_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v31
; GFX6-NEXT: v_min_i32_e32 v19, -1, v11
-; GFX6-NEXT: v_add_i32_e32 v19, vcc, v19, v16
+; GFX6-NEXT: v_sub_i32_e32 v19, vcc, v19, v16
; GFX6-NEXT: v_max_i32_e32 v17, v17, v27
; GFX6-NEXT: v_min_i32_e32 v17, v17, v19
; GFX6-NEXT: v_sub_i32_e32 v11, vcc, v11, v17
; GFX6-NEXT: v_max_i32_e32 v17, -1, v12
-; GFX6-NEXT: v_add_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v31
; GFX6-NEXT: v_min_i32_e32 v19, -1, v12
-; GFX6-NEXT: v_add_i32_e32 v19, vcc, v19, v16
+; GFX6-NEXT: v_sub_i32_e32 v19, vcc, v19, v16
; GFX6-NEXT: v_max_i32_e32 v17, v17, v28
; GFX6-NEXT: v_min_i32_e32 v17, v17, v19
; GFX6-NEXT: v_sub_i32_e32 v12, vcc, v12, v17
; GFX6-NEXT: v_max_i32_e32 v17, -1, v13
-; GFX6-NEXT: v_add_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v31
; GFX6-NEXT: v_min_i32_e32 v19, -1, v13
-; GFX6-NEXT: v_add_i32_e32 v19, vcc, v19, v16
+; GFX6-NEXT: v_sub_i32_e32 v19, vcc, v19, v16
; GFX6-NEXT: v_max_i32_e32 v17, v17, v29
; GFX6-NEXT: v_min_i32_e32 v17, v17, v19
; GFX6-NEXT: v_sub_i32_e32 v13, vcc, v13, v17
; GFX6-NEXT: v_max_i32_e32 v17, -1, v14
-; GFX6-NEXT: v_add_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v31
; GFX6-NEXT: v_min_i32_e32 v19, -1, v14
-; GFX6-NEXT: v_add_i32_e32 v19, vcc, v19, v16
+; GFX6-NEXT: v_sub_i32_e32 v19, vcc, v19, v16
; GFX6-NEXT: v_max_i32_e32 v17, v17, v30
; GFX6-NEXT: v_min_i32_e32 v17, v17, v19
; GFX6-NEXT: v_sub_i32_e32 v14, vcc, v14, v17
; GFX6-NEXT: v_max_i32_e32 v17, -1, v15
-; GFX6-NEXT: v_add_i32_e32 v17, vcc, v17, v31
+; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v31
; GFX6-NEXT: v_min_i32_e32 v19, -1, v15
-; GFX6-NEXT: v_add_i32_e32 v16, vcc, v19, v16
+; GFX6-NEXT: v_sub_i32_e32 v16, vcc, v19, v16
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_max_i32_e32 v17, v17, v18
; GFX6-NEXT: v_min_i32_e32 v16, v17, v16
@@ -2062,117 +2059,117 @@ define <16 x i32> @v_ssubsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_i32_e32 v32, -1, v0
-; GFX8-NEXT: v_mov_b32_e32 v31, 0x80000001
-; GFX8-NEXT: v_add_u32_e32 v32, vcc, v32, v31
+; GFX8-NEXT: v_bfrev_b32_e32 v31, -2
+; GFX8-NEXT: v_sub_u32_e32 v32, vcc, v32, v31
; GFX8-NEXT: v_max_i32_e32 v32, v32, v16
; GFX8-NEXT: v_min_i32_e32 v33, -1, v0
; GFX8-NEXT: v_bfrev_b32_e32 v16, 1
-; GFX8-NEXT: v_add_u32_e32 v33, vcc, v33, v16
+; GFX8-NEXT: v_sub_u32_e32 v33, vcc, v33, v16
; GFX8-NEXT: v_min_i32_e32 v32, v32, v33
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v32
; GFX8-NEXT: v_max_i32_e32 v32, -1, v1
-; GFX8-NEXT: v_add_u32_e32 v32, vcc, v32, v31
+; GFX8-NEXT: v_sub_u32_e32 v32, vcc, v32, v31
; GFX8-NEXT: v_max_i32_e32 v17, v32, v17
; GFX8-NEXT: v_min_i32_e32 v32, -1, v1
-; GFX8-NEXT: v_add_u32_e32 v32, vcc, v32, v16
+; GFX8-NEXT: v_sub_u32_e32 v32, vcc, v32, v16
; GFX8-NEXT: v_min_i32_e32 v17, v17, v32
; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v1, v17
; GFX8-NEXT: v_max_i32_e32 v17, -1, v2
-; GFX8-NEXT: v_add_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v31
; GFX8-NEXT: v_max_i32_e32 v17, v17, v18
; GFX8-NEXT: v_min_i32_e32 v18, -1, v2
-; GFX8-NEXT: v_add_u32_e32 v18, vcc, v18, v16
+; GFX8-NEXT: v_sub_u32_e32 v18, vcc, v18, v16
; GFX8-NEXT: v_min_i32_e32 v17, v17, v18
; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v17
; GFX8-NEXT: v_max_i32_e32 v17, -1, v3
-; GFX8-NEXT: v_add_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v31
; GFX8-NEXT: v_min_i32_e32 v18, -1, v3
; GFX8-NEXT: v_max_i32_e32 v17, v17, v19
-; GFX8-NEXT: v_add_u32_e32 v18, vcc, v18, v16
+; GFX8-NEXT: v_sub_u32_e32 v18, vcc, v18, v16
; GFX8-NEXT: v_min_i32_e32 v17, v17, v18
; GFX8-NEXT: v_sub_u32_e32 v3, vcc, v3, v17
; GFX8-NEXT: v_max_i32_e32 v17, -1, v4
-; GFX8-NEXT: v_add_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v31
; GFX8-NEXT: v_min_i32_e32 v18, -1, v4
; GFX8-NEXT: v_max_i32_e32 v17, v17, v20
-; GFX8-NEXT: v_add_u32_e32 v18, vcc, v18, v16
+; GFX8-NEXT: v_sub_u32_e32 v18, vcc, v18, v16
; GFX8-NEXT: v_min_i32_e32 v17, v17, v18
; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v17
; GFX8-NEXT: v_max_i32_e32 v17, -1, v5
-; GFX8-NEXT: v_add_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v31
; GFX8-NEXT: v_min_i32_e32 v18, -1, v5
; GFX8-NEXT: v_max_i32_e32 v17, v17, v21
-; GFX8-NEXT: v_add_u32_e32 v18, vcc, v18, v16
+; GFX8-NEXT: v_sub_u32_e32 v18, vcc, v18, v16
; GFX8-NEXT: v_min_i32_e32 v17, v17, v18
; GFX8-NEXT: v_sub_u32_e32 v5, vcc, v5, v17
; GFX8-NEXT: v_max_i32_e32 v17, -1, v6
-; GFX8-NEXT: v_add_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v31
; GFX8-NEXT: v_min_i32_e32 v18, -1, v6
; GFX8-NEXT: v_max_i32_e32 v17, v17, v22
-; GFX8-NEXT: v_add_u32_e32 v18, vcc, v18, v16
+; GFX8-NEXT: v_sub_u32_e32 v18, vcc, v18, v16
; GFX8-NEXT: v_min_i32_e32 v17, v17, v18
; GFX8-NEXT: buffer_load_dword v18, off, s[0:3], s32
; GFX8-NEXT: v_sub_u32_e32 v6, vcc, v6, v17
; GFX8-NEXT: v_max_i32_e32 v17, -1, v7
-; GFX8-NEXT: v_add_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v31
; GFX8-NEXT: v_min_i32_e32 v19, -1, v7
; GFX8-NEXT: v_max_i32_e32 v17, v17, v23
-; GFX8-NEXT: v_add_u32_e32 v19, vcc, v19, v16
+; GFX8-NEXT: v_sub_u32_e32 v19, vcc, v19, v16
; GFX8-NEXT: v_min_i32_e32 v17, v17, v19
; GFX8-NEXT: v_sub_u32_e32 v7, vcc, v7, v17
; GFX8-NEXT: v_max_i32_e32 v17, -1, v8
-; GFX8-NEXT: v_add_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v31
; GFX8-NEXT: v_min_i32_e32 v19, -1, v8
-; GFX8-NEXT: v_add_u32_e32 v19, vcc, v19, v16
+; GFX8-NEXT: v_sub_u32_e32 v19, vcc, v19, v16
; GFX8-NEXT: v_max_i32_e32 v17, v17, v24
; GFX8-NEXT: v_min_i32_e32 v17, v17, v19
; GFX8-NEXT: v_sub_u32_e32 v8, vcc, v8, v17
; GFX8-NEXT: v_max_i32_e32 v17, -1, v9
-; GFX8-NEXT: v_add_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v31
; GFX8-NEXT: v_min_i32_e32 v19, -1, v9
-; GFX8-NEXT: v_add_u32_e32 v19, vcc, v19, v16
+; GFX8-NEXT: v_sub_u32_e32 v19, vcc, v19, v16
; GFX8-NEXT: v_max_i32_e32 v17, v17, v25
; GFX8-NEXT: v_min_i32_e32 v17, v17, v19
; GFX8-NEXT: v_sub_u32_e32 v9, vcc, v9, v17
; GFX8-NEXT: v_max_i32_e32 v17, -1, v10
-; GFX8-NEXT: v_add_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v31
; GFX8-NEXT: v_min_i32_e32 v19, -1, v10
-; GFX8-NEXT: v_add_u32_e32 v19, vcc, v19, v16
+; GFX8-NEXT: v_sub_u32_e32 v19, vcc, v19, v16
; GFX8-NEXT: v_max_i32_e32 v17, v17, v26
; GFX8-NEXT: v_min_i32_e32 v17, v17, v19
; GFX8-NEXT: v_sub_u32_e32 v10, vcc, v10, v17
; GFX8-NEXT: v_max_i32_e32 v17, -1, v11
-; GFX8-NEXT: v_add_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v31
; GFX8-NEXT: v_min_i32_e32 v19, -1, v11
-; GFX8-NEXT: v_add_u32_e32 v19, vcc, v19, v16
+; GFX8-NEXT: v_sub_u32_e32 v19, vcc, v19, v16
; GFX8-NEXT: v_max_i32_e32 v17, v17, v27
; GFX8-NEXT: v_min_i32_e32 v17, v17, v19
; GFX8-NEXT: v_sub_u32_e32 v11, vcc, v11, v17
; GFX8-NEXT: v_max_i32_e32 v17, -1, v12
-; GFX8-NEXT: v_add_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v31
; GFX8-NEXT: v_min_i32_e32 v19, -1, v12
-; GFX8-NEXT: v_add_u32_e32 v19, vcc, v19, v16
+; GFX8-NEXT: v_sub_u32_e32 v19, vcc, v19, v16
; GFX8-NEXT: v_max_i32_e32 v17, v17, v28
; GFX8-NEXT: v_min_i32_e32 v17, v17, v19
; GFX8-NEXT: v_sub_u32_e32 v12, vcc, v12, v17
; GFX8-NEXT: v_max_i32_e32 v17, -1, v13
-; GFX8-NEXT: v_add_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v31
; GFX8-NEXT: v_min_i32_e32 v19, -1, v13
-; GFX8-NEXT: v_add_u32_e32 v19, vcc, v19, v16
+; GFX8-NEXT: v_sub_u32_e32 v19, vcc, v19, v16
; GFX8-NEXT: v_max_i32_e32 v17, v17, v29
; GFX8-NEXT: v_min_i32_e32 v17, v17, v19
; GFX8-NEXT: v_sub_u32_e32 v13, vcc, v13, v17
; GFX8-NEXT: v_max_i32_e32 v17, -1, v14
-; GFX8-NEXT: v_add_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v31
; GFX8-NEXT: v_min_i32_e32 v19, -1, v14
-; GFX8-NEXT: v_add_u32_e32 v19, vcc, v19, v16
+; GFX8-NEXT: v_sub_u32_e32 v19, vcc, v19, v16
; GFX8-NEXT: v_max_i32_e32 v17, v17, v30
; GFX8-NEXT: v_min_i32_e32 v17, v17, v19
; GFX8-NEXT: v_sub_u32_e32 v14, vcc, v14, v17
; GFX8-NEXT: v_max_i32_e32 v17, -1, v15
-; GFX8-NEXT: v_add_u32_e32 v17, vcc, v17, v31
+; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v31
; GFX8-NEXT: v_min_i32_e32 v19, -1, v15
-; GFX8-NEXT: v_add_u32_e32 v16, vcc, v19, v16
+; GFX8-NEXT: v_sub_u32_e32 v16, vcc, v19, v16
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_max_i32_e32 v17, v17, v18
; GFX8-NEXT: v_min_i32_e32 v16, v17, v16
@@ -2255,114 +2252,114 @@ define amdgpu_ps <16 x i32> @s_ssubsat_v16i32(<16 x i32> inreg %lhs, <16 x i32>
; GFX6-LABEL: s_ssubsat_v16i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_max_i32 s32, s0, -1
-; GFX6-NEXT: s_add_i32 s32, s32, 0x80000001
+; GFX6-NEXT: s_sub_i32 s32, s32, 0x7fffffff
; GFX6-NEXT: s_min_i32 s33, s0, -1
-; GFX6-NEXT: s_add_i32 s33, s33, 0x80000000
+; GFX6-NEXT: s_sub_i32 s33, s33, 0x80000000
; GFX6-NEXT: s_max_i32 s16, s32, s16
; GFX6-NEXT: s_min_i32 s16, s16, s33
; GFX6-NEXT: s_sub_i32 s0, s0, s16
; GFX6-NEXT: s_max_i32 s16, s1, -1
-; GFX6-NEXT: s_add_i32 s16, s16, 0x80000001
+; GFX6-NEXT: s_sub_i32 s16, s16, 0x7fffffff
; GFX6-NEXT: s_min_i32 s32, s1, -1
-; GFX6-NEXT: s_add_i32 s32, s32, 0x80000000
+; GFX6-NEXT: s_sub_i32 s32, s32, 0x80000000
; GFX6-NEXT: s_max_i32 s16, s16, s17
; GFX6-NEXT: s_min_i32 s16, s16, s32
; GFX6-NEXT: s_sub_i32 s1, s1, s16
; GFX6-NEXT: s_max_i32 s16, s2, -1
-; GFX6-NEXT: s_add_i32 s16, s16, 0x80000001
+; GFX6-NEXT: s_sub_i32 s16, s16, 0x7fffffff
; GFX6-NEXT: s_min_i32 s17, s2, -1
-; GFX6-NEXT: s_add_i32 s17, s17, 0x80000000
+; GFX6-NEXT: s_sub_i32 s17, s17, 0x80000000
; GFX6-NEXT: s_max_i32 s16, s16, s18
; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s2, s2, s16
; GFX6-NEXT: s_max_i32 s16, s3, -1
-; GFX6-NEXT: s_add_i32 s16, s16, 0x80000001
+; GFX6-NEXT: s_sub_i32 s16, s16, 0x7fffffff
; GFX6-NEXT: s_min_i32 s17, s3, -1
-; GFX6-NEXT: s_add_i32 s17, s17, 0x80000000
+; GFX6-NEXT: s_sub_i32 s17, s17, 0x80000000
; GFX6-NEXT: s_max_i32 s16, s16, s19
; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s3, s3, s16
; GFX6-NEXT: s_max_i32 s16, s4, -1
-; GFX6-NEXT: s_add_i32 s16, s16, 0x80000001
+; GFX6-NEXT: s_sub_i32 s16, s16, 0x7fffffff
; GFX6-NEXT: s_min_i32 s17, s4, -1
-; GFX6-NEXT: s_add_i32 s17, s17, 0x80000000
+; GFX6-NEXT: s_sub_i32 s17, s17, 0x80000000
; GFX6-NEXT: s_max_i32 s16, s16, s20
; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s4, s4, s16
; GFX6-NEXT: s_max_i32 s16, s5, -1
-; GFX6-NEXT: s_add_i32 s16, s16, 0x80000001
+; GFX6-NEXT: s_sub_i32 s16, s16, 0x7fffffff
; GFX6-NEXT: s_min_i32 s17, s5, -1
-; GFX6-NEXT: s_add_i32 s17, s17, 0x80000000
+; GFX6-NEXT: s_sub_i32 s17, s17, 0x80000000
; GFX6-NEXT: s_max_i32 s16, s16, s21
; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s5, s5, s16
; GFX6-NEXT: s_max_i32 s16, s6, -1
-; GFX6-NEXT: s_add_i32 s16, s16, 0x80000001
+; GFX6-NEXT: s_sub_i32 s16, s16, 0x7fffffff
; GFX6-NEXT: s_min_i32 s17, s6, -1
-; GFX6-NEXT: s_add_i32 s17, s17, 0x80000000
+; GFX6-NEXT: s_sub_i32 s17, s17, 0x80000000
; GFX6-NEXT: s_max_i32 s16, s16, s22
; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s6, s6, s16
; GFX6-NEXT: s_max_i32 s16, s7, -1
-; GFX6-NEXT: s_add_i32 s16, s16, 0x80000001
+; GFX6-NEXT: s_sub_i32 s16, s16, 0x7fffffff
; GFX6-NEXT: s_min_i32 s17, s7, -1
-; GFX6-NEXT: s_add_i32 s17, s17, 0x80000000
+; GFX6-NEXT: s_sub_i32 s17, s17, 0x80000000
; GFX6-NEXT: s_max_i32 s16, s16, s23
; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s7, s7, s16
; GFX6-NEXT: s_max_i32 s16, s8, -1
-; GFX6-NEXT: s_add_i32 s16, s16, 0x80000001
+; GFX6-NEXT: s_sub_i32 s16, s16, 0x7fffffff
; GFX6-NEXT: s_min_i32 s17, s8, -1
-; GFX6-NEXT: s_add_i32 s17, s17, 0x80000000
+; GFX6-NEXT: s_sub_i32 s17, s17, 0x80000000
; GFX6-NEXT: s_max_i32 s16, s16, s24
; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s8, s8, s16
; GFX6-NEXT: s_max_i32 s16, s9, -1
-; GFX6-NEXT: s_add_i32 s16, s16, 0x80000001
+; GFX6-NEXT: s_sub_i32 s16, s16, 0x7fffffff
; GFX6-NEXT: s_min_i32 s17, s9, -1
-; GFX6-NEXT: s_add_i32 s17, s17, 0x80000000
+; GFX6-NEXT: s_sub_i32 s17, s17, 0x80000000
; GFX6-NEXT: s_max_i32 s16, s16, s25
; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s9, s9, s16
; GFX6-NEXT: s_max_i32 s16, s10, -1
-; GFX6-NEXT: s_add_i32 s16, s16, 0x80000001
+; GFX6-NEXT: s_sub_i32 s16, s16, 0x7fffffff
; GFX6-NEXT: s_min_i32 s17, s10, -1
-; GFX6-NEXT: s_add_i32 s17, s17, 0x80000000
+; GFX6-NEXT: s_sub_i32 s17, s17, 0x80000000
; GFX6-NEXT: s_max_i32 s16, s16, s26
; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s10, s10, s16
; GFX6-NEXT: s_max_i32 s16, s11, -1
-; GFX6-NEXT: s_add_i32 s16, s16, 0x80000001
+; GFX6-NEXT: s_sub_i32 s16, s16, 0x7fffffff
; GFX6-NEXT: s_min_i32 s17, s11, -1
-; GFX6-NEXT: s_add_i32 s17, s17, 0x80000000
+; GFX6-NEXT: s_sub_i32 s17, s17, 0x80000000
; GFX6-NEXT: s_max_i32 s16, s16, s27
; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s11, s11, s16
; GFX6-NEXT: s_max_i32 s16, s12, -1
-; GFX6-NEXT: s_add_i32 s16, s16, 0x80000001
+; GFX6-NEXT: s_sub_i32 s16, s16, 0x7fffffff
; GFX6-NEXT: s_min_i32 s17, s12, -1
-; GFX6-NEXT: s_add_i32 s17, s17, 0x80000000
+; GFX6-NEXT: s_sub_i32 s17, s17, 0x80000000
; GFX6-NEXT: s_max_i32 s16, s16, s28
; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s12, s12, s16
; GFX6-NEXT: s_max_i32 s16, s13, -1
-; GFX6-NEXT: s_add_i32 s16, s16, 0x80000001
+; GFX6-NEXT: s_sub_i32 s16, s16, 0x7fffffff
; GFX6-NEXT: s_min_i32 s17, s13, -1
-; GFX6-NEXT: s_add_i32 s17, s17, 0x80000000
+; GFX6-NEXT: s_sub_i32 s17, s17, 0x80000000
; GFX6-NEXT: s_max_i32 s16, s16, s29
; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s13, s13, s16
; GFX6-NEXT: s_max_i32 s16, s14, -1
-; GFX6-NEXT: s_add_i32 s16, s16, 0x80000001
+; GFX6-NEXT: s_sub_i32 s16, s16, 0x7fffffff
; GFX6-NEXT: s_min_i32 s17, s14, -1
-; GFX6-NEXT: s_add_i32 s17, s17, 0x80000000
+; GFX6-NEXT: s_sub_i32 s17, s17, 0x80000000
; GFX6-NEXT: s_max_i32 s16, s16, s30
; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s14, s14, s16
; GFX6-NEXT: s_max_i32 s16, s15, -1
-; GFX6-NEXT: s_add_i32 s16, s16, 0x80000001
+; GFX6-NEXT: s_sub_i32 s16, s16, 0x7fffffff
; GFX6-NEXT: s_min_i32 s17, s15, -1
-; GFX6-NEXT: s_add_i32 s17, s17, 0x80000000
+; GFX6-NEXT: s_sub_i32 s17, s17, 0x80000000
; GFX6-NEXT: s_max_i32 s16, s16, s31
; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s15, s15, s16
@@ -2371,114 +2368,114 @@ define amdgpu_ps <16 x i32> @s_ssubsat_v16i32(<16 x i32> inreg %lhs, <16 x i32>
; GFX8-LABEL: s_ssubsat_v16i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_max_i32 s32, s0, -1
-; GFX8-NEXT: s_add_i32 s32, s32, 0x80000001
+; GFX8-NEXT: s_sub_i32 s32, s32, 0x7fffffff
; GFX8-NEXT: s_min_i32 s33, s0, -1
-; GFX8-NEXT: s_add_i32 s33, s33, 0x80000000
+; GFX8-NEXT: s_sub_i32 s33, s33, 0x80000000
; GFX8-NEXT: s_max_i32 s16, s32, s16
; GFX8-NEXT: s_min_i32 s16, s16, s33
; GFX8-NEXT: s_sub_i32 s0, s0, s16
; GFX8-NEXT: s_max_i32 s16, s1, -1
-; GFX8-NEXT: s_add_i32 s16, s16, 0x80000001
+; GFX8-NEXT: s_sub_i32 s16, s16, 0x7fffffff
; GFX8-NEXT: s_min_i32 s32, s1, -1
-; GFX8-NEXT: s_add_i32 s32, s32, 0x80000000
+; GFX8-NEXT: s_sub_i32 s32, s32, 0x80000000
; GFX8-NEXT: s_max_i32 s16, s16, s17
; GFX8-NEXT: s_min_i32 s16, s16, s32
; GFX8-NEXT: s_sub_i32 s1, s1, s16
; GFX8-NEXT: s_max_i32 s16, s2, -1
-; GFX8-NEXT: s_add_i32 s16, s16, 0x80000001
+; GFX8-NEXT: s_sub_i32 s16, s16, 0x7fffffff
; GFX8-NEXT: s_min_i32 s17, s2, -1
-; GFX8-NEXT: s_add_i32 s17, s17, 0x80000000
+; GFX8-NEXT: s_sub_i32 s17, s17, 0x80000000
; GFX8-NEXT: s_max_i32 s16, s16, s18
; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s2, s2, s16
; GFX8-NEXT: s_max_i32 s16, s3, -1
-; GFX8-NEXT: s_add_i32 s16, s16, 0x80000001
+; GFX8-NEXT: s_sub_i32 s16, s16, 0x7fffffff
; GFX8-NEXT: s_min_i32 s17, s3, -1
-; GFX8-NEXT: s_add_i32 s17, s17, 0x80000000
+; GFX8-NEXT: s_sub_i32 s17, s17, 0x80000000
; GFX8-NEXT: s_max_i32 s16, s16, s19
; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s3, s3, s16
; GFX8-NEXT: s_max_i32 s16, s4, -1
-; GFX8-NEXT: s_add_i32 s16, s16, 0x80000001
+; GFX8-NEXT: s_sub_i32 s16, s16, 0x7fffffff
; GFX8-NEXT: s_min_i32 s17, s4, -1
-; GFX8-NEXT: s_add_i32 s17, s17, 0x80000000
+; GFX8-NEXT: s_sub_i32 s17, s17, 0x80000000
; GFX8-NEXT: s_max_i32 s16, s16, s20
; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s4, s4, s16
; GFX8-NEXT: s_max_i32 s16, s5, -1
-; GFX8-NEXT: s_add_i32 s16, s16, 0x80000001
+; GFX8-NEXT: s_sub_i32 s16, s16, 0x7fffffff
; GFX8-NEXT: s_min_i32 s17, s5, -1
-; GFX8-NEXT: s_add_i32 s17, s17, 0x80000000
+; GFX8-NEXT: s_sub_i32 s17, s17, 0x80000000
; GFX8-NEXT: s_max_i32 s16, s16, s21
; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s5, s5, s16
; GFX8-NEXT: s_max_i32 s16, s6, -1
-; GFX8-NEXT: s_add_i32 s16, s16, 0x80000001
+; GFX8-NEXT: s_sub_i32 s16, s16, 0x7fffffff
; GFX8-NEXT: s_min_i32 s17, s6, -1
-; GFX8-NEXT: s_add_i32 s17, s17, 0x80000000
+; GFX8-NEXT: s_sub_i32 s17, s17, 0x80000000
; GFX8-NEXT: s_max_i32 s16, s16, s22
; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s6, s6, s16
; GFX8-NEXT: s_max_i32 s16, s7, -1
-; GFX8-NEXT: s_add_i32 s16, s16, 0x80000001
+; GFX8-NEXT: s_sub_i32 s16, s16, 0x7fffffff
; GFX8-NEXT: s_min_i32 s17, s7, -1
-; GFX8-NEXT: s_add_i32 s17, s17, 0x80000000
+; GFX8-NEXT: s_sub_i32 s17, s17, 0x80000000
; GFX8-NEXT: s_max_i32 s16, s16, s23
; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s7, s7, s16
; GFX8-NEXT: s_max_i32 s16, s8, -1
-; GFX8-NEXT: s_add_i32 s16, s16, 0x80000001
+; GFX8-NEXT: s_sub_i32 s16, s16, 0x7fffffff
; GFX8-NEXT: s_min_i32 s17, s8, -1
-; GFX8-NEXT: s_add_i32 s17, s17, 0x80000000
+; GFX8-NEXT: s_sub_i32 s17, s17, 0x80000000
; GFX8-NEXT: s_max_i32 s16, s16, s24
; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s8, s8, s16
; GFX8-NEXT: s_max_i32 s16, s9, -1
-; GFX8-NEXT: s_add_i32 s16, s16, 0x80000001
+; GFX8-NEXT: s_sub_i32 s16, s16, 0x7fffffff
; GFX8-NEXT: s_min_i32 s17, s9, -1
-; GFX8-NEXT: s_add_i32 s17, s17, 0x80000000
+; GFX8-NEXT: s_sub_i32 s17, s17, 0x80000000
; GFX8-NEXT: s_max_i32 s16, s16, s25
; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s9, s9, s16
; GFX8-NEXT: s_max_i32 s16, s10, -1
-; GFX8-NEXT: s_add_i32 s16, s16, 0x80000001
+; GFX8-NEXT: s_sub_i32 s16, s16, 0x7fffffff
; GFX8-NEXT: s_min_i32 s17, s10, -1
-; GFX8-NEXT: s_add_i32 s17, s17, 0x80000000
+; GFX8-NEXT: s_sub_i32 s17, s17, 0x80000000
; GFX8-NEXT: s_max_i32 s16, s16, s26
; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s10, s10, s16
; GFX8-NEXT: s_max_i32 s16, s11, -1
-; GFX8-NEXT: s_add_i32 s16, s16, 0x80000001
+; GFX8-NEXT: s_sub_i32 s16, s16, 0x7fffffff
; GFX8-NEXT: s_min_i32 s17, s11, -1
-; GFX8-NEXT: s_add_i32 s17, s17, 0x80000000
+; GFX8-NEXT: s_sub_i32 s17, s17, 0x80000000
; GFX8-NEXT: s_max_i32 s16, s16, s27
; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s11, s11, s16
; GFX8-NEXT: s_max_i32 s16, s12, -1
-; GFX8-NEXT: s_add_i32 s16, s16, 0x80000001
+; GFX8-NEXT: s_sub_i32 s16, s16, 0x7fffffff
; GFX8-NEXT: s_min_i32 s17, s12, -1
-; GFX8-NEXT: s_add_i32 s17, s17, 0x80000000
+; GFX8-NEXT: s_sub_i32 s17, s17, 0x80000000
; GFX8-NEXT: s_max_i32 s16, s16, s28
; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s12, s12, s16
; GFX8-NEXT: s_max_i32 s16, s13, -1
-; GFX8-NEXT: s_add_i32 s16, s16, 0x80000001
+; GFX8-NEXT: s_sub_i32 s16, s16, 0x7fffffff
; GFX8-NEXT: s_min_i32 s17, s13, -1
-; GFX8-NEXT: s_add_i32 s17, s17, 0x80000000
+; GFX8-NEXT: s_sub_i32 s17, s17, 0x80000000
; GFX8-NEXT: s_max_i32 s16, s16, s29
; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s13, s13, s16
; GFX8-NEXT: s_max_i32 s16, s14, -1
-; GFX8-NEXT: s_add_i32 s16, s16, 0x80000001
+; GFX8-NEXT: s_sub_i32 s16, s16, 0x7fffffff
; GFX8-NEXT: s_min_i32 s17, s14, -1
-; GFX8-NEXT: s_add_i32 s17, s17, 0x80000000
+; GFX8-NEXT: s_sub_i32 s17, s17, 0x80000000
; GFX8-NEXT: s_max_i32 s16, s16, s30
; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s14, s14, s16
; GFX8-NEXT: s_max_i32 s16, s15, -1
-; GFX8-NEXT: s_add_i32 s16, s16, 0x80000001
+; GFX8-NEXT: s_sub_i32 s16, s16, 0x7fffffff
; GFX8-NEXT: s_min_i32 s17, s15, -1
-; GFX8-NEXT: s_add_i32 s17, s17, 0x80000000
+; GFX8-NEXT: s_sub_i32 s17, s17, 0x80000000
; GFX8-NEXT: s_max_i32 s16, s16, s31
; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s15, s15, s16
@@ -2582,9 +2579,9 @@ define i16 @v_ssubsat_i16(i16 %lhs, i16 %rhs) {
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_max_i32_e32 v2, -1, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0x80000001, v2
+; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2
; GFX6-NEXT: v_min_i32_e32 v3, -1, v0
-; GFX6-NEXT: v_add_i32_e32 v3, vcc, 0x80000000, v3
+; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x80000000, v3
; GFX6-NEXT: v_max_i32_e32 v1, v2, v1
; GFX6-NEXT: v_min_i32_e32 v1, v1, v3
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
@@ -2595,9 +2592,9 @@ define i16 @v_ssubsat_i16(i16 %lhs, i16 %rhs) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_i16_e32 v2, -1, v0
-; GFX8-NEXT: v_add_u16_e32 v2, 0x8001, v2
+; GFX8-NEXT: v_subrev_u16_e32 v2, 0x7fff, v2
; GFX8-NEXT: v_min_i16_e32 v3, -1, v0
-; GFX8-NEXT: v_add_u16_e32 v3, 0x8000, v3
+; GFX8-NEXT: v_subrev_u16_e32 v3, 0x8000, v3
; GFX8-NEXT: v_max_i16_e32 v1, v2, v1
; GFX8-NEXT: v_min_i16_e32 v1, v1, v3
; GFX8-NEXT: v_sub_u16_e32 v0, v0, v1
@@ -2624,9 +2621,9 @@ define amdgpu_ps i16 @s_ssubsat_i16(i16 inreg %lhs, i16 inreg %rhs) {
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: s_max_i32 s2, s0, -1
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
-; GFX6-NEXT: s_add_i32 s2, s2, 0x80000001
+; GFX6-NEXT: s_sub_i32 s2, s2, 0x7fffffff
; GFX6-NEXT: s_min_i32 s3, s0, -1
-; GFX6-NEXT: s_add_i32 s3, s3, 0x80000000
+; GFX6-NEXT: s_sub_i32 s3, s3, 0x80000000
; GFX6-NEXT: s_max_i32 s1, s2, s1
; GFX6-NEXT: s_min_i32 s1, s1, s3
; GFX6-NEXT: s_sub_i32 s0, s0, s1
@@ -2638,11 +2635,11 @@ define amdgpu_ps i16 @s_ssubsat_i16(i16 inreg %lhs, i16 inreg %rhs) {
; GFX8-NEXT: s_sext_i32_i16 s2, s0
; GFX8-NEXT: s_sext_i32_i16 s3, -1
; GFX8-NEXT: s_max_i32 s4, s2, s3
-; GFX8-NEXT: s_addk_i32 s4, 0x8001
+; GFX8-NEXT: s_sub_i32 s4, s4, 0x7fff
; GFX8-NEXT: s_min_i32 s2, s2, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s4
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_addk_i32 s2, 0x8000
+; GFX8-NEXT: s_sub_i32 s2, s2, 0xffff8000
; GFX8-NEXT: s_max_i32 s1, s3, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s2, s2
@@ -2672,9 +2669,9 @@ define amdgpu_ps half @ssubsat_i16_sv(i16 inreg %lhs, i16 %rhs) {
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: s_max_i32 s1, s0, -1
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX6-NEXT: s_add_i32 s1, s1, 0x80000001
+; GFX6-NEXT: s_sub_i32 s1, s1, 0x7fffffff
; GFX6-NEXT: s_min_i32 s2, s0, -1
-; GFX6-NEXT: s_add_i32 s2, s2, 0x80000000
+; GFX6-NEXT: s_sub_i32 s2, s2, 0x80000000
; GFX6-NEXT: v_max_i32_e32 v0, s1, v0
; GFX6-NEXT: v_min_i32_e32 v0, s2, v0
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s0, v0
@@ -2686,9 +2683,9 @@ define amdgpu_ps half @ssubsat_i16_sv(i16 inreg %lhs, i16 %rhs) {
; GFX8-NEXT: s_sext_i32_i16 s1, s0
; GFX8-NEXT: s_sext_i32_i16 s2, -1
; GFX8-NEXT: s_max_i32 s3, s1, s2
-; GFX8-NEXT: s_addk_i32 s3, 0x8001
+; GFX8-NEXT: s_sub_i32 s3, s3, 0x7fff
; GFX8-NEXT: s_min_i32 s1, s1, s2
-; GFX8-NEXT: s_addk_i32 s1, 0x8000
+; GFX8-NEXT: s_sub_i32 s1, s1, 0xffff8000
; GFX8-NEXT: v_max_i16_e32 v0, s3, v0
; GFX8-NEXT: v_min_i16_e32 v0, s1, v0
; GFX8-NEXT: v_sub_u16_e32 v0, s0, v0
@@ -2714,9 +2711,9 @@ define amdgpu_ps half @ssubsat_i16_vs(i16 %lhs, i16 inreg %rhs) {
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_max_i32_e32 v1, -1, v0
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
-; GFX6-NEXT: v_add_i32_e32 v1, vcc, 0x80000001, v1
+; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 0x7fffffff, v1
; GFX6-NEXT: v_min_i32_e32 v2, -1, v0
-; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0x80000000, v2
+; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 0x80000000, v2
; GFX6-NEXT: v_max_i32_e32 v1, s0, v1
; GFX6-NEXT: v_min_i32_e32 v1, v1, v2
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
@@ -2726,9 +2723,9 @@ define amdgpu_ps half @ssubsat_i16_vs(i16 %lhs, i16 inreg %rhs) {
; GFX8-LABEL: ssubsat_i16_vs:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_max_i16_e32 v1, -1, v0
-; GFX8-NEXT: v_add_u16_e32 v1, 0x8001, v1
+; GFX8-NEXT: v_subrev_u16_e32 v1, 0x7fff, v1
; GFX8-NEXT: v_min_i16_e32 v2, -1, v0
-; GFX8-NEXT: v_add_u16_e32 v2, 0x8000, v2
+; GFX8-NEXT: v_subrev_u16_e32 v2, 0x8000, v2
; GFX8-NEXT: v_max_i16_e32 v1, s0, v1
; GFX8-NEXT: v_min_i16_e32 v1, v1, v2
; GFX8-NEXT: v_sub_u16_e32 v0, v0, v1
@@ -2755,19 +2752,18 @@ define <2 x i16> @v_ssubsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_max_i32_e32 v4, -1, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0x80000001, v4
+; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 0x7fffffff, v4
; GFX6-NEXT: v_min_i32_e32 v5, -1, v0
-; GFX6-NEXT: v_bfrev_b32_e32 v6, 1
-; GFX6-NEXT: v_add_i32_e32 v5, vcc, v5, v6
+; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 0x80000000, v5
; GFX6-NEXT: v_max_i32_e32 v2, v4, v2
; GFX6-NEXT: v_min_i32_e32 v2, v2, v5
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v3
; GFX6-NEXT: v_max_i32_e32 v3, -1, v1
-; GFX6-NEXT: v_add_i32_e32 v3, vcc, 0x80000001, v3
+; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x7fffffff, v3
; GFX6-NEXT: v_min_i32_e32 v4, -1, v1
-; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0x80000000, v4
+; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 0x80000000, v4
; GFX6-NEXT: v_max_i32_e32 v2, v3, v2
; GFX6-NEXT: v_min_i32_e32 v2, v2, v4
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v2
@@ -2779,16 +2775,16 @@ define <2 x i16> @v_ssubsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_i16_e32 v2, -1, v0
-; GFX8-NEXT: v_add_u16_e32 v2, 0x8001, v2
+; GFX8-NEXT: v_subrev_u16_e32 v2, 0x7fff, v2
; GFX8-NEXT: v_min_i16_e32 v3, -1, v0
-; GFX8-NEXT: v_add_u16_e32 v3, 0x8000, v3
+; GFX8-NEXT: v_subrev_u16_e32 v3, 0x8000, v3
; GFX8-NEXT: v_max_i16_e32 v2, v2, v1
; GFX8-NEXT: v_min_i16_e32 v2, v2, v3
; GFX8-NEXT: v_mov_b32_e32 v3, -1
; GFX8-NEXT: v_max_i16_sdwa v4, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_add_u16_e32 v4, 0x8001, v4
+; GFX8-NEXT: v_subrev_u16_e32 v4, 0x7fff, v4
; GFX8-NEXT: v_min_i16_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_add_u16_e32 v3, 0x8000, v3
+; GFX8-NEXT: v_subrev_u16_e32 v3, 0x8000, v3
; GFX8-NEXT: v_max_i16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_min_i16_e32 v1, v1, v3
; GFX8-NEXT: v_sub_u16_e32 v2, v0, v2
@@ -2817,18 +2813,18 @@ define amdgpu_ps i32 @s_ssubsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: s_max_i32 s4, s0, -1
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
-; GFX6-NEXT: s_add_i32 s4, s4, 0x80000001
+; GFX6-NEXT: s_sub_i32 s4, s4, 0x7fffffff
; GFX6-NEXT: s_min_i32 s5, s0, -1
-; GFX6-NEXT: s_add_i32 s5, s5, 0x80000000
+; GFX6-NEXT: s_sub_i32 s5, s5, 0x80000000
; GFX6-NEXT: s_max_i32 s2, s4, s2
; GFX6-NEXT: s_min_i32 s2, s2, s5
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_sub_i32 s0, s0, s2
; GFX6-NEXT: s_lshl_b32 s2, s3, 16
; GFX6-NEXT: s_max_i32 s3, s1, -1
-; GFX6-NEXT: s_add_i32 s3, s3, 0x80000001
+; GFX6-NEXT: s_sub_i32 s3, s3, 0x7fffffff
; GFX6-NEXT: s_min_i32 s4, s1, -1
-; GFX6-NEXT: s_add_i32 s4, s4, 0x80000000
+; GFX6-NEXT: s_sub_i32 s4, s4, 0x80000000
; GFX6-NEXT: s_max_i32 s2, s3, s2
; GFX6-NEXT: s_min_i32 s2, s2, s4
; GFX6-NEXT: s_sub_i32 s1, s1, s2
@@ -2845,12 +2841,12 @@ define amdgpu_ps i32 @s_ssubsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
; GFX8-NEXT: s_sext_i32_i16 s4, s0
; GFX8-NEXT: s_sext_i32_i16 s5, -1
; GFX8-NEXT: s_max_i32 s6, s4, s5
-; GFX8-NEXT: s_addk_i32 s6, 0x8001
+; GFX8-NEXT: s_sub_i32 s6, s6, 0x7fff
; GFX8-NEXT: s_lshr_b32 s3, s1, 16
; GFX8-NEXT: s_min_i32 s4, s4, s5
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_addk_i32 s4, 0x8000
+; GFX8-NEXT: s_sub_i32 s4, s4, 0xffff8000
; GFX8-NEXT: s_max_i32 s1, s6, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s4, s4
@@ -2859,11 +2855,11 @@ define amdgpu_ps i32 @s_ssubsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
; GFX8-NEXT: s_sub_i32 s0, s0, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s2
; GFX8-NEXT: s_max_i32 s4, s1, s5
-; GFX8-NEXT: s_addk_i32 s4, 0x8001
+; GFX8-NEXT: s_sub_i32 s4, s4, 0x7fff
; GFX8-NEXT: s_min_i32 s1, s1, s5
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s3, s3
-; GFX8-NEXT: s_addk_i32 s1, 0x8000
+; GFX8-NEXT: s_sub_i32 s1, s1, 0xffff8000
; GFX8-NEXT: s_max_i32 s3, s4, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s1, s1
@@ -2898,18 +2894,18 @@ define amdgpu_ps float @ssubsat_v2i16_sv(<2 x i16> inreg %lhs, <2 x i16> %rhs) {
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: s_max_i32 s2, s0, -1
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX6-NEXT: s_add_i32 s2, s2, 0x80000001
+; GFX6-NEXT: s_sub_i32 s2, s2, 0x7fffffff
; GFX6-NEXT: s_min_i32 s3, s0, -1
-; GFX6-NEXT: s_add_i32 s3, s3, 0x80000000
+; GFX6-NEXT: s_sub_i32 s3, s3, 0x80000000
; GFX6-NEXT: v_max_i32_e32 v0, s2, v0
; GFX6-NEXT: v_min_i32_e32 v0, s3, v0
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s0, v0
; GFX6-NEXT: s_lshl_b32 s0, s1, 16
; GFX6-NEXT: s_max_i32 s1, s0, -1
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT: s_add_i32 s1, s1, 0x80000001
+; GFX6-NEXT: s_sub_i32 s1, s1, 0x7fffffff
; GFX6-NEXT: s_min_i32 s2, s0, -1
-; GFX6-NEXT: s_add_i32 s2, s2, 0x80000000
+; GFX6-NEXT: s_sub_i32 s2, s2, 0x80000000
; GFX6-NEXT: v_max_i32_e32 v1, s1, v1
; GFX6-NEXT: v_min_i32_e32 v1, s2, v1
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s0, v1
@@ -2926,18 +2922,18 @@ define amdgpu_ps float @ssubsat_v2i16_sv(<2 x i16> inreg %lhs, <2 x i16> %rhs) {
; GFX8-NEXT: s_sext_i32_i16 s2, s0
; GFX8-NEXT: s_sext_i32_i16 s3, -1
; GFX8-NEXT: s_max_i32 s4, s2, s3
-; GFX8-NEXT: s_addk_i32 s4, 0x8001
+; GFX8-NEXT: s_sub_i32 s4, s4, 0x7fff
; GFX8-NEXT: s_min_i32 s2, s2, s3
; GFX8-NEXT: s_lshr_b32 s1, s0, 16
-; GFX8-NEXT: s_addk_i32 s2, 0x8000
+; GFX8-NEXT: s_sub_i32 s2, s2, 0xffff8000
; GFX8-NEXT: v_max_i16_e32 v1, s4, v0
; GFX8-NEXT: v_min_i16_e32 v1, s2, v1
; GFX8-NEXT: s_sext_i32_i16 s2, s1
; GFX8-NEXT: s_max_i32 s4, s2, s3
-; GFX8-NEXT: s_addk_i32 s4, 0x8001
+; GFX8-NEXT: s_sub_i32 s4, s4, 0x7fff
; GFX8-NEXT: s_min_i32 s2, s2, s3
; GFX8-NEXT: v_mov_b32_e32 v2, s4
-; GFX8-NEXT: s_addk_i32 s2, 0x8000
+; GFX8-NEXT: s_sub_i32 s2, s2, 0xffff8000
; GFX8-NEXT: v_max_i16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_min_i16_e32 v0, s2, v0
; GFX8-NEXT: v_mov_b32_e32 v2, s1
@@ -2966,20 +2962,18 @@ define amdgpu_ps float @ssubsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) {
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_max_i32_e32 v2, -1, v0
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
-; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0x80000001, v2
-; GFX6-NEXT: v_min_i32_e32 v4, -1, v0
-; GFX6-NEXT: v_bfrev_b32_e32 v5, 1
-; GFX6-NEXT: v_add_i32_e32 v4, vcc, v4, v5
+; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2
+; GFX6-NEXT: v_min_i32_e32 v3, -1, v0
+; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x80000000, v3
; GFX6-NEXT: v_max_i32_e32 v2, s0, v2
-; GFX6-NEXT: v_min_i32_e32 v2, v2, v4
+; GFX6-NEXT: v_min_i32_e32 v2, v2, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT: v_mov_b32_e32 v3, 0x80000001
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; GFX6-NEXT: v_max_i32_e32 v2, -1, v1
; GFX6-NEXT: s_lshl_b32 s0, s1, 16
-; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2
; GFX6-NEXT: v_min_i32_e32 v3, -1, v1
-; GFX6-NEXT: v_add_i32_e32 v3, vcc, 0x80000000, v3
+; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x80000000, v3
; GFX6-NEXT: v_max_i32_e32 v2, s0, v2
; GFX6-NEXT: v_min_i32_e32 v2, v2, v3
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v2
@@ -2994,17 +2988,17 @@ define amdgpu_ps float @ssubsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) {
; GFX8-LABEL: ssubsat_v2i16_vs:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_max_i16_e32 v1, -1, v0
-; GFX8-NEXT: v_add_u16_e32 v1, 0x8001, v1
+; GFX8-NEXT: v_subrev_u16_e32 v1, 0x7fff, v1
; GFX8-NEXT: v_min_i16_e32 v2, -1, v0
-; GFX8-NEXT: v_add_u16_e32 v2, 0x8000, v2
+; GFX8-NEXT: v_subrev_u16_e32 v2, 0x8000, v2
; GFX8-NEXT: v_max_i16_e32 v1, s0, v1
; GFX8-NEXT: v_min_i16_e32 v1, v1, v2
; GFX8-NEXT: v_mov_b32_e32 v2, -1
; GFX8-NEXT: v_max_i16_sdwa v3, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: s_lshr_b32 s1, s0, 16
-; GFX8-NEXT: v_add_u16_e32 v3, 0x8001, v3
+; GFX8-NEXT: v_subrev_u16_e32 v3, 0x7fff, v3
; GFX8-NEXT: v_min_i16_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_add_u16_e32 v2, 0x8000, v2
+; GFX8-NEXT: v_subrev_u16_e32 v2, 0x8000, v2
; GFX8-NEXT: v_max_i16_e32 v3, s1, v3
; GFX8-NEXT: v_min_i16_e32 v2, v3, v2
; GFX8-NEXT: v_sub_u16_e32 v1, v0, v1
@@ -3044,38 +3038,38 @@ define <2 x float> @v_ssubsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_max_i32_e32 v8, -1, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4
-; GFX6-NEXT: v_add_i32_e32 v8, vcc, 0x80000001, v8
+; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, 0x7fffffff, v8
; GFX6-NEXT: v_min_i32_e32 v10, -1, v0
; GFX6-NEXT: v_bfrev_b32_e32 v11, 1
-; GFX6-NEXT: v_add_i32_e32 v10, vcc, v10, v11
+; GFX6-NEXT: v_sub_i32_e32 v10, vcc, v10, v11
; GFX6-NEXT: v_max_i32_e32 v4, v8, v4
; GFX6-NEXT: v_min_i32_e32 v4, v4, v10
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT: v_mov_b32_e32 v9, 0x80000001
+; GFX6-NEXT: v_bfrev_b32_e32 v9, -2
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v5
; GFX6-NEXT: v_max_i32_e32 v5, -1, v1
-; GFX6-NEXT: v_add_i32_e32 v5, vcc, v5, v9
+; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v9
; GFX6-NEXT: v_min_i32_e32 v8, -1, v1
-; GFX6-NEXT: v_add_i32_e32 v8, vcc, v8, v11
+; GFX6-NEXT: v_sub_i32_e32 v8, vcc, v8, v11
; GFX6-NEXT: v_max_i32_e32 v4, v5, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_min_i32_e32 v4, v4, v8
; GFX6-NEXT: v_max_i32_e32 v5, -1, v2
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v6
-; GFX6-NEXT: v_add_i32_e32 v5, vcc, v5, v9
+; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v9
; GFX6-NEXT: v_min_i32_e32 v6, -1, v2
-; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v11
+; GFX6-NEXT: v_sub_i32_e32 v6, vcc, v6, v11
; GFX6-NEXT: v_max_i32_e32 v4, v5, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_min_i32_e32 v4, v4, v6
; GFX6-NEXT: v_max_i32_e32 v5, -1, v3
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v7
-; GFX6-NEXT: v_add_i32_e32 v5, vcc, v5, v9
+; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v9
; GFX6-NEXT: v_min_i32_e32 v6, -1, v3
-; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v11
+; GFX6-NEXT: v_sub_i32_e32 v6, vcc, v6, v11
; GFX6-NEXT: v_max_i32_e32 v4, v5, v4
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 16, v1
; GFX6-NEXT: v_min_i32_e32 v4, v4, v6
@@ -3097,28 +3091,28 @@ define <2 x float> @v_ssubsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_i16_e32 v4, -1, v0
-; GFX8-NEXT: v_add_u16_e32 v4, 0x8001, v4
+; GFX8-NEXT: v_subrev_u16_e32 v4, 0x7fff, v4
; GFX8-NEXT: v_min_i16_e32 v5, -1, v0
-; GFX8-NEXT: v_add_u16_e32 v5, 0x8000, v5
+; GFX8-NEXT: v_subrev_u16_e32 v5, 0x8000, v5
; GFX8-NEXT: v_max_i16_e32 v4, v4, v2
; GFX8-NEXT: v_min_i16_e32 v4, v4, v5
; GFX8-NEXT: v_mov_b32_e32 v5, -1
; GFX8-NEXT: v_max_i16_sdwa v6, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_add_u16_e32 v6, 0x8001, v6
+; GFX8-NEXT: v_subrev_u16_e32 v6, 0x7fff, v6
; GFX8-NEXT: v_min_i16_sdwa v7, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_add_u16_e32 v7, 0x8000, v7
+; GFX8-NEXT: v_subrev_u16_e32 v7, 0x8000, v7
; GFX8-NEXT: v_max_i16_sdwa v2, v6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_max_i16_e32 v6, -1, v1
; GFX8-NEXT: v_min_i16_e32 v2, v2, v7
-; GFX8-NEXT: v_add_u16_e32 v6, 0x8001, v6
+; GFX8-NEXT: v_subrev_u16_e32 v6, 0x7fff, v6
; GFX8-NEXT: v_min_i16_e32 v7, -1, v1
-; GFX8-NEXT: v_add_u16_e32 v7, 0x8000, v7
+; GFX8-NEXT: v_subrev_u16_e32 v7, 0x8000, v7
; GFX8-NEXT: v_max_i16_e32 v6, v6, v3
; GFX8-NEXT: v_min_i16_e32 v6, v6, v7
; GFX8-NEXT: v_max_i16_sdwa v7, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_add_u16_e32 v7, 0x8001, v7
+; GFX8-NEXT: v_subrev_u16_e32 v7, 0x7fff, v7
; GFX8-NEXT: v_min_i16_sdwa v5, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_add_u16_e32 v5, 0x8000, v5
+; GFX8-NEXT: v_subrev_u16_e32 v5, 0x8000, v5
; GFX8-NEXT: v_max_i16_sdwa v3, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_min_i16_e32 v3, v3, v5
; GFX8-NEXT: v_sub_u16_e32 v4, v0, v4
@@ -3153,36 +3147,36 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: s_max_i32 s8, s0, -1
; GFX6-NEXT: s_lshl_b32 s4, s4, 16
-; GFX6-NEXT: s_add_i32 s8, s8, 0x80000001
+; GFX6-NEXT: s_sub_i32 s8, s8, 0x7fffffff
; GFX6-NEXT: s_min_i32 s9, s0, -1
-; GFX6-NEXT: s_add_i32 s9, s9, 0x80000000
+; GFX6-NEXT: s_sub_i32 s9, s9, 0x80000000
; GFX6-NEXT: s_max_i32 s4, s8, s4
; GFX6-NEXT: s_min_i32 s4, s4, s9
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_sub_i32 s0, s0, s4
; GFX6-NEXT: s_lshl_b32 s4, s5, 16
; GFX6-NEXT: s_max_i32 s5, s1, -1
-; GFX6-NEXT: s_add_i32 s5, s5, 0x80000001
+; GFX6-NEXT: s_sub_i32 s5, s5, 0x7fffffff
; GFX6-NEXT: s_min_i32 s8, s1, -1
-; GFX6-NEXT: s_add_i32 s8, s8, 0x80000000
+; GFX6-NEXT: s_sub_i32 s8, s8, 0x80000000
; GFX6-NEXT: s_max_i32 s4, s5, s4
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_min_i32 s4, s4, s8
; GFX6-NEXT: s_max_i32 s5, s2, -1
; GFX6-NEXT: s_sub_i32 s1, s1, s4
; GFX6-NEXT: s_lshl_b32 s4, s6, 16
-; GFX6-NEXT: s_add_i32 s5, s5, 0x80000001
+; GFX6-NEXT: s_sub_i32 s5, s5, 0x7fffffff
; GFX6-NEXT: s_min_i32 s6, s2, -1
-; GFX6-NEXT: s_add_i32 s6, s6, 0x80000000
+; GFX6-NEXT: s_sub_i32 s6, s6, 0x80000000
; GFX6-NEXT: s_max_i32 s4, s5, s4
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
; GFX6-NEXT: s_min_i32 s4, s4, s6
; GFX6-NEXT: s_max_i32 s5, s3, -1
; GFX6-NEXT: s_sub_i32 s2, s2, s4
; GFX6-NEXT: s_lshl_b32 s4, s7, 16
-; GFX6-NEXT: s_add_i32 s5, s5, 0x80000001
+; GFX6-NEXT: s_sub_i32 s5, s5, 0x7fffffff
; GFX6-NEXT: s_min_i32 s6, s3, -1
-; GFX6-NEXT: s_add_i32 s6, s6, 0x80000000
+; GFX6-NEXT: s_sub_i32 s6, s6, 0x80000000
; GFX6-NEXT: s_max_i32 s4, s5, s4
; GFX6-NEXT: s_ashr_i32 s1, s1, 16
; GFX6-NEXT: s_min_i32 s4, s4, s6
@@ -3205,12 +3199,12 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
; GFX8-NEXT: s_sext_i32_i16 s8, s0
; GFX8-NEXT: s_sext_i32_i16 s9, -1
; GFX8-NEXT: s_max_i32 s10, s8, s9
-; GFX8-NEXT: s_addk_i32 s10, 0x8001
+; GFX8-NEXT: s_sub_i32 s10, s10, 0x7fff
; GFX8-NEXT: s_lshr_b32 s6, s2, 16
; GFX8-NEXT: s_min_i32 s8, s8, s9
; GFX8-NEXT: s_sext_i32_i16 s10, s10
; GFX8-NEXT: s_sext_i32_i16 s2, s2
-; GFX8-NEXT: s_addk_i32 s8, 0x8000
+; GFX8-NEXT: s_sub_i32 s8, s8, 0xffff8000
; GFX8-NEXT: s_max_i32 s2, s10, s2
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s8, s8
@@ -3219,11 +3213,11 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
; GFX8-NEXT: s_sub_i32 s0, s0, s2
; GFX8-NEXT: s_sext_i32_i16 s2, s4
; GFX8-NEXT: s_max_i32 s8, s2, s9
-; GFX8-NEXT: s_addk_i32 s8, 0x8001
+; GFX8-NEXT: s_sub_i32 s8, s8, 0x7fff
; GFX8-NEXT: s_min_i32 s2, s2, s9
; GFX8-NEXT: s_sext_i32_i16 s8, s8
; GFX8-NEXT: s_sext_i32_i16 s6, s6
-; GFX8-NEXT: s_addk_i32 s2, 0x8000
+; GFX8-NEXT: s_sub_i32 s2, s2, 0xffff8000
; GFX8-NEXT: s_max_i32 s6, s8, s6
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s2, s2
@@ -3231,12 +3225,12 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
; GFX8-NEXT: s_sub_i32 s2, s4, s2
; GFX8-NEXT: s_sext_i32_i16 s4, s1
; GFX8-NEXT: s_max_i32 s6, s4, s9
-; GFX8-NEXT: s_addk_i32 s6, 0x8001
+; GFX8-NEXT: s_sub_i32 s6, s6, 0x7fff
; GFX8-NEXT: s_lshr_b32 s7, s3, 16
; GFX8-NEXT: s_min_i32 s4, s4, s9
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s3, s3
-; GFX8-NEXT: s_addk_i32 s4, 0x8000
+; GFX8-NEXT: s_sub_i32 s4, s4, 0xffff8000
; GFX8-NEXT: s_max_i32 s3, s6, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s4, s4
@@ -3245,11 +3239,11 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
; GFX8-NEXT: s_sub_i32 s1, s1, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s5
; GFX8-NEXT: s_max_i32 s4, s3, s9
-; GFX8-NEXT: s_addk_i32 s4, 0x8001
+; GFX8-NEXT: s_sub_i32 s4, s4, 0x7fff
; GFX8-NEXT: s_min_i32 s3, s3, s9
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s6, s7
-; GFX8-NEXT: s_addk_i32 s3, 0x8000
+; GFX8-NEXT: s_sub_i32 s3, s3, 0xffff8000
; GFX8-NEXT: s_max_i32 s4, s4, s6
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s3, s3
@@ -3305,57 +3299,57 @@ define <3 x float> @v_ssubsat_v6i16(<6 x i16> %lhs, <6 x i16> %rhs) {
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_max_i32_e32 v12, -1, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v6
-; GFX6-NEXT: v_add_i32_e32 v12, vcc, 0x80000001, v12
+; GFX6-NEXT: v_subrev_i32_e32 v12, vcc, 0x7fffffff, v12
; GFX6-NEXT: v_min_i32_e32 v14, -1, v0
; GFX6-NEXT: v_bfrev_b32_e32 v15, 1
-; GFX6-NEXT: v_add_i32_e32 v14, vcc, v14, v15
+; GFX6-NEXT: v_sub_i32_e32 v14, vcc, v14, v15
; GFX6-NEXT: v_max_i32_e32 v6, v12, v6
; GFX6-NEXT: v_min_i32_e32 v6, v6, v14
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT: v_mov_b32_e32 v13, 0x80000001
+; GFX6-NEXT: v_bfrev_b32_e32 v13, -2
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v7
; GFX6-NEXT: v_max_i32_e32 v7, -1, v1
-; GFX6-NEXT: v_add_i32_e32 v7, vcc, v7, v13
+; GFX6-NEXT: v_sub_i32_e32 v7, vcc, v7, v13
; GFX6-NEXT: v_min_i32_e32 v12, -1, v1
-; GFX6-NEXT: v_add_i32_e32 v12, vcc, v12, v15
+; GFX6-NEXT: v_sub_i32_e32 v12, vcc, v12, v15
; GFX6-NEXT: v_max_i32_e32 v6, v7, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_min_i32_e32 v6, v6, v12
; GFX6-NEXT: v_max_i32_e32 v7, -1, v2
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v8
-; GFX6-NEXT: v_add_i32_e32 v7, vcc, v7, v13
+; GFX6-NEXT: v_sub_i32_e32 v7, vcc, v7, v13
; GFX6-NEXT: v_min_i32_e32 v8, -1, v2
-; GFX6-NEXT: v_add_i32_e32 v8, vcc, v8, v15
+; GFX6-NEXT: v_sub_i32_e32 v8, vcc, v8, v15
; GFX6-NEXT: v_max_i32_e32 v6, v7, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_min_i32_e32 v6, v6, v8
; GFX6-NEXT: v_max_i32_e32 v7, -1, v3
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v9
-; GFX6-NEXT: v_add_i32_e32 v7, vcc, v7, v13
+; GFX6-NEXT: v_sub_i32_e32 v7, vcc, v7, v13
; GFX6-NEXT: v_min_i32_e32 v8, -1, v3
-; GFX6-NEXT: v_add_i32_e32 v8, vcc, v8, v15
+; GFX6-NEXT: v_sub_i32_e32 v8, vcc, v8, v15
; GFX6-NEXT: v_max_i32_e32 v6, v7, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX6-NEXT: v_min_i32_e32 v6, v6, v8
; GFX6-NEXT: v_max_i32_e32 v7, -1, v4
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, v3, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v10
-; GFX6-NEXT: v_add_i32_e32 v7, vcc, v7, v13
+; GFX6-NEXT: v_sub_i32_e32 v7, vcc, v7, v13
; GFX6-NEXT: v_min_i32_e32 v8, -1, v4
-; GFX6-NEXT: v_add_i32_e32 v8, vcc, v8, v15
+; GFX6-NEXT: v_sub_i32_e32 v8, vcc, v8, v15
; GFX6-NEXT: v_max_i32_e32 v6, v7, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX6-NEXT: v_min_i32_e32 v6, v6, v8
; GFX6-NEXT: v_max_i32_e32 v7, -1, v5
; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v11
-; GFX6-NEXT: v_add_i32_e32 v7, vcc, v7, v13
+; GFX6-NEXT: v_sub_i32_e32 v7, vcc, v7, v13
; GFX6-NEXT: v_min_i32_e32 v8, -1, v5
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 16, v1
-; GFX6-NEXT: v_add_i32_e32 v8, vcc, v8, v15
+; GFX6-NEXT: v_sub_i32_e32 v8, vcc, v8, v15
; GFX6-NEXT: v_max_i32_e32 v6, v7, v6
; GFX6-NEXT: v_ashrrev_i32_e32 v0, 16, v0
; GFX6-NEXT: v_min_i32_e32 v6, v6, v8
@@ -3382,40 +3376,40 @@ define <3 x float> @v_ssubsat_v6i16(<6 x i16> %lhs, <6 x i16> %rhs) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_i16_e32 v6, -1, v0
-; GFX8-NEXT: v_add_u16_e32 v6, 0x8001, v6
+; GFX8-NEXT: v_subrev_u16_e32 v6, 0x7fff, v6
; GFX8-NEXT: v_min_i16_e32 v7, -1, v0
-; GFX8-NEXT: v_add_u16_e32 v7, 0x8000, v7
+; GFX8-NEXT: v_subrev_u16_e32 v7, 0x8000, v7
; GFX8-NEXT: v_max_i16_e32 v6, v6, v3
; GFX8-NEXT: v_min_i16_e32 v6, v6, v7
; GFX8-NEXT: v_mov_b32_e32 v7, -1
; GFX8-NEXT: v_max_i16_sdwa v8, v0, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_add_u16_e32 v8, 0x8001, v8
+; GFX8-NEXT: v_subrev_u16_e32 v8, 0x7fff, v8
; GFX8-NEXT: v_min_i16_sdwa v9, v0, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_add_u16_e32 v9, 0x8000, v9
+; GFX8-NEXT: v_subrev_u16_e32 v9, 0x8000, v9
; GFX8-NEXT: v_max_i16_sdwa v3, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_max_i16_e32 v8, -1, v1
; GFX8-NEXT: v_min_i16_e32 v3, v3, v9
-; GFX8-NEXT: v_add_u16_e32 v8, 0x8001, v8
+; GFX8-NEXT: v_subrev_u16_e32 v8, 0x7fff, v8
; GFX8-NEXT: v_min_i16_e32 v9, -1, v1
-; GFX8-NEXT: v_add_u16_e32 v9, 0x8000, v9
+; GFX8-NEXT: v_subrev_u16_e32 v9, 0x8000, v9
; GFX8-NEXT: v_max_i16_e32 v8, v8, v4
; GFX8-NEXT: v_min_i16_e32 v8, v8, v9
; GFX8-NEXT: v_max_i16_sdwa v9, v1, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_add_u16_e32 v9, 0x8001, v9
+; GFX8-NEXT: v_subrev_u16_e32 v9, 0x7fff, v9
; GFX8-NEXT: v_min_i16_sdwa v10, v1, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_add_u16_e32 v10, 0x8000, v10
+; GFX8-NEXT: v_subrev_u16_e32 v10, 0x8000, v10
; GFX8-NEXT: v_max_i16_sdwa v4, v9, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_max_i16_e32 v9, -1, v2
; GFX8-NEXT: v_min_i16_e32 v4, v4, v10
-; GFX8-NEXT: v_add_u16_e32 v9, 0x8001, v9
+; GFX8-NEXT: v_subrev_u16_e32 v9, 0x7fff, v9
; GFX8-NEXT: v_min_i16_e32 v10, -1, v2
-; GFX8-NEXT: v_add_u16_e32 v10, 0x8000, v10
+; GFX8-NEXT: v_subrev_u16_e32 v10, 0x8000, v10
; GFX8-NEXT: v_max_i16_e32 v9, v9, v5
; GFX8-NEXT: v_min_i16_e32 v9, v9, v10
; GFX8-NEXT: v_max_i16_sdwa v10, v2, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_add_u16_e32 v10, 0x8001, v10
+; GFX8-NEXT: v_subrev_u16_e32 v10, 0x7fff, v10
; GFX8-NEXT: v_min_i16_sdwa v7, v2, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_add_u16_e32 v7, 0x8000, v7
+; GFX8-NEXT: v_subrev_u16_e32 v7, 0x8000, v7
; GFX8-NEXT: v_max_i16_sdwa v5, v10, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_min_i16_e32 v5, v5, v7
; GFX8-NEXT: v_sub_u16_e32 v6, v0, v6
@@ -3455,55 +3449,55 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: s_max_i32 s12, s0, -1
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
-; GFX6-NEXT: s_add_i32 s12, s12, 0x80000001
+; GFX6-NEXT: s_sub_i32 s12, s12, 0x7fffffff
; GFX6-NEXT: s_min_i32 s13, s0, -1
-; GFX6-NEXT: s_add_i32 s13, s13, 0x80000000
+; GFX6-NEXT: s_sub_i32 s13, s13, 0x80000000
; GFX6-NEXT: s_max_i32 s6, s12, s6
; GFX6-NEXT: s_min_i32 s6, s6, s13
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_sub_i32 s0, s0, s6
; GFX6-NEXT: s_lshl_b32 s6, s7, 16
; GFX6-NEXT: s_max_i32 s7, s1, -1
-; GFX6-NEXT: s_add_i32 s7, s7, 0x80000001
+; GFX6-NEXT: s_sub_i32 s7, s7, 0x7fffffff
; GFX6-NEXT: s_min_i32 s12, s1, -1
-; GFX6-NEXT: s_add_i32 s12, s12, 0x80000000
+; GFX6-NEXT: s_sub_i32 s12, s12, 0x80000000
; GFX6-NEXT: s_max_i32 s6, s7, s6
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_min_i32 s6, s6, s12
; GFX6-NEXT: s_max_i32 s7, s2, -1
; GFX6-NEXT: s_sub_i32 s1, s1, s6
; GFX6-NEXT: s_lshl_b32 s6, s8, 16
-; GFX6-NEXT: s_add_i32 s7, s7, 0x80000001
+; GFX6-NEXT: s_sub_i32 s7, s7, 0x7fffffff
; GFX6-NEXT: s_min_i32 s8, s2, -1
-; GFX6-NEXT: s_add_i32 s8, s8, 0x80000000
+; GFX6-NEXT: s_sub_i32 s8, s8, 0x80000000
; GFX6-NEXT: s_max_i32 s6, s7, s6
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
; GFX6-NEXT: s_min_i32 s6, s6, s8
; GFX6-NEXT: s_max_i32 s7, s3, -1
; GFX6-NEXT: s_sub_i32 s2, s2, s6
; GFX6-NEXT: s_lshl_b32 s6, s9, 16
-; GFX6-NEXT: s_add_i32 s7, s7, 0x80000001
+; GFX6-NEXT: s_sub_i32 s7, s7, 0x7fffffff
; GFX6-NEXT: s_min_i32 s8, s3, -1
-; GFX6-NEXT: s_add_i32 s8, s8, 0x80000000
+; GFX6-NEXT: s_sub_i32 s8, s8, 0x80000000
; GFX6-NEXT: s_max_i32 s6, s7, s6
; GFX6-NEXT: s_lshl_b32 s4, s4, 16
; GFX6-NEXT: s_min_i32 s6, s6, s8
; GFX6-NEXT: s_max_i32 s7, s4, -1
; GFX6-NEXT: s_sub_i32 s3, s3, s6
; GFX6-NEXT: s_lshl_b32 s6, s10, 16
-; GFX6-NEXT: s_add_i32 s7, s7, 0x80000001
+; GFX6-NEXT: s_sub_i32 s7, s7, 0x7fffffff
; GFX6-NEXT: s_min_i32 s8, s4, -1
-; GFX6-NEXT: s_add_i32 s8, s8, 0x80000000
+; GFX6-NEXT: s_sub_i32 s8, s8, 0x80000000
; GFX6-NEXT: s_max_i32 s6, s7, s6
; GFX6-NEXT: s_lshl_b32 s5, s5, 16
; GFX6-NEXT: s_min_i32 s6, s6, s8
; GFX6-NEXT: s_max_i32 s7, s5, -1
; GFX6-NEXT: s_sub_i32 s4, s4, s6
; GFX6-NEXT: s_lshl_b32 s6, s11, 16
-; GFX6-NEXT: s_add_i32 s7, s7, 0x80000001
+; GFX6-NEXT: s_sub_i32 s7, s7, 0x7fffffff
; GFX6-NEXT: s_min_i32 s8, s5, -1
; GFX6-NEXT: s_ashr_i32 s1, s1, 16
-; GFX6-NEXT: s_add_i32 s8, s8, 0x80000000
+; GFX6-NEXT: s_sub_i32 s8, s8, 0x80000000
; GFX6-NEXT: s_max_i32 s6, s7, s6
; GFX6-NEXT: s_ashr_i32 s0, s0, 16
; GFX6-NEXT: s_min_i32 s6, s6, s8
@@ -3531,12 +3525,12 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
; GFX8-NEXT: s_sext_i32_i16 s12, s0
; GFX8-NEXT: s_sext_i32_i16 s13, -1
; GFX8-NEXT: s_max_i32 s14, s12, s13
-; GFX8-NEXT: s_addk_i32 s14, 0x8001
+; GFX8-NEXT: s_sub_i32 s14, s14, 0x7fff
; GFX8-NEXT: s_lshr_b32 s9, s3, 16
; GFX8-NEXT: s_min_i32 s12, s12, s13
; GFX8-NEXT: s_sext_i32_i16 s14, s14
; GFX8-NEXT: s_sext_i32_i16 s3, s3
-; GFX8-NEXT: s_addk_i32 s12, 0x8000
+; GFX8-NEXT: s_sub_i32 s12, s12, 0xffff8000
; GFX8-NEXT: s_max_i32 s3, s14, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s12, s12
@@ -3545,11 +3539,11 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
; GFX8-NEXT: s_sub_i32 s0, s0, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s6
; GFX8-NEXT: s_max_i32 s12, s3, s13
-; GFX8-NEXT: s_addk_i32 s12, 0x8001
+; GFX8-NEXT: s_sub_i32 s12, s12, 0x7fff
; GFX8-NEXT: s_min_i32 s3, s3, s13
; GFX8-NEXT: s_sext_i32_i16 s12, s12
; GFX8-NEXT: s_sext_i32_i16 s9, s9
-; GFX8-NEXT: s_addk_i32 s3, 0x8000
+; GFX8-NEXT: s_sub_i32 s3, s3, 0xffff8000
; GFX8-NEXT: s_max_i32 s9, s12, s9
; GFX8-NEXT: s_sext_i32_i16 s9, s9
; GFX8-NEXT: s_sext_i32_i16 s3, s3
@@ -3557,12 +3551,12 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
; GFX8-NEXT: s_sub_i32 s3, s6, s3
; GFX8-NEXT: s_sext_i32_i16 s6, s1
; GFX8-NEXT: s_max_i32 s9, s6, s13
-; GFX8-NEXT: s_addk_i32 s9, 0x8001
+; GFX8-NEXT: s_sub_i32 s9, s9, 0x7fff
; GFX8-NEXT: s_lshr_b32 s10, s4, 16
; GFX8-NEXT: s_min_i32 s6, s6, s13
; GFX8-NEXT: s_sext_i32_i16 s9, s9
; GFX8-NEXT: s_sext_i32_i16 s4, s4
-; GFX8-NEXT: s_addk_i32 s6, 0x8000
+; GFX8-NEXT: s_sub_i32 s6, s6, 0xffff8000
; GFX8-NEXT: s_max_i32 s4, s9, s4
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s6, s6
@@ -3571,11 +3565,11 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
; GFX8-NEXT: s_sub_i32 s1, s1, s4
; GFX8-NEXT: s_sext_i32_i16 s4, s7
; GFX8-NEXT: s_max_i32 s6, s4, s13
-; GFX8-NEXT: s_addk_i32 s6, 0x8001
+; GFX8-NEXT: s_sub_i32 s6, s6, 0x7fff
; GFX8-NEXT: s_min_i32 s4, s4, s13
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s9, s10
-; GFX8-NEXT: s_addk_i32 s4, 0x8000
+; GFX8-NEXT: s_sub_i32 s4, s4, 0xffff8000
; GFX8-NEXT: s_max_i32 s6, s6, s9
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s4, s4
@@ -3583,12 +3577,12 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
; GFX8-NEXT: s_sext_i32_i16 s6, s2
; GFX8-NEXT: s_sub_i32 s4, s7, s4
; GFX8-NEXT: s_max_i32 s7, s6, s13
-; GFX8-NEXT: s_addk_i32 s7, 0x8001
+; GFX8-NEXT: s_sub_i32 s7, s7, 0x7fff
; GFX8-NEXT: s_lshr_b32 s11, s5, 16
; GFX8-NEXT: s_min_i32 s6, s6, s13
; GFX8-NEXT: s_sext_i32_i16 s7, s7
; GFX8-NEXT: s_sext_i32_i16 s5, s5
-; GFX8-NEXT: s_addk_i32 s6, 0x8000
+; GFX8-NEXT: s_sub_i32 s6, s6, 0xffff8000
; GFX8-NEXT: s_max_i32 s5, s7, s5
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_sext_i32_i16 s6, s6
@@ -3597,11 +3591,11 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
; GFX8-NEXT: s_sub_i32 s2, s2, s5
; GFX8-NEXT: s_sext_i32_i16 s5, s8
; GFX8-NEXT: s_max_i32 s6, s5, s13
-; GFX8-NEXT: s_addk_i32 s6, 0x8001
+; GFX8-NEXT: s_sub_i32 s6, s6, 0x7fff
; GFX8-NEXT: s_min_i32 s5, s5, s13
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s7, s11
-; GFX8-NEXT: s_addk_i32 s5, 0x8000
+; GFX8-NEXT: s_sub_i32 s5, s5, 0xffff8000
; GFX8-NEXT: s_max_i32 s6, s6, s7
; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
; GFX8-NEXT: s_sext_i32_i16 s6, s6
@@ -3654,66 +3648,66 @@ define <4 x float> @v_ssubsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_max_i32_e32 v16, -1, v0
-; GFX6-NEXT: v_mov_b32_e32 v17, 0x80000001
+; GFX6-NEXT: v_bfrev_b32_e32 v17, -2
; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v8
-; GFX6-NEXT: v_add_i32_e32 v16, vcc, v16, v17
+; GFX6-NEXT: v_sub_i32_e32 v16, vcc, v16, v17
; GFX6-NEXT: v_min_i32_e32 v18, -1, v0
; GFX6-NEXT: v_bfrev_b32_e32 v19, 1
-; GFX6-NEXT: v_add_i32_e32 v18, vcc, v18, v19
+; GFX6-NEXT: v_sub_i32_e32 v18, vcc, v18, v19
; GFX6-NEXT: v_max_i32_e32 v8, v16, v8
; GFX6-NEXT: v_min_i32_e32 v8, v8, v18
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v9
; GFX6-NEXT: v_max_i32_e32 v9, -1, v1
-; GFX6-NEXT: v_add_i32_e32 v9, vcc, v9, v17
+; GFX6-NEXT: v_sub_i32_e32 v9, vcc, v9, v17
; GFX6-NEXT: v_min_i32_e32 v16, -1, v1
-; GFX6-NEXT: v_add_i32_e32 v16, vcc, v16, v19
+; GFX6-NEXT: v_sub_i32_e32 v16, vcc, v16, v19
; GFX6-NEXT: v_max_i32_e32 v8, v9, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_min_i32_e32 v8, v8, v16
; GFX6-NEXT: v_max_i32_e32 v9, -1, v2
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v10
-; GFX6-NEXT: v_add_i32_e32 v9, vcc, v9, v17
+; GFX6-NEXT: v_sub_i32_e32 v9, vcc, v9, v17
; GFX6-NEXT: v_min_i32_e32 v10, -1, v2
-; GFX6-NEXT: v_add_i32_e32 v10, vcc, v10, v19
+; GFX6-NEXT: v_sub_i32_e32 v10, vcc, v10, v19
; GFX6-NEXT: v_max_i32_e32 v8, v9, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_min_i32_e32 v8, v8, v10
; GFX6-NEXT: v_max_i32_e32 v9, -1, v3
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v11
-; GFX6-NEXT: v_add_i32_e32 v9, vcc, v9, v17
+; GFX6-NEXT: v_sub_i32_e32 v9, vcc, v9, v17
; GFX6-NEXT: v_min_i32_e32 v10, -1, v3
-; GFX6-NEXT: v_add_i32_e32 v10, vcc, v10, v19
+; GFX6-NEXT: v_sub_i32_e32 v10, vcc, v10, v19
; GFX6-NEXT: v_max_i32_e32 v8, v9, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX6-NEXT: v_min_i32_e32 v8, v8, v10
; GFX6-NEXT: v_max_i32_e32 v9, -1, v4
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, v3, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v12
-; GFX6-NEXT: v_add_i32_e32 v9, vcc, v9, v17
+; GFX6-NEXT: v_sub_i32_e32 v9, vcc, v9, v17
; GFX6-NEXT: v_min_i32_e32 v10, -1, v4
-; GFX6-NEXT: v_add_i32_e32 v10, vcc, v10, v19
+; GFX6-NEXT: v_sub_i32_e32 v10, vcc, v10, v19
; GFX6-NEXT: v_max_i32_e32 v8, v9, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX6-NEXT: v_min_i32_e32 v8, v8, v10
; GFX6-NEXT: v_max_i32_e32 v9, -1, v5
; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v13
-; GFX6-NEXT: v_add_i32_e32 v9, vcc, v9, v17
+; GFX6-NEXT: v_sub_i32_e32 v9, vcc, v9, v17
; GFX6-NEXT: v_min_i32_e32 v10, -1, v5
-; GFX6-NEXT: v_add_i32_e32 v10, vcc, v10, v19
+; GFX6-NEXT: v_sub_i32_e32 v10, vcc, v10, v19
; GFX6-NEXT: v_max_i32_e32 v8, v9, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; GFX6-NEXT: v_min_i32_e32 v8, v8, v10
; GFX6-NEXT: v_max_i32_e32 v9, -1, v6
; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v14
-; GFX6-NEXT: v_add_i32_e32 v9, vcc, v9, v17
+; GFX6-NEXT: v_sub_i32_e32 v9, vcc, v9, v17
; GFX6-NEXT: v_min_i32_e32 v10, -1, v6
-; GFX6-NEXT: v_add_i32_e32 v10, vcc, v10, v19
+; GFX6-NEXT: v_sub_i32_e32 v10, vcc, v10, v19
; GFX6-NEXT: v_max_i32_e32 v8, v9, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v7, 16, v7
; GFX6-NEXT: v_min_i32_e32 v8, v8, v10
@@ -3721,10 +3715,10 @@ define <4 x float> @v_ssubsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 16, v1
; GFX6-NEXT: v_sub_i32_e32 v6, vcc, v6, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v15
-; GFX6-NEXT: v_add_i32_e32 v9, vcc, v9, v17
+; GFX6-NEXT: v_sub_i32_e32 v9, vcc, v9, v17
; GFX6-NEXT: v_min_i32_e32 v10, -1, v7
; GFX6-NEXT: v_ashrrev_i32_e32 v0, 16, v0
-; GFX6-NEXT: v_add_i32_e32 v10, vcc, v10, v19
+; GFX6-NEXT: v_sub_i32_e32 v10, vcc, v10, v19
; GFX6-NEXT: v_max_i32_e32 v8, v9, v8
; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT: v_ashrrev_i32_e32 v2, 16, v2
@@ -3756,52 +3750,52 @@ define <4 x float> @v_ssubsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_i16_e32 v8, -1, v0
-; GFX8-NEXT: v_add_u16_e32 v8, 0x8001, v8
+; GFX8-NEXT: v_subrev_u16_e32 v8, 0x7fff, v8
; GFX8-NEXT: v_min_i16_e32 v9, -1, v0
-; GFX8-NEXT: v_add_u16_e32 v9, 0x8000, v9
+; GFX8-NEXT: v_subrev_u16_e32 v9, 0x8000, v9
; GFX8-NEXT: v_max_i16_e32 v8, v8, v4
; GFX8-NEXT: v_min_i16_e32 v8, v8, v9
; GFX8-NEXT: v_mov_b32_e32 v9, -1
; GFX8-NEXT: v_max_i16_sdwa v10, v0, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_add_u16_e32 v10, 0x8001, v10
+; GFX8-NEXT: v_subrev_u16_e32 v10, 0x7fff, v10
; GFX8-NEXT: v_min_i16_sdwa v11, v0, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_add_u16_e32 v11, 0x8000, v11
+; GFX8-NEXT: v_subrev_u16_e32 v11, 0x8000, v11
; GFX8-NEXT: v_max_i16_sdwa v4, v10, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_max_i16_e32 v10, -1, v1
; GFX8-NEXT: v_min_i16_e32 v4, v4, v11
-; GFX8-NEXT: v_add_u16_e32 v10, 0x8001, v10
+; GFX8-NEXT: v_subrev_u16_e32 v10, 0x7fff, v10
; GFX8-NEXT: v_min_i16_e32 v11, -1, v1
-; GFX8-NEXT: v_add_u16_e32 v11, 0x8000, v11
+; GFX8-NEXT: v_subrev_u16_e32 v11, 0x8000, v11
; GFX8-NEXT: v_max_i16_e32 v10, v10, v5
; GFX8-NEXT: v_min_i16_e32 v10, v10, v11
; GFX8-NEXT: v_max_i16_sdwa v11, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_add_u16_e32 v11, 0x8001, v11
+; GFX8-NEXT: v_subrev_u16_e32 v11, 0x7fff, v11
; GFX8-NEXT: v_min_i16_sdwa v12, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_add_u16_e32 v12, 0x8000, v12
+; GFX8-NEXT: v_subrev_u16_e32 v12, 0x8000, v12
; GFX8-NEXT: v_max_i16_sdwa v5, v11, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_max_i16_e32 v11, -1, v2
; GFX8-NEXT: v_min_i16_e32 v5, v5, v12
-; GFX8-NEXT: v_add_u16_e32 v11, 0x8001, v11
+; GFX8-NEXT: v_subrev_u16_e32 v11, 0x7fff, v11
; GFX8-NEXT: v_min_i16_e32 v12, -1, v2
-; GFX8-NEXT: v_add_u16_e32 v12, 0x8000, v12
+; GFX8-NEXT: v_subrev_u16_e32 v12, 0x8000, v12
; GFX8-NEXT: v_max_i16_e32 v11, v11, v6
; GFX8-NEXT: v_min_i16_e32 v11, v11, v12
; GFX8-NEXT: v_max_i16_sdwa v12, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_add_u16_e32 v12, 0x8001, v12
+; GFX8-NEXT: v_subrev_u16_e32 v12, 0x7fff, v12
; GFX8-NEXT: v_min_i16_sdwa v13, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_add_u16_e32 v13, 0x8000, v13
+; GFX8-NEXT: v_subrev_u16_e32 v13, 0x8000, v13
; GFX8-NEXT: v_max_i16_sdwa v6, v12, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_max_i16_e32 v12, -1, v3
; GFX8-NEXT: v_min_i16_e32 v6, v6, v13
-; GFX8-NEXT: v_add_u16_e32 v12, 0x8001, v12
+; GFX8-NEXT: v_subrev_u16_e32 v12, 0x7fff, v12
; GFX8-NEXT: v_min_i16_e32 v13, -1, v3
-; GFX8-NEXT: v_add_u16_e32 v13, 0x8000, v13
+; GFX8-NEXT: v_subrev_u16_e32 v13, 0x8000, v13
; GFX8-NEXT: v_max_i16_e32 v12, v12, v7
; GFX8-NEXT: v_min_i16_e32 v12, v12, v13
; GFX8-NEXT: v_max_i16_sdwa v13, v3, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_add_u16_e32 v13, 0x8001, v13
+; GFX8-NEXT: v_subrev_u16_e32 v13, 0x7fff, v13
; GFX8-NEXT: v_min_i16_sdwa v9, v3, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_add_u16_e32 v9, 0x8000, v9
+; GFX8-NEXT: v_subrev_u16_e32 v9, 0x8000, v9
; GFX8-NEXT: v_max_i16_sdwa v7, v13, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_e32 v8, v0, v8
; GFX8-NEXT: v_sub_u16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
@@ -3846,63 +3840,63 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: s_max_i32 s16, s0, -1
; GFX6-NEXT: s_lshl_b32 s8, s8, 16
-; GFX6-NEXT: s_add_i32 s16, s16, 0x80000001
+; GFX6-NEXT: s_sub_i32 s16, s16, 0x7fffffff
; GFX6-NEXT: s_min_i32 s17, s0, -1
-; GFX6-NEXT: s_add_i32 s17, s17, 0x80000000
+; GFX6-NEXT: s_sub_i32 s17, s17, 0x80000000
; GFX6-NEXT: s_max_i32 s8, s16, s8
; GFX6-NEXT: s_min_i32 s8, s8, s17
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_sub_i32 s0, s0, s8
; GFX6-NEXT: s_lshl_b32 s8, s9, 16
; GFX6-NEXT: s_max_i32 s9, s1, -1
-; GFX6-NEXT: s_add_i32 s9, s9, 0x80000001
+; GFX6-NEXT: s_sub_i32 s9, s9, 0x7fffffff
; GFX6-NEXT: s_min_i32 s16, s1, -1
-; GFX6-NEXT: s_add_i32 s16, s16, 0x80000000
+; GFX6-NEXT: s_sub_i32 s16, s16, 0x80000000
; GFX6-NEXT: s_max_i32 s8, s9, s8
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_min_i32 s8, s8, s16
; GFX6-NEXT: s_max_i32 s9, s2, -1
; GFX6-NEXT: s_sub_i32 s1, s1, s8
; GFX6-NEXT: s_lshl_b32 s8, s10, 16
-; GFX6-NEXT: s_add_i32 s9, s9, 0x80000001
+; GFX6-NEXT: s_sub_i32 s9, s9, 0x7fffffff
; GFX6-NEXT: s_min_i32 s10, s2, -1
-; GFX6-NEXT: s_add_i32 s10, s10, 0x80000000
+; GFX6-NEXT: s_sub_i32 s10, s10, 0x80000000
; GFX6-NEXT: s_max_i32 s8, s9, s8
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
; GFX6-NEXT: s_min_i32 s8, s8, s10
; GFX6-NEXT: s_max_i32 s9, s3, -1
; GFX6-NEXT: s_sub_i32 s2, s2, s8
; GFX6-NEXT: s_lshl_b32 s8, s11, 16
-; GFX6-NEXT: s_add_i32 s9, s9, 0x80000001
+; GFX6-NEXT: s_sub_i32 s9, s9, 0x7fffffff
; GFX6-NEXT: s_min_i32 s10, s3, -1
-; GFX6-NEXT: s_add_i32 s10, s10, 0x80000000
+; GFX6-NEXT: s_sub_i32 s10, s10, 0x80000000
; GFX6-NEXT: s_max_i32 s8, s9, s8
; GFX6-NEXT: s_lshl_b32 s4, s4, 16
; GFX6-NEXT: s_min_i32 s8, s8, s10
; GFX6-NEXT: s_max_i32 s9, s4, -1
; GFX6-NEXT: s_sub_i32 s3, s3, s8
; GFX6-NEXT: s_lshl_b32 s8, s12, 16
-; GFX6-NEXT: s_add_i32 s9, s9, 0x80000001
+; GFX6-NEXT: s_sub_i32 s9, s9, 0x7fffffff
; GFX6-NEXT: s_min_i32 s10, s4, -1
-; GFX6-NEXT: s_add_i32 s10, s10, 0x80000000
+; GFX6-NEXT: s_sub_i32 s10, s10, 0x80000000
; GFX6-NEXT: s_max_i32 s8, s9, s8
; GFX6-NEXT: s_lshl_b32 s5, s5, 16
; GFX6-NEXT: s_min_i32 s8, s8, s10
; GFX6-NEXT: s_max_i32 s9, s5, -1
; GFX6-NEXT: s_sub_i32 s4, s4, s8
; GFX6-NEXT: s_lshl_b32 s8, s13, 16
-; GFX6-NEXT: s_add_i32 s9, s9, 0x80000001
+; GFX6-NEXT: s_sub_i32 s9, s9, 0x7fffffff
; GFX6-NEXT: s_min_i32 s10, s5, -1
-; GFX6-NEXT: s_add_i32 s10, s10, 0x80000000
+; GFX6-NEXT: s_sub_i32 s10, s10, 0x80000000
; GFX6-NEXT: s_max_i32 s8, s9, s8
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
; GFX6-NEXT: s_min_i32 s8, s8, s10
; GFX6-NEXT: s_max_i32 s9, s6, -1
; GFX6-NEXT: s_sub_i32 s5, s5, s8
; GFX6-NEXT: s_lshl_b32 s8, s14, 16
-; GFX6-NEXT: s_add_i32 s9, s9, 0x80000001
+; GFX6-NEXT: s_sub_i32 s9, s9, 0x7fffffff
; GFX6-NEXT: s_min_i32 s10, s6, -1
-; GFX6-NEXT: s_add_i32 s10, s10, 0x80000000
+; GFX6-NEXT: s_sub_i32 s10, s10, 0x80000000
; GFX6-NEXT: s_max_i32 s8, s9, s8
; GFX6-NEXT: s_lshl_b32 s7, s7, 16
; GFX6-NEXT: s_min_i32 s8, s8, s10
@@ -3910,10 +3904,10 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX6-NEXT: s_ashr_i32 s1, s1, 16
; GFX6-NEXT: s_sub_i32 s6, s6, s8
; GFX6-NEXT: s_lshl_b32 s8, s15, 16
-; GFX6-NEXT: s_add_i32 s9, s9, 0x80000001
+; GFX6-NEXT: s_sub_i32 s9, s9, 0x7fffffff
; GFX6-NEXT: s_min_i32 s10, s7, -1
; GFX6-NEXT: s_ashr_i32 s0, s0, 16
-; GFX6-NEXT: s_add_i32 s10, s10, 0x80000000
+; GFX6-NEXT: s_sub_i32 s10, s10, 0x80000000
; GFX6-NEXT: s_max_i32 s8, s9, s8
; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
; GFX6-NEXT: s_ashr_i32 s2, s2, 16
@@ -3946,12 +3940,12 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX8-NEXT: s_sext_i32_i16 s16, s0
; GFX8-NEXT: s_sext_i32_i16 s17, -1
; GFX8-NEXT: s_max_i32 s18, s16, s17
-; GFX8-NEXT: s_addk_i32 s18, 0x8001
+; GFX8-NEXT: s_sub_i32 s18, s18, 0x7fff
; GFX8-NEXT: s_lshr_b32 s12, s4, 16
; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sext_i32_i16 s18, s18
; GFX8-NEXT: s_sext_i32_i16 s4, s4
-; GFX8-NEXT: s_addk_i32 s16, 0x8000
+; GFX8-NEXT: s_sub_i32 s16, s16, 0xffff8000
; GFX8-NEXT: s_max_i32 s4, s18, s4
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s16, s16
@@ -3960,11 +3954,11 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX8-NEXT: s_sub_i32 s0, s0, s4
; GFX8-NEXT: s_sext_i32_i16 s4, s8
; GFX8-NEXT: s_max_i32 s16, s4, s17
-; GFX8-NEXT: s_addk_i32 s16, 0x8001
+; GFX8-NEXT: s_sub_i32 s16, s16, 0x7fff
; GFX8-NEXT: s_min_i32 s4, s4, s17
; GFX8-NEXT: s_sext_i32_i16 s16, s16
; GFX8-NEXT: s_sext_i32_i16 s12, s12
-; GFX8-NEXT: s_addk_i32 s4, 0x8000
+; GFX8-NEXT: s_sub_i32 s4, s4, 0xffff8000
; GFX8-NEXT: s_max_i32 s12, s16, s12
; GFX8-NEXT: s_sext_i32_i16 s12, s12
; GFX8-NEXT: s_sext_i32_i16 s4, s4
@@ -3972,12 +3966,12 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX8-NEXT: s_sub_i32 s4, s8, s4
; GFX8-NEXT: s_sext_i32_i16 s8, s1
; GFX8-NEXT: s_max_i32 s12, s8, s17
-; GFX8-NEXT: s_addk_i32 s12, 0x8001
+; GFX8-NEXT: s_sub_i32 s12, s12, 0x7fff
; GFX8-NEXT: s_lshr_b32 s13, s5, 16
; GFX8-NEXT: s_min_i32 s8, s8, s17
; GFX8-NEXT: s_sext_i32_i16 s12, s12
; GFX8-NEXT: s_sext_i32_i16 s5, s5
-; GFX8-NEXT: s_addk_i32 s8, 0x8000
+; GFX8-NEXT: s_sub_i32 s8, s8, 0xffff8000
; GFX8-NEXT: s_max_i32 s5, s12, s5
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_sext_i32_i16 s8, s8
@@ -3986,11 +3980,11 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX8-NEXT: s_sub_i32 s1, s1, s5
; GFX8-NEXT: s_sext_i32_i16 s5, s9
; GFX8-NEXT: s_max_i32 s8, s5, s17
-; GFX8-NEXT: s_addk_i32 s8, 0x8001
+; GFX8-NEXT: s_sub_i32 s8, s8, 0x7fff
; GFX8-NEXT: s_min_i32 s5, s5, s17
; GFX8-NEXT: s_sext_i32_i16 s8, s8
; GFX8-NEXT: s_sext_i32_i16 s12, s13
-; GFX8-NEXT: s_addk_i32 s5, 0x8000
+; GFX8-NEXT: s_sub_i32 s5, s5, 0xffff8000
; GFX8-NEXT: s_max_i32 s8, s8, s12
; GFX8-NEXT: s_sext_i32_i16 s8, s8
; GFX8-NEXT: s_sext_i32_i16 s5, s5
@@ -3998,12 +3992,12 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX8-NEXT: s_sext_i32_i16 s8, s2
; GFX8-NEXT: s_sub_i32 s5, s9, s5
; GFX8-NEXT: s_max_i32 s9, s8, s17
-; GFX8-NEXT: s_addk_i32 s9, 0x8001
+; GFX8-NEXT: s_sub_i32 s9, s9, 0x7fff
; GFX8-NEXT: s_lshr_b32 s14, s6, 16
; GFX8-NEXT: s_min_i32 s8, s8, s17
; GFX8-NEXT: s_sext_i32_i16 s9, s9
; GFX8-NEXT: s_sext_i32_i16 s6, s6
-; GFX8-NEXT: s_addk_i32 s8, 0x8000
+; GFX8-NEXT: s_sub_i32 s8, s8, 0xffff8000
; GFX8-NEXT: s_max_i32 s6, s9, s6
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s8, s8
@@ -4012,23 +4006,23 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX8-NEXT: s_sub_i32 s2, s2, s6
; GFX8-NEXT: s_sext_i32_i16 s6, s10
; GFX8-NEXT: s_max_i32 s8, s6, s17
-; GFX8-NEXT: s_addk_i32 s8, 0x8001
+; GFX8-NEXT: s_sub_i32 s8, s8, 0x7fff
; GFX8-NEXT: s_min_i32 s6, s6, s17
; GFX8-NEXT: s_sext_i32_i16 s8, s8
; GFX8-NEXT: s_sext_i32_i16 s9, s14
-; GFX8-NEXT: s_addk_i32 s6, 0x8000
+; GFX8-NEXT: s_sub_i32 s6, s6, 0xffff8000
; GFX8-NEXT: s_max_i32 s8, s8, s9
; GFX8-NEXT: s_sext_i32_i16 s8, s8
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_min_i32 s6, s8, s6
; GFX8-NEXT: s_sext_i32_i16 s8, s3
; GFX8-NEXT: s_max_i32 s9, s8, s17
-; GFX8-NEXT: s_addk_i32 s9, 0x8001
+; GFX8-NEXT: s_sub_i32 s9, s9, 0x7fff
; GFX8-NEXT: s_lshr_b32 s15, s7, 16
; GFX8-NEXT: s_min_i32 s8, s8, s17
; GFX8-NEXT: s_sext_i32_i16 s9, s9
; GFX8-NEXT: s_sext_i32_i16 s7, s7
-; GFX8-NEXT: s_addk_i32 s8, 0x8000
+; GFX8-NEXT: s_sub_i32 s8, s8, 0xffff8000
; GFX8-NEXT: s_max_i32 s7, s9, s7
; GFX8-NEXT: s_sext_i32_i16 s7, s7
; GFX8-NEXT: s_sext_i32_i16 s8, s8
@@ -4037,14 +4031,14 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX8-NEXT: s_sub_i32 s3, s3, s7
; GFX8-NEXT: s_sext_i32_i16 s7, s11
; GFX8-NEXT: s_max_i32 s8, s7, s17
-; GFX8-NEXT: s_addk_i32 s8, 0x8001
+; GFX8-NEXT: s_sub_i32 s8, s8, 0x7fff
; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
; GFX8-NEXT: s_min_i32 s7, s7, s17
; GFX8-NEXT: s_sext_i32_i16 s8, s8
; GFX8-NEXT: s_sext_i32_i16 s9, s15
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
-; GFX8-NEXT: s_addk_i32 s7, 0x8000
+; GFX8-NEXT: s_sub_i32 s7, s7, 0xffff8000
; GFX8-NEXT: s_max_i32 s8, s8, s9
; GFX8-NEXT: s_or_b32 s0, s0, s4
; GFX8-NEXT: s_and_b32 s4, 0xffff, s5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.v2i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.v2i16.ll
index 6c104709f5ee3a6..855687281ce9ab2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.v2i16.ll
@@ -147,10 +147,10 @@ define <2 x i16> @v_sub_v2i16_neg_inline_imm_splat(<2 x i16> %a) {
; GFX8-LABEL: v_sub_v2i16_neg_inline_imm_splat:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mov_b32_e32 v2, 64
-; GFX8-NEXT: v_add_u16_e32 v1, 64, v0
-; GFX8-NEXT: v_add_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
+; GFX8-NEXT: v_not_b32_e32 v1, 63
+; GFX8-NEXT: v_subrev_u16_e32 v2, 0xffc0, v0
+; GFX8-NEXT: v_sub_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sub_v2i16_neg_inline_imm_splat:
@@ -179,9 +179,9 @@ define <2 x i16> @v_sub_v2i16_neg_inline_imm_lo(<2 x i16> %a) {
; GFX8-LABEL: v_sub_v2i16_neg_inline_imm_lo:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mov_b32_e32 v2, -4
-; GFX8-NEXT: v_add_u16_e32 v1, 64, v0
-; GFX8-NEXT: v_add_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_mov_b32_e32 v2, 4
+; GFX8-NEXT: v_subrev_u16_e32 v1, 0xffc0, v0
+; GFX8-NEXT: v_sub_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
@@ -211,10 +211,10 @@ define <2 x i16> @v_sub_v2i16_neg_inline_imm_hi(<2 x i16> %a) {
; GFX8-LABEL: v_sub_v2i16_neg_inline_imm_hi:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mov_b32_e32 v2, 64
-; GFX8-NEXT: v_add_u16_e32 v1, -4, v0
-; GFX8-NEXT: v_add_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
+; GFX8-NEXT: v_not_b32_e32 v1, 63
+; GFX8-NEXT: v_subrev_u16_e32 v2, 4, v0
+; GFX8-NEXT: v_sub_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sub_v2i16_neg_inline_imm_hi:
@@ -245,8 +245,8 @@ define amdgpu_ps i32 @s_sub_v2i16_neg_inline_imm_splat(<2 x i16> inreg %a) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshr_b32 s1, s0, 16
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
-; GFX8-NEXT: s_add_i32 s0, s0, 0xffff0040
-; GFX8-NEXT: s_add_i32 s1, s1, 0xffff0040
+; GFX8-NEXT: s_sub_i32 s0, s0, 0xffc0
+; GFX8-NEXT: s_sub_i32 s1, s1, 0xffc0
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
; GFX8-NEXT: s_or_b32 s0, s1, s0
@@ -285,8 +285,8 @@ define amdgpu_ps i32 @s_sub_v2i16_neg_inline_imm_lo(<2 x i16> inreg %a) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshr_b32 s1, s0, 16
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
-; GFX8-NEXT: s_add_i32 s0, s0, 0xffff0040
-; GFX8-NEXT: s_add_i32 s1, s1, -4
+; GFX8-NEXT: s_sub_i32 s0, s0, 0xffc0
+; GFX8-NEXT: s_sub_i32 s1, s1, 4
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
; GFX8-NEXT: s_or_b32 s0, s1, s0
@@ -325,8 +325,8 @@ define amdgpu_ps i32 @s_sub_v2i16_neg_inline_imm_hi(<2 x i16> inreg %a) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshr_b32 s1, s0, 16
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
-; GFX8-NEXT: s_add_i32 s0, s0, -4
-; GFX8-NEXT: s_add_i32 s1, s1, 0xffff0040
+; GFX8-NEXT: s_sub_i32 s0, s0, 4
+; GFX8-NEXT: s_sub_i32 s1, s1, 0xffc0
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
; GFX8-NEXT: s_or_b32 s0, s1, s0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
index 24ec4fa48f77890..31f61b9968b8bf7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
@@ -222,10 +222,10 @@ define i32 @v_urem_i32_oddk_denom(i32 %num) {
; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
; CHECK-NEXT: v_mul_lo_u32 v1, v1, v3
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
-; CHECK-NEXT: v_add_i32_e32 v1, vcc, 0xffed2705, v0
+; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, 0x12d8fb, v0
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; CHECK-NEXT: v_add_i32_e32 v1, vcc, 0xffed2705, v0
+; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, 0x12d8fb, v0
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; CHECK-NEXT: s_setpc_b64 s[30:31]
@@ -243,23 +243,23 @@ define <2 x i32> @v_urem_v2i32_oddk_denom(<2 x i32> %num) {
; GISEL-NEXT: v_rcp_iflag_f32_e32 v3, v3
; GISEL-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; GISEL-NEXT: v_cvt_u32_f32_e32 v3, v3
-; GISEL-NEXT: v_mul_lo_u32 v5, v3, v4
-; GISEL-NEXT: v_mul_hi_u32 v5, v3, v5
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v5
-; GISEL-NEXT: v_mul_hi_u32 v5, v0, v3
+; GISEL-NEXT: v_mul_lo_u32 v4, v3, v4
+; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v4
+; GISEL-NEXT: v_mul_hi_u32 v4, v0, v3
; GISEL-NEXT: v_mul_hi_u32 v3, v1, v3
-; GISEL-NEXT: v_mul_lo_u32 v5, v5, v2
+; GISEL-NEXT: v_mul_lo_u32 v4, v4, v2
; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2
-; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v0, v4
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, 0xffed2705, v1
+; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v2
+; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v0, v4
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, 0xffed2705, v1
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
+; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v2
+; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
@@ -274,23 +274,23 @@ define <2 x i32> @v_urem_v2i32_oddk_denom(<2 x i32> %num) {
; CGP-NEXT: v_mov_b32_e32 v4, 0x12d8fb
; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
-; CGP-NEXT: v_mul_lo_u32 v5, v2, v3
-; CGP-NEXT: v_mul_hi_u32 v5, v2, v5
-; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v5
-; CGP-NEXT: v_mul_hi_u32 v5, v0, v2
+; CGP-NEXT: v_mul_lo_u32 v3, v2, v3
+; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
+; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; CGP-NEXT: v_mul_hi_u32 v3, v0, v2
; CGP-NEXT: v_mul_hi_u32 v2, v1, v2
-; CGP-NEXT: v_mul_lo_u32 v5, v5, v4
+; CGP-NEXT: v_mul_lo_u32 v3, v3, v4
; CGP-NEXT: v_mul_lo_u32 v2, v2, v4
-; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v2
-; CGP-NEXT: v_add_i32_e32 v2, vcc, v0, v3
-; CGP-NEXT: v_add_i32_e32 v5, vcc, 0xffed2705, v1
+; CGP-NEXT: v_subrev_i32_e32 v2, vcc, 0x12d8fb, v0
+; CGP-NEXT: v_subrev_i32_e32 v3, vcc, 0x12d8fb, v1
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4
-; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
-; CGP-NEXT: v_add_i32_e32 v2, vcc, v0, v3
-; CGP-NEXT: v_add_i32_e32 v3, vcc, 0xffed2705, v1
+; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v0, v4
+; CGP-NEXT: v_subrev_i32_e32 v3, vcc, 0x12d8fb, v1
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4
diff --git a/llvm/test/CodeGen/AMDGPU/ctlz.ll b/llvm/test/CodeGen/AMDGPU/ctlz.ll
index c63e9d471b6bf7e..a7522ef761b8ab9 100644
--- a/llvm/test/CodeGen/AMDGPU/ctlz.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctlz.ll
@@ -567,7 +567,7 @@ define amdgpu_kernel void @v_ctlz_i8(ptr addrspace(1) noalias %out, ptr addrspac
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1
-; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 0xffffffe8, v1
+; GFX10-GISEL-NEXT: v_subrev_nc_u32_e32 v1, 24, v1
; GFX10-GISEL-NEXT: global_store_byte v0, v1, s[4:5]
; GFX10-GISEL-NEXT: s_endpgm
;
@@ -1566,7 +1566,7 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v1, v0
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1
-; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 0xffe8
+; GFX10-GISEL-NEXT: v_sub_nc_u16 v1, v1, 24
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0xffff, vcc_lo
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: global_store_byte v1, v0, s[4:5]
@@ -1686,7 +1686,7 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v2, v1
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX10-GISEL-NEXT: v_min_u32_e32 v2, 32, v2
-; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, -16, v2
+; GFX10-GISEL-NEXT: v_subrev_nc_u32_e32 v2, 16, v2
; GFX10-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v2, 0xffff, vcc_lo
; GFX10-GISEL-NEXT: global_store_short v0, v1, s[4:5]
@@ -1807,7 +1807,7 @@ define amdgpu_kernel void @v_ctlz_i7_sel_eq_neg1(ptr addrspace(1) noalias %out,
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v1, v0
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1
-; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 0xffe7
+; GFX10-GISEL-NEXT: v_sub_nc_u16 v1, v1, 25
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0x7f, vcc_lo
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0x7f, v0
diff --git a/llvm/test/CodeGen/AMDGPU/div_i128.ll b/llvm/test/CodeGen/AMDGPU/div_i128.ll
index 147ddc4d4b75b27..d94ec56842ab870 100644
--- a/llvm/test/CodeGen/AMDGPU/div_i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_i128.ll
@@ -1313,7 +1313,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-NEXT: v_sub_u32_e32 v0, 64, v8
; GFX9-G-NEXT: v_lshrrev_b64 v[0:1], v0, v[10:11]
; GFX9-G-NEXT: v_lshlrev_b64 v[2:3], v8, v[12:13]
-; GFX9-G-NEXT: v_add_u32_e32 v9, 0xffffffc0, v8
+; GFX9-G-NEXT: v_subrev_u32_e32 v9, 64, v8
; GFX9-G-NEXT: v_lshlrev_b64 v[6:7], v8, v[10:11]
; GFX9-G-NEXT: v_or_b32_e32 v2, v0, v2
; GFX9-G-NEXT: v_or_b32_e32 v3, v1, v3
@@ -1338,7 +1338,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-NEXT: v_sub_u32_e32 v2, 64, v20
; GFX9-G-NEXT: v_lshrrev_b64 v[0:1], v20, v[10:11]
; GFX9-G-NEXT: v_lshlrev_b64 v[2:3], v2, v[12:13]
-; GFX9-G-NEXT: v_add_u32_e32 v24, 0xffffffc0, v20
+; GFX9-G-NEXT: v_subrev_u32_e32 v24, 64, v20
; GFX9-G-NEXT: v_lshrrev_b64 v[14:15], v20, v[12:13]
; GFX9-G-NEXT: v_or_b32_e32 v2, v0, v2
; GFX9-G-NEXT: v_or_b32_e32 v3, v1, v3
@@ -2070,9 +2070,8 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(1)
; GFX9-G-O0-NEXT: v_mov_b32_e32 v21, v4
-; GFX9-G-O0-NEXT: s_mov_b32 s5, 0xffffffc0
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s5
-; GFX9-G-O0-NEXT: v_add_u32_e64 v4, v18, v4
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s4
+; GFX9-G-O0-NEXT: v_sub_u32_e64 v4, v18, v4
; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s4
; GFX9-G-O0-NEXT: v_sub_u32_e64 v5, v5, v18
; GFX9-G-O0-NEXT: s_mov_b32 s6, 0
@@ -2204,9 +2203,8 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v0
; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v9
; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v3
-; GFX9-G-O0-NEXT: s_mov_b32 s6, 0xffffffc0
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s6
-; GFX9-G-O0-NEXT: v_add_u32_e64 v2, v8, v0
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s7
+; GFX9-G-O0-NEXT: v_sub_u32_e64 v2, v8, v0
; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s7
; GFX9-G-O0-NEXT: v_sub_u32_e64 v14, v0, v8
; GFX9-G-O0-NEXT: s_mov_b32 s6, 0
@@ -3455,7 +3453,7 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-NEXT: v_sub_u32_e32 v8, 64, v16
; GFX9-G-NEXT: v_lshrrev_b64 v[8:9], v8, v[0:1]
; GFX9-G-NEXT: v_lshlrev_b64 v[10:11], v16, v[2:3]
-; GFX9-G-NEXT: v_add_u32_e32 v14, 0xffffffc0, v16
+; GFX9-G-NEXT: v_subrev_u32_e32 v14, 64, v16
; GFX9-G-NEXT: v_lshlrev_b64 v[12:13], v16, v[0:1]
; GFX9-G-NEXT: v_or_b32_e32 v10, v8, v10
; GFX9-G-NEXT: v_or_b32_e32 v11, v9, v11
@@ -3478,7 +3476,7 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-NEXT: s_cbranch_execz .LBB1_5
; GFX9-G-NEXT: ; %bb.2: ; %udiv-preheader
; GFX9-G-NEXT: v_sub_u32_e32 v12, 64, v18
-; GFX9-G-NEXT: v_add_u32_e32 v22, 0xffffffc0, v18
+; GFX9-G-NEXT: v_subrev_u32_e32 v22, 64, v18
; GFX9-G-NEXT: v_lshrrev_b64 v[10:11], v18, v[0:1]
; GFX9-G-NEXT: v_lshlrev_b64 v[12:13], v12, v[2:3]
; GFX9-G-NEXT: v_lshrrev_b64 v[16:17], v18, v[2:3]
@@ -4177,9 +4175,8 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-G-O0-NEXT: v_mov_b32_e32 v21, v7
; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, v6
-; GFX9-G-O0-NEXT: s_mov_b32 s5, 0xffffffc0
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s5
-; GFX9-G-O0-NEXT: v_add_u32_e64 v4, v12, v4
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s4
+; GFX9-G-O0-NEXT: v_sub_u32_e64 v4, v12, v4
; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s4
; GFX9-G-O0-NEXT: v_sub_u32_e64 v5, v5, v12
; GFX9-G-O0-NEXT: s_mov_b32 s6, 0
@@ -4314,9 +4311,8 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: s_mov_b32 s7, 64
; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v9
; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-G-O0-NEXT: s_mov_b32 s6, 0xffffffc0
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s6
-; GFX9-G-O0-NEXT: v_add_u32_e64 v2, v3, v0
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s7
+; GFX9-G-O0-NEXT: v_sub_u32_e64 v2, v3, v0
; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s7
; GFX9-G-O0-NEXT: v_sub_u32_e64 v8, v0, v3
; GFX9-G-O0-NEXT: s_mov_b32 s6, 0
diff --git a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
index 691f3d36bc73600..e04cd7112560811 100644
--- a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
@@ -476,18 +476,18 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[18:19]
; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
-; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
-; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], 0, 0, vcc
-; GISEL-NEXT: v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[20:21]
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
+; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, vcc
+; GISEL-NEXT: v_subb_u32_e64 v2, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[20:21]
; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc
-; GISEL-NEXT: v_xor_b32_e32 v8, 0x7f, v2
-; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[0:1]
+; GISEL-NEXT: v_xor_b32_e32 v8, 0x7f, v0
+; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[2:3]
; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc
-; GISEL-NEXT: v_or_b32_e32 v8, v8, v0
-; GISEL-NEXT: v_or_b32_e32 v9, v3, v1
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
+; GISEL-NEXT: v_or_b32_e32 v8, v8, v2
+; GISEL-NEXT: v_or_b32_e32 v9, v1, v3
+; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3]
; GISEL-NEXT: v_cndmask_b32_e32 v20, v21, v20, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9]
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
@@ -505,13 +505,12 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5]
; GISEL-NEXT: s_cbranch_execz .LBB0_6
; GISEL-NEXT: ; %bb.1: ; %udiv-bb15
-; GISEL-NEXT: v_add_i32_e32 v28, vcc, 1, v2
-; GISEL-NEXT: v_addc_u32_e64 v29, s[4:5], 0, v3, vcc
-; GISEL-NEXT: v_sub_i32_e32 v32, vcc, 0x7f, v2
-; GISEL-NEXT: v_not_b32_e32 v2, 63
-; GISEL-NEXT: v_addc_u32_e64 v30, vcc, 0, v0, s[4:5]
-; GISEL-NEXT: v_addc_u32_e32 v31, vcc, 0, v1, vcc
-; GISEL-NEXT: v_add_i32_e64 v20, s[4:5], v32, v2
+; GISEL-NEXT: v_add_i32_e32 v28, vcc, 1, v0
+; GISEL-NEXT: v_addc_u32_e64 v29, s[4:5], 0, v1, vcc
+; GISEL-NEXT: v_sub_i32_e32 v32, vcc, 0x7f, v0
+; GISEL-NEXT: v_addc_u32_e64 v30, vcc, 0, v2, s[4:5]
+; GISEL-NEXT: v_addc_u32_e32 v31, vcc, 0, v3, vcc
+; GISEL-NEXT: v_subrev_i32_e64 v20, s[4:5], 64, v32
; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], 64, v32
; GISEL-NEXT: v_lshl_b64 v[0:1], v[16:17], v32
; GISEL-NEXT: v_lshl_b64 v[2:3], v[18:19], v32
@@ -537,7 +536,7 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: s_xor_b64 s[14:15], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB0_5
; GISEL-NEXT: ; %bb.2: ; %udiv-preheader4
-; GISEL-NEXT: v_add_i32_e32 v34, vcc, 0xffffffc0, v28
+; GISEL-NEXT: v_subrev_i32_e32 v34, vcc, 64, v28
; GISEL-NEXT: v_sub_i32_e32 v22, vcc, 64, v28
; GISEL-NEXT: v_lshr_b64 v[0:1], v[18:19], v28
; GISEL-NEXT: v_lshr_b64 v[2:3], v[16:17], v28
@@ -666,18 +665,18 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[12:13]
; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
-; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
-; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], 0, 0, vcc
-; GISEL-NEXT: v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[10:11]
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
+; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, vcc
+; GISEL-NEXT: v_subb_u32_e64 v2, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[10:11]
; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT: v_xor_b32_e32 v10, 0x7f, v2
-; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[0:1]
+; GISEL-NEXT: v_xor_b32_e32 v10, 0x7f, v0
+; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[2:3]
; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT: v_or_b32_e32 v10, v10, v0
-; GISEL-NEXT: v_or_b32_e32 v11, v3, v1
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
+; GISEL-NEXT: v_or_b32_e32 v10, v10, v2
+; GISEL-NEXT: v_or_b32_e32 v11, v1, v3
+; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3]
; GISEL-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11]
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
@@ -695,13 +694,12 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5]
; GISEL-NEXT: s_cbranch_execz .LBB0_12
; GISEL-NEXT: ; %bb.7: ; %udiv-bb1
-; GISEL-NEXT: v_add_i32_e32 v26, vcc, 1, v2
-; GISEL-NEXT: v_addc_u32_e64 v27, s[4:5], 0, v3, vcc
-; GISEL-NEXT: v_sub_i32_e32 v30, vcc, 0x7f, v2
-; GISEL-NEXT: v_not_b32_e32 v2, 63
-; GISEL-NEXT: v_addc_u32_e64 v28, vcc, 0, v0, s[4:5]
-; GISEL-NEXT: v_addc_u32_e32 v29, vcc, 0, v1, vcc
-; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v30, v2
+; GISEL-NEXT: v_add_i32_e32 v26, vcc, 1, v0
+; GISEL-NEXT: v_addc_u32_e64 v27, s[4:5], 0, v1, vcc
+; GISEL-NEXT: v_sub_i32_e32 v30, vcc, 0x7f, v0
+; GISEL-NEXT: v_addc_u32_e64 v28, vcc, 0, v2, s[4:5]
+; GISEL-NEXT: v_addc_u32_e32 v29, vcc, 0, v3, vcc
+; GISEL-NEXT: v_subrev_i32_e64 v14, s[4:5], 64, v30
; GISEL-NEXT: v_sub_i32_e64 v10, s[4:5], 64, v30
; GISEL-NEXT: v_lshl_b64 v[0:1], v[6:7], v30
; GISEL-NEXT: v_lshl_b64 v[2:3], v[12:13], v30
@@ -727,7 +725,7 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB0_11
; GISEL-NEXT: ; %bb.8: ; %udiv-preheader
-; GISEL-NEXT: v_add_i32_e32 v32, vcc, 0xffffffc0, v26
+; GISEL-NEXT: v_subrev_i32_e32 v32, vcc, 64, v26
; GISEL-NEXT: v_sub_i32_e32 v16, vcc, 64, v26
; GISEL-NEXT: v_lshr_b64 v[0:1], v[12:13], v26
; GISEL-NEXT: v_lshr_b64 v[2:3], v[6:7], v26
@@ -1231,18 +1229,18 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17]
; GISEL-NEXT: v_cndmask_b32_e32 v3, v19, v18, vcc
-; GISEL-NEXT: v_sub_i32_e32 v22, vcc, v2, v3
-; GISEL-NEXT: v_subb_u32_e64 v23, s[4:5], 0, 0, vcc
-; GISEL-NEXT: v_subb_u32_e64 v20, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_subb_u32_e64 v21, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[22:23], v[24:25]
+; GISEL-NEXT: v_sub_i32_e32 v20, vcc, v2, v3
+; GISEL-NEXT: v_subb_u32_e64 v21, s[4:5], 0, 0, vcc
+; GISEL-NEXT: v_subb_u32_e64 v22, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT: v_subb_u32_e64 v23, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[20:21], v[24:25]
; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT: v_xor_b32_e32 v2, 0x7f, v22
-; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[20:21]
+; GISEL-NEXT: v_xor_b32_e32 v2, 0x7f, v20
+; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[22:23]
; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
-; GISEL-NEXT: v_or_b32_e32 v2, v2, v20
-; GISEL-NEXT: v_or_b32_e32 v3, v23, v21
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[20:21]
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v22
+; GISEL-NEXT: v_or_b32_e32 v3, v21, v23
+; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[22:23]
; GISEL-NEXT: v_cndmask_b32_e32 v18, v19, v18, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3]
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
@@ -1260,13 +1258,12 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5]
; GISEL-NEXT: s_cbranch_execz .LBB1_6
; GISEL-NEXT: ; %bb.1: ; %udiv-bb15
-; GISEL-NEXT: v_add_i32_e32 v26, vcc, 1, v22
-; GISEL-NEXT: v_addc_u32_e64 v27, s[4:5], 0, v23, vcc
-; GISEL-NEXT: v_sub_i32_e32 v30, vcc, 0x7f, v22
-; GISEL-NEXT: v_not_b32_e32 v2, 63
-; GISEL-NEXT: v_addc_u32_e64 v28, vcc, 0, v20, s[4:5]
-; GISEL-NEXT: v_addc_u32_e32 v29, vcc, 0, v21, vcc
-; GISEL-NEXT: v_add_i32_e64 v22, s[4:5], v30, v2
+; GISEL-NEXT: v_add_i32_e32 v26, vcc, 1, v20
+; GISEL-NEXT: v_addc_u32_e64 v27, s[4:5], 0, v21, vcc
+; GISEL-NEXT: v_sub_i32_e32 v30, vcc, 0x7f, v20
+; GISEL-NEXT: v_addc_u32_e64 v28, vcc, 0, v22, s[4:5]
+; GISEL-NEXT: v_addc_u32_e32 v29, vcc, 0, v23, vcc
+; GISEL-NEXT: v_subrev_i32_e64 v22, s[4:5], 64, v30
; GISEL-NEXT: v_sub_i32_e64 v20, s[4:5], 64, v30
; GISEL-NEXT: v_lshl_b64 v[2:3], v[0:1], v30
; GISEL-NEXT: v_lshl_b64 v[18:19], v[16:17], v30
@@ -1292,7 +1289,7 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB1_5
; GISEL-NEXT: ; %bb.2: ; %udiv-preheader4
-; GISEL-NEXT: v_add_i32_e32 v32, vcc, 0xffffffc0, v26
+; GISEL-NEXT: v_subrev_i32_e32 v32, vcc, 64, v26
; GISEL-NEXT: v_sub_i32_e32 v24, vcc, 64, v26
; GISEL-NEXT: v_lshr_b64 v[18:19], v[16:17], v26
; GISEL-NEXT: v_lshr_b64 v[20:21], v[0:1], v26
@@ -1404,18 +1401,18 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7]
; GISEL-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
-; GISEL-NEXT: v_sub_i32_e32 v16, vcc, v0, v1
-; GISEL-NEXT: v_subb_u32_e64 v17, s[4:5], 0, 0, vcc
-; GISEL-NEXT: v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[16:17], v[10:11]
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
+; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, vcc
+; GISEL-NEXT: v_subb_u32_e64 v16, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT: v_subb_u32_e64 v17, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[10:11]
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_xor_b32_e32 v8, 0x7f, v16
-; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[0:1]
+; GISEL-NEXT: v_xor_b32_e32 v8, 0x7f, v0
+; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[16:17]
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT: v_or_b32_e32 v8, v8, v0
-; GISEL-NEXT: v_or_b32_e32 v9, v17, v1
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
+; GISEL-NEXT: v_or_b32_e32 v8, v8, v16
+; GISEL-NEXT: v_or_b32_e32 v9, v1, v17
+; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17]
; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v10, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9]
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
@@ -1433,13 +1430,12 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5]
; GISEL-NEXT: s_cbranch_execz .LBB1_12
; GISEL-NEXT: ; %bb.7: ; %udiv-bb1
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v16
-; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], 0, v17, vcc
-; GISEL-NEXT: v_sub_i32_e32 v26, vcc, 0x7f, v16
-; GISEL-NEXT: v_not_b32_e32 v9, 63
-; GISEL-NEXT: v_addc_u32_e64 v24, vcc, 0, v0, s[4:5]
-; GISEL-NEXT: v_addc_u32_e32 v25, vcc, 0, v1, vcc
-; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v26, v9
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v0
+; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], 0, v1, vcc
+; GISEL-NEXT: v_sub_i32_e32 v26, vcc, 0x7f, v0
+; GISEL-NEXT: v_addc_u32_e64 v24, vcc, 0, v16, s[4:5]
+; GISEL-NEXT: v_addc_u32_e32 v25, vcc, 0, v17, vcc
+; GISEL-NEXT: v_subrev_i32_e64 v9, s[4:5], 64, v26
; GISEL-NEXT: v_sub_i32_e64 v10, s[4:5], 64, v26
; GISEL-NEXT: v_lshl_b64 v[0:1], v[4:5], v26
; GISEL-NEXT: v_lshl_b64 v[16:17], v[6:7], v26
@@ -1465,7 +1461,7 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB1_11
; GISEL-NEXT: ; %bb.8: ; %udiv-preheader
-; GISEL-NEXT: v_add_i32_e32 v28, vcc, 0xffffffc0, v8
+; GISEL-NEXT: v_subrev_i32_e32 v28, vcc, 64, v8
; GISEL-NEXT: v_sub_i32_e32 v22, vcc, 64, v8
; GISEL-NEXT: v_lshr_b64 v[16:17], v[6:7], v8
; GISEL-NEXT: v_lshr_b64 v[20:21], v[4:5], v8
@@ -2076,18 +2072,18 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9]
; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
-; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
-; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], 0, 0, vcc
-; GISEL-NEXT: v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[18:19]
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
+; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, vcc
+; GISEL-NEXT: v_subb_u32_e64 v2, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[18:19]
; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc
-; GISEL-NEXT: v_xor_b32_e32 v18, 0x7f, v2
-; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[0:1]
+; GISEL-NEXT: v_xor_b32_e32 v18, 0x7f, v0
+; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[2:3]
; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc
-; GISEL-NEXT: v_or_b32_e32 v18, v18, v0
-; GISEL-NEXT: v_or_b32_e32 v19, v3, v1
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
+; GISEL-NEXT: v_or_b32_e32 v18, v18, v2
+; GISEL-NEXT: v_or_b32_e32 v19, v1, v3
+; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3]
; GISEL-NEXT: v_cndmask_b32_e32 v21, v22, v21, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[18:19]
; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
@@ -2105,13 +2101,12 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5]
; GISEL-NEXT: s_cbranch_execz .LBB2_6
; GISEL-NEXT: ; %bb.1: ; %udiv-bb15
-; GISEL-NEXT: v_add_i32_e32 v31, vcc, 1, v2
-; GISEL-NEXT: v_addc_u32_e64 v32, s[4:5], 0, v3, vcc
-; GISEL-NEXT: v_sub_i32_e32 v24, vcc, 0x7f, v2
-; GISEL-NEXT: v_not_b32_e32 v2, 63
-; GISEL-NEXT: v_addc_u32_e64 v33, vcc, 0, v0, s[4:5]
-; GISEL-NEXT: v_addc_u32_e32 v34, vcc, 0, v1, vcc
-; GISEL-NEXT: v_add_i32_e64 v20, s[4:5], v24, v2
+; GISEL-NEXT: v_add_i32_e32 v31, vcc, 1, v0
+; GISEL-NEXT: v_addc_u32_e64 v32, s[4:5], 0, v1, vcc
+; GISEL-NEXT: v_sub_i32_e32 v24, vcc, 0x7f, v0
+; GISEL-NEXT: v_addc_u32_e64 v33, vcc, 0, v2, s[4:5]
+; GISEL-NEXT: v_addc_u32_e32 v34, vcc, 0, v3, vcc
+; GISEL-NEXT: v_subrev_i32_e64 v20, s[4:5], 64, v24
; GISEL-NEXT: v_sub_i32_e64 v18, s[4:5], 64, v24
; GISEL-NEXT: v_lshl_b64 v[0:1], v[16:17], v24
; GISEL-NEXT: v_lshl_b64 v[2:3], v[8:9], v24
@@ -2137,7 +2132,7 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB2_5
; GISEL-NEXT: ; %bb.2: ; %udiv-preheader4
-; GISEL-NEXT: v_add_i32_e32 v24, vcc, 0xffffffc0, v31
+; GISEL-NEXT: v_subrev_i32_e32 v24, vcc, 64, v31
; GISEL-NEXT: v_sub_i32_e32 v22, vcc, 64, v31
; GISEL-NEXT: v_lshr_b64 v[0:1], v[8:9], v31
; GISEL-NEXT: v_lshr_b64 v[2:3], v[16:17], v31
@@ -2267,18 +2262,18 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7]
; GISEL-NEXT: v_cndmask_b32_e32 v1, v15, v14, vcc
-; GISEL-NEXT: v_sub_i32_e32 v14, vcc, v0, v1
-; GISEL-NEXT: v_subb_u32_e64 v15, s[4:5], 0, 0, vcc
-; GISEL-NEXT: v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[14:15], v[2:3]
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
+; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, vcc
+; GISEL-NEXT: v_subb_u32_e64 v14, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT: v_subb_u32_e64 v15, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc
-; GISEL-NEXT: v_xor_b32_e32 v2, 0x7f, v14
-; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[0:1]
+; GISEL-NEXT: v_xor_b32_e32 v2, 0x7f, v0
+; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[14:15]
; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc
-; GISEL-NEXT: v_or_b32_e32 v2, v2, v0
-; GISEL-NEXT: v_or_b32_e32 v3, v15, v1
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v14
+; GISEL-NEXT: v_or_b32_e32 v3, v1, v15
+; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15]
; GISEL-NEXT: v_cndmask_b32_e32 v21, v22, v21, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3]
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
@@ -2296,13 +2291,12 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5]
; GISEL-NEXT: s_cbranch_execz .LBB2_12
; GISEL-NEXT: ; %bb.7: ; %udiv-bb1
-; GISEL-NEXT: v_add_i32_e32 v36, vcc, 1, v14
-; GISEL-NEXT: v_addc_u32_e64 v37, s[4:5], 0, v15, vcc
-; GISEL-NEXT: v_sub_i32_e32 v24, vcc, 0x7f, v14
-; GISEL-NEXT: v_not_b32_e32 v2, 63
-; GISEL-NEXT: v_addc_u32_e64 v38, vcc, 0, v0, s[4:5]
-; GISEL-NEXT: v_addc_u32_e32 v39, vcc, 0, v1, vcc
-; GISEL-NEXT: v_add_i32_e64 v20, s[4:5], v24, v2
+; GISEL-NEXT: v_add_i32_e32 v36, vcc, 1, v0
+; GISEL-NEXT: v_addc_u32_e64 v37, s[4:5], 0, v1, vcc
+; GISEL-NEXT: v_sub_i32_e32 v24, vcc, 0x7f, v0
+; GISEL-NEXT: v_addc_u32_e64 v38, vcc, 0, v14, s[4:5]
+; GISEL-NEXT: v_addc_u32_e32 v39, vcc, 0, v15, vcc
+; GISEL-NEXT: v_subrev_i32_e64 v20, s[4:5], 64, v24
; GISEL-NEXT: v_sub_i32_e64 v14, s[4:5], 64, v24
; GISEL-NEXT: v_lshl_b64 v[0:1], v[12:13], v24
; GISEL-NEXT: v_lshl_b64 v[2:3], v[6:7], v24
@@ -2328,7 +2322,7 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB2_11
; GISEL-NEXT: ; %bb.8: ; %udiv-preheader
-; GISEL-NEXT: v_add_i32_e32 v24, vcc, 0xffffffc0, v36
+; GISEL-NEXT: v_subrev_i32_e32 v24, vcc, 64, v36
; GISEL-NEXT: v_sub_i32_e32 v22, vcc, 64, v36
; GISEL-NEXT: v_lshr_b64 v[0:1], v[6:7], v36
; GISEL-NEXT: v_lshr_b64 v[2:3], v[12:13], v36
@@ -2909,18 +2903,18 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3]
; GISEL-NEXT: v_cndmask_b32_e32 v17, v19, v18, vcc
-; GISEL-NEXT: v_sub_i32_e32 v18, vcc, v16, v17
-; GISEL-NEXT: v_subb_u32_e64 v19, s[4:5], 0, 0, vcc
-; GISEL-NEXT: v_subb_u32_e64 v16, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_subb_u32_e64 v17, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[18:19], v[20:21]
+; GISEL-NEXT: v_sub_i32_e32 v16, vcc, v16, v17
+; GISEL-NEXT: v_subb_u32_e64 v17, s[4:5], 0, 0, vcc
+; GISEL-NEXT: v_subb_u32_e64 v18, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT: v_subb_u32_e64 v19, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[16:17], v[20:21]
; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc
-; GISEL-NEXT: v_xor_b32_e32 v20, 0x7f, v18
-; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[16:17]
+; GISEL-NEXT: v_xor_b32_e32 v20, 0x7f, v16
+; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[18:19]
; GISEL-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc
-; GISEL-NEXT: v_or_b32_e32 v20, v20, v16
-; GISEL-NEXT: v_or_b32_e32 v21, v19, v17
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17]
+; GISEL-NEXT: v_or_b32_e32 v20, v20, v18
+; GISEL-NEXT: v_or_b32_e32 v21, v17, v19
+; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[18:19]
; GISEL-NEXT: v_cndmask_b32_e32 v23, v24, v23, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[20:21]
; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc
@@ -2938,13 +2932,12 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5]
; GISEL-NEXT: s_cbranch_execz .LBB3_6
; GISEL-NEXT: ; %bb.1: ; %udiv-bb15
-; GISEL-NEXT: v_add_i32_e32 v30, vcc, 1, v18
-; GISEL-NEXT: v_addc_u32_e64 v31, s[4:5], 0, v19, vcc
-; GISEL-NEXT: v_sub_i32_e32 v26, vcc, 0x7f, v18
-; GISEL-NEXT: v_not_b32_e32 v18, 63
-; GISEL-NEXT: v_addc_u32_e64 v32, vcc, 0, v16, s[4:5]
-; GISEL-NEXT: v_addc_u32_e32 v33, vcc, 0, v17, vcc
-; GISEL-NEXT: v_add_i32_e64 v22, s[4:5], v26, v18
+; GISEL-NEXT: v_add_i32_e32 v30, vcc, 1, v16
+; GISEL-NEXT: v_addc_u32_e64 v31, s[4:5], 0, v17, vcc
+; GISEL-NEXT: v_sub_i32_e32 v26, vcc, 0x7f, v16
+; GISEL-NEXT: v_addc_u32_e64 v32, vcc, 0, v18, s[4:5]
+; GISEL-NEXT: v_addc_u32_e32 v33, vcc, 0, v19, vcc
+; GISEL-NEXT: v_subrev_i32_e64 v22, s[4:5], 64, v26
; GISEL-NEXT: v_sub_i32_e64 v20, s[4:5], 64, v26
; GISEL-NEXT: v_lshl_b64 v[16:17], v[0:1], v26
; GISEL-NEXT: v_lshl_b64 v[18:19], v[2:3], v26
@@ -2970,7 +2963,7 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB3_5
; GISEL-NEXT: ; %bb.2: ; %udiv-preheader4
-; GISEL-NEXT: v_add_i32_e32 v26, vcc, 0xffffffc0, v30
+; GISEL-NEXT: v_subrev_i32_e32 v26, vcc, 64, v30
; GISEL-NEXT: v_sub_i32_e32 v24, vcc, 64, v30
; GISEL-NEXT: v_lshr_b64 v[16:17], v[2:3], v30
; GISEL-NEXT: v_lshr_b64 v[18:19], v[0:1], v30
@@ -3082,18 +3075,18 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7]
; GISEL-NEXT: v_cndmask_b32_e32 v17, v19, v18, vcc
-; GISEL-NEXT: v_sub_i32_e32 v22, vcc, v16, v17
-; GISEL-NEXT: v_subb_u32_e64 v23, s[4:5], 0, 0, vcc
-; GISEL-NEXT: v_subb_u32_e64 v16, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_subb_u32_e64 v17, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[22:23], v[24:25]
+; GISEL-NEXT: v_sub_i32_e32 v16, vcc, v16, v17
+; GISEL-NEXT: v_subb_u32_e64 v17, s[4:5], 0, 0, vcc
+; GISEL-NEXT: v_subb_u32_e64 v22, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT: v_subb_u32_e64 v23, s[4:5], 0, 0, s[4:5]
+; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[16:17], v[24:25]
; GISEL-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc
-; GISEL-NEXT: v_xor_b32_e32 v18, 0x7f, v22
-; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[16:17]
+; GISEL-NEXT: v_xor_b32_e32 v18, 0x7f, v16
+; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[22:23]
; GISEL-NEXT: v_cndmask_b32_e64 v25, 0, 1, vcc
-; GISEL-NEXT: v_or_b32_e32 v18, v18, v16
-; GISEL-NEXT: v_or_b32_e32 v19, v23, v17
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17]
+; GISEL-NEXT: v_or_b32_e32 v18, v18, v22
+; GISEL-NEXT: v_or_b32_e32 v19, v17, v23
+; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[22:23]
; GISEL-NEXT: v_cndmask_b32_e32 v24, v25, v24, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[18:19]
; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
@@ -3111,13 +3104,12 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5]
; GISEL-NEXT: s_cbranch_execz .LBB3_12
; GISEL-NEXT: ; %bb.7: ; %udiv-bb1
-; GISEL-NEXT: v_add_i32_e32 v34, vcc, 1, v22
-; GISEL-NEXT: v_addc_u32_e64 v35, s[4:5], 0, v23, vcc
-; GISEL-NEXT: v_sub_i32_e32 v28, vcc, 0x7f, v22
-; GISEL-NEXT: v_not_b32_e32 v18, 63
-; GISEL-NEXT: v_addc_u32_e64 v36, vcc, 0, v16, s[4:5]
-; GISEL-NEXT: v_addc_u32_e32 v37, vcc, 0, v17, vcc
-; GISEL-NEXT: v_add_i32_e64 v24, s[4:5], v28, v18
+; GISEL-NEXT: v_add_i32_e32 v34, vcc, 1, v16
+; GISEL-NEXT: v_addc_u32_e64 v35, s[4:5], 0, v17, vcc
+; GISEL-NEXT: v_sub_i32_e32 v28, vcc, 0x7f, v16
+; GISEL-NEXT: v_addc_u32_e64 v36, vcc, 0, v22, s[4:5]
+; GISEL-NEXT: v_addc_u32_e32 v37, vcc, 0, v23, vcc
+; GISEL-NEXT: v_subrev_i32_e64 v24, s[4:5], 64, v28
; GISEL-NEXT: v_sub_i32_e64 v22, s[4:5], 64, v28
; GISEL-NEXT: v_lshl_b64 v[16:17], v[4:5], v28
; GISEL-NEXT: v_lshl_b64 v[18:19], v[6:7], v28
@@ -3143,7 +3135,7 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB3_11
; GISEL-NEXT: ; %bb.8: ; %udiv-preheader
-; GISEL-NEXT: v_add_i32_e32 v28, vcc, 0xffffffc0, v34
+; GISEL-NEXT: v_subrev_i32_e32 v28, vcc, 64, v34
; GISEL-NEXT: v_sub_i32_e32 v26, vcc, 64, v34
; GISEL-NEXT: v_lshr_b64 v[16:17], v[6:7], v34
; GISEL-NEXT: v_lshr_b64 v[18:19], v[4:5], v34
diff --git a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
index 6f4f7c27a514731..68ebc21e2ba4d66 100644
--- a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
@@ -234,17 +234,17 @@ define i128 @fptosi_f64_to_i128(double %x) {
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB0_4
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
-; GISEL-NEXT: v_add_u32_e32 v7, 0xfffffbcd, v6
-; GISEL-NEXT: v_lshlrev_b64 v[0:1], v7, v[4:5]
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7
+; GISEL-NEXT: v_add_u32_e32 v6, 0xfffffbcd, v6
+; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
+; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v12, 0, v1, vcc
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v11, v10, 0
-; GISEL-NEXT: v_add_u32_e32 v6, 0xfffffb8d, v6
-; GISEL-NEXT: v_sub_u32_e32 v2, 64, v7
+; GISEL-NEXT: v_subrev_u32_e32 v7, 64, v6
+; GISEL-NEXT: v_sub_u32_e32 v2, 64, v6
; GISEL-NEXT: v_lshrrev_b64 v[2:3], v2, v[4:5]
-; GISEL-NEXT: v_lshlrev_b64 v[4:5], v6, v[4:5]
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v7
+; GISEL-NEXT: v_lshlrev_b64 v[4:5], v7, v[4:5]
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v6
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v9, v[0:1]
; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v13, v2, 0, s[6:7]
@@ -271,7 +271,7 @@ define i128 @fptosi_f64_to_i128(double %x) {
; GISEL-NEXT: s_cbranch_execz .LBB0_6
; GISEL-NEXT: ; %bb.5: ; %fp-to-i-if-then12
; GISEL-NEXT: v_sub_co_u32_e32 v6, vcc, 0x433, v6
-; GISEL-NEXT: v_add_u32_e32 v2, 0xffffffc0, v6
+; GISEL-NEXT: v_subrev_u32_e32 v2, 64, v6
; GISEL-NEXT: v_lshrrev_b64 v[0:1], v6, v[4:5]
; GISEL-NEXT: v_lshrrev_b64 v[2:3], v2, 0
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
@@ -604,17 +604,17 @@ define i128 @fptoui_f64_to_i128(double %x) {
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB1_4
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
-; GISEL-NEXT: v_add_u32_e32 v7, 0xfffffbcd, v6
-; GISEL-NEXT: v_lshlrev_b64 v[0:1], v7, v[4:5]
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7
+; GISEL-NEXT: v_add_u32_e32 v6, 0xfffffbcd, v6
+; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
+; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v12, 0, v1, vcc
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v11, v10, 0
-; GISEL-NEXT: v_add_u32_e32 v6, 0xfffffb8d, v6
-; GISEL-NEXT: v_sub_u32_e32 v2, 64, v7
+; GISEL-NEXT: v_subrev_u32_e32 v7, 64, v6
+; GISEL-NEXT: v_sub_u32_e32 v2, 64, v6
; GISEL-NEXT: v_lshrrev_b64 v[2:3], v2, v[4:5]
-; GISEL-NEXT: v_lshlrev_b64 v[4:5], v6, v[4:5]
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v7
+; GISEL-NEXT: v_lshlrev_b64 v[4:5], v7, v[4:5]
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v6
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v9, v[0:1]
; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v13, v2, 0, s[6:7]
@@ -641,7 +641,7 @@ define i128 @fptoui_f64_to_i128(double %x) {
; GISEL-NEXT: s_cbranch_execz .LBB1_6
; GISEL-NEXT: ; %bb.5: ; %fp-to-i-if-then12
; GISEL-NEXT: v_sub_co_u32_e32 v6, vcc, 0x433, v6
-; GISEL-NEXT: v_add_u32_e32 v2, 0xffffffc0, v6
+; GISEL-NEXT: v_subrev_u32_e32 v2, 64, v6
; GISEL-NEXT: v_lshrrev_b64 v[0:1], v6, v[4:5]
; GISEL-NEXT: v_lshrrev_b64 v[2:3], v2, 0
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
@@ -967,17 +967,17 @@ define i128 @fptosi_f32_to_i128(float %x) {
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB2_4
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
-; GISEL-NEXT: v_add_u32_e32 v7, 0xffffff6a, v6
-; GISEL-NEXT: v_lshlrev_b64 v[0:1], v7, v[4:5]
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7
+; GISEL-NEXT: v_add_u32_e32 v6, 0xffffff6a, v6
+; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
+; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v12, 0, v1, vcc
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v11, v10, 0
-; GISEL-NEXT: v_add_u32_e32 v6, 0xffffff2a, v6
-; GISEL-NEXT: v_sub_u32_e32 v2, 64, v7
+; GISEL-NEXT: v_subrev_u32_e32 v7, 64, v6
+; GISEL-NEXT: v_sub_u32_e32 v2, 64, v6
; GISEL-NEXT: v_lshrrev_b64 v[2:3], v2, v[4:5]
-; GISEL-NEXT: v_lshlrev_b64 v[4:5], v6, v[4:5]
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v7
+; GISEL-NEXT: v_lshlrev_b64 v[4:5], v7, v[4:5]
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v6
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v8, v[0:1]
; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v13, v2, 0, s[6:7]
@@ -1004,7 +1004,7 @@ define i128 @fptosi_f32_to_i128(float %x) {
; GISEL-NEXT: s_cbranch_execz .LBB2_6
; GISEL-NEXT: ; %bb.5: ; %fp-to-i-if-then12
; GISEL-NEXT: v_sub_co_u32_e32 v3, vcc, 0x96, v6
-; GISEL-NEXT: v_add_u32_e32 v2, 0xffffffc0, v3
+; GISEL-NEXT: v_subrev_u32_e32 v2, 64, v3
; GISEL-NEXT: v_lshrrev_b64 v[0:1], v3, v[4:5]
; GISEL-NEXT: v_lshrrev_b64 v[1:2], v2, 0
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v3
@@ -1324,17 +1324,17 @@ define i128 @fptoui_f32_to_i128(float %x) {
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB3_4
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
-; GISEL-NEXT: v_add_u32_e32 v7, 0xffffff6a, v6
-; GISEL-NEXT: v_lshlrev_b64 v[0:1], v7, v[4:5]
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7
+; GISEL-NEXT: v_add_u32_e32 v6, 0xffffff6a, v6
+; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
+; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v12, 0, v1, vcc
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v11, v10, 0
-; GISEL-NEXT: v_add_u32_e32 v6, 0xffffff2a, v6
-; GISEL-NEXT: v_sub_u32_e32 v2, 64, v7
+; GISEL-NEXT: v_subrev_u32_e32 v7, 64, v6
+; GISEL-NEXT: v_sub_u32_e32 v2, 64, v6
; GISEL-NEXT: v_lshrrev_b64 v[2:3], v2, v[4:5]
-; GISEL-NEXT: v_lshlrev_b64 v[4:5], v6, v[4:5]
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v7
+; GISEL-NEXT: v_lshlrev_b64 v[4:5], v7, v[4:5]
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v6
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v8, v[0:1]
; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v13, v2, 0, s[6:7]
@@ -1361,7 +1361,7 @@ define i128 @fptoui_f32_to_i128(float %x) {
; GISEL-NEXT: s_cbranch_execz .LBB3_6
; GISEL-NEXT: ; %bb.5: ; %fp-to-i-if-then12
; GISEL-NEXT: v_sub_co_u32_e32 v3, vcc, 0x96, v6
-; GISEL-NEXT: v_add_u32_e32 v2, 0xffffffc0, v3
+; GISEL-NEXT: v_subrev_u32_e32 v2, 64, v3
; GISEL-NEXT: v_lshrrev_b64 v[0:1], v3, v[4:5]
; GISEL-NEXT: v_lshrrev_b64 v[1:2], v2, 0
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v3
@@ -1707,7 +1707,7 @@ define i128 @fptosi_bf16_to_i128(bfloat %x) {
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
; GISEL-NEXT: v_add_u32_e32 v6, 0xffffff7a, v5
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[7:8]
-; GISEL-NEXT: v_add_u32_e32 v4, 0xffffff3a, v5
+; GISEL-NEXT: v_subrev_u32_e32 v4, 64, v6
; GISEL-NEXT: v_sub_u32_e32 v2, 64, v6
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
; GISEL-NEXT: v_lshl_or_b32 v11, v11, 16, v11
@@ -1741,7 +1741,7 @@ define i128 @fptosi_bf16_to_i128(bfloat %x) {
; GISEL-NEXT: s_cbranch_execz .LBB6_6
; GISEL-NEXT: ; %bb.5: ; %fp-to-i-if-then12
; GISEL-NEXT: v_sub_co_u32_e32 v3, vcc, 0x86, v5
-; GISEL-NEXT: v_add_u32_e32 v2, 0xffffffc0, v3
+; GISEL-NEXT: v_subrev_u32_e32 v2, 64, v3
; GISEL-NEXT: v_lshrrev_b64 v[0:1], v3, v[7:8]
; GISEL-NEXT: v_lshrrev_b64 v[1:2], v2, 0
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v3
@@ -2058,7 +2058,7 @@ define i128 @fptoui_bf16_to_i128(bfloat %x) {
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
; GISEL-NEXT: v_add_u32_e32 v6, 0xffffff7a, v5
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[7:8]
-; GISEL-NEXT: v_add_u32_e32 v4, 0xffffff3a, v5
+; GISEL-NEXT: v_subrev_u32_e32 v4, 64, v6
; GISEL-NEXT: v_sub_u32_e32 v2, 64, v6
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
; GISEL-NEXT: v_lshl_or_b32 v11, v11, 16, v11
@@ -2092,7 +2092,7 @@ define i128 @fptoui_bf16_to_i128(bfloat %x) {
; GISEL-NEXT: s_cbranch_execz .LBB7_6
; GISEL-NEXT: ; %bb.5: ; %fp-to-i-if-then12
; GISEL-NEXT: v_sub_co_u32_e32 v3, vcc, 0x86, v5
-; GISEL-NEXT: v_add_u32_e32 v2, 0xffffffc0, v3
+; GISEL-NEXT: v_subrev_u32_e32 v2, 64, v3
; GISEL-NEXT: v_lshrrev_b64 v[0:1], v3, v[7:8]
; GISEL-NEXT: v_lshrrev_b64 v[1:2], v2, 0
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v3
diff --git a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
index f372a54894604c2..2999ddb8315883e 100644
--- a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
@@ -193,32 +193,32 @@ define float @sitofp_i128_to_f32(i128 %x) {
; GISEL-NEXT: v_sub_u32_e32 v11, 64, v4
; GISEL-NEXT: v_lshrrev_b64 v[9:10], v4, v[0:1]
; GISEL-NEXT: v_lshlrev_b64 v[11:12], v11, v[2:3]
-; GISEL-NEXT: v_add_u32_e32 v13, 0xffffffc0, v4
+; GISEL-NEXT: v_subrev_u32_e32 v13, 64, v4
; GISEL-NEXT: v_or_b32_e32 v11, v9, v11
; GISEL-NEXT: v_or_b32_e32 v12, v10, v12
; GISEL-NEXT: v_lshrrev_b64 v[9:10], v13, v[2:3]
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v4
-; GISEL-NEXT: v_add_u32_e32 v14, 26, v5
+; GISEL-NEXT: v_add_u32_e32 v5, 26, v5
; GISEL-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v10, v10, v12, vcc
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
-; GISEL-NEXT: v_sub_u32_e32 v11, 64, v14
+; GISEL-NEXT: v_sub_u32_e32 v11, 64, v5
; GISEL-NEXT: v_cndmask_b32_e32 v13, v9, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v4, v10, v1, vcc
-; GISEL-NEXT: v_lshrrev_b64 v[9:10], v14, -1
+; GISEL-NEXT: v_lshrrev_b64 v[9:10], v5, -1
; GISEL-NEXT: v_lshlrev_b64 v[11:12], v11, -1
-; GISEL-NEXT: v_add_u32_e32 v5, 0xffffffda, v5
+; GISEL-NEXT: v_subrev_u32_e32 v14, 64, v5
; GISEL-NEXT: v_or_b32_e32 v15, v9, v11
; GISEL-NEXT: v_or_b32_e32 v16, v10, v12
-; GISEL-NEXT: v_lshrrev_b64 v[11:12], v5, -1
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14
-; GISEL-NEXT: v_cndmask_b32_e32 v5, v11, v15, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v11, v12, v16, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14
+; GISEL-NEXT: v_lshrrev_b64 v[11:12], v14, -1
+; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v5
+; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v15, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5
; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v10, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, -1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v11, v11, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v5, v11, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v11, v12, -1, s[4:5]
; GISEL-NEXT: v_and_b32_e32 v2, v9, v2
; GISEL-NEXT: v_and_b32_e32 v3, v10, v3
; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2
@@ -438,32 +438,32 @@ define float @uitofp_i128_to_f32(i128 %x) {
; GISEL-NEXT: v_sub_u32_e32 v10, 64, v4
; GISEL-NEXT: v_lshrrev_b64 v[8:9], v4, v[0:1]
; GISEL-NEXT: v_lshlrev_b64 v[10:11], v10, v[2:3]
-; GISEL-NEXT: v_add_u32_e32 v12, 0xffffffc0, v4
+; GISEL-NEXT: v_subrev_u32_e32 v12, 64, v4
; GISEL-NEXT: v_or_b32_e32 v10, v8, v10
; GISEL-NEXT: v_or_b32_e32 v11, v9, v11
; GISEL-NEXT: v_lshrrev_b64 v[8:9], v12, v[2:3]
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v4
-; GISEL-NEXT: v_add_u32_e32 v13, 26, v5
+; GISEL-NEXT: v_add_u32_e32 v5, 26, v5
; GISEL-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
-; GISEL-NEXT: v_sub_u32_e32 v10, 64, v13
+; GISEL-NEXT: v_sub_u32_e32 v10, 64, v5
; GISEL-NEXT: v_cndmask_b32_e32 v12, v8, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v4, v9, v1, vcc
-; GISEL-NEXT: v_lshrrev_b64 v[8:9], v13, -1
+; GISEL-NEXT: v_lshrrev_b64 v[8:9], v5, -1
; GISEL-NEXT: v_lshlrev_b64 v[10:11], v10, -1
-; GISEL-NEXT: v_add_u32_e32 v5, 0xffffffda, v5
+; GISEL-NEXT: v_subrev_u32_e32 v13, 64, v5
; GISEL-NEXT: v_or_b32_e32 v14, v8, v10
; GISEL-NEXT: v_or_b32_e32 v15, v9, v11
-; GISEL-NEXT: v_lshrrev_b64 v[10:11], v5, -1
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v13
-; GISEL-NEXT: v_cndmask_b32_e32 v5, v10, v14, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v15, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v13
+; GISEL-NEXT: v_lshrrev_b64 v[10:11], v13, -1
+; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v5
+; GISEL-NEXT: v_cndmask_b32_e32 v10, v10, v14, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v15, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5
; GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v8, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, -1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v10, v10, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v5, v10, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v10, v11, -1, s[4:5]
; GISEL-NEXT: v_and_b32_e32 v2, v8, v2
; GISEL-NEXT: v_and_b32_e32 v3, v9, v3
; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2
@@ -723,34 +723,34 @@ define double @sitofp_i128_to_f64(i128 %x) {
; GISEL-NEXT: v_sub_u32_e32 v10, 64, v14
; GISEL-NEXT: v_lshrrev_b64 v[0:1], v14, v[2:3]
; GISEL-NEXT: v_lshlrev_b64 v[10:11], v10, v[4:5]
-; GISEL-NEXT: v_add_u32_e32 v15, 0xffffffc0, v14
+; GISEL-NEXT: v_subrev_u32_e32 v15, 64, v14
; GISEL-NEXT: v_lshrrev_b64 v[12:13], v14, v[4:5]
; GISEL-NEXT: v_or_b32_e32 v10, v0, v10
; GISEL-NEXT: v_or_b32_e32 v11, v1, v11
; GISEL-NEXT: v_lshrrev_b64 v[0:1], v15, v[4:5]
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14
-; GISEL-NEXT: v_add_u32_e32 v15, 55, v9
+; GISEL-NEXT: v_add_u32_e32 v9, 55, v9
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v12, vcc
-; GISEL-NEXT: v_sub_u32_e32 v12, 64, v15
+; GISEL-NEXT: v_sub_u32_e32 v12, 64, v9
; GISEL-NEXT: v_cndmask_b32_e64 v14, v0, v2, s[4:5]
; GISEL-NEXT: v_cndmask_b32_e64 v10, v1, v3, s[4:5]
-; GISEL-NEXT: v_lshrrev_b64 v[0:1], v15, -1
+; GISEL-NEXT: v_lshrrev_b64 v[0:1], v9, -1
; GISEL-NEXT: v_lshlrev_b64 v[12:13], v12, -1
-; GISEL-NEXT: v_add_u32_e32 v9, -9, v9
+; GISEL-NEXT: v_subrev_u32_e32 v15, 64, v9
; GISEL-NEXT: v_or_b32_e32 v16, v0, v12
; GISEL-NEXT: v_or_b32_e32 v17, v1, v13
-; GISEL-NEXT: v_lshrrev_b64 v[12:13], v9, -1
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15
-; GISEL-NEXT: v_cndmask_b32_e32 v9, v12, v16, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v12, v13, v17, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v15
+; GISEL-NEXT: v_lshrrev_b64 v[12:13], v15, -1
+; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v9
+; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v13, v13, v17, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9
; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v9, v9, -1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v12, v12, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v9, v12, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v12, v13, -1, s[4:5]
; GISEL-NEXT: v_and_b32_e32 v0, v0, v4
; GISEL-NEXT: v_and_b32_e32 v1, v1, v5
; GISEL-NEXT: v_and_or_b32 v0, v9, v2, v0
@@ -999,35 +999,35 @@ define double @uitofp_i128_to_f64(i128 %x) {
; GISEL-NEXT: v_sub_u32_e32 v9, 64, v13
; GISEL-NEXT: v_lshrrev_b64 v[4:5], v13, v[0:1]
; GISEL-NEXT: v_lshlrev_b64 v[9:10], v9, v[2:3]
-; GISEL-NEXT: v_add_u32_e32 v14, 0xffffffc0, v13
+; GISEL-NEXT: v_subrev_u32_e32 v14, 64, v13
; GISEL-NEXT: v_lshrrev_b64 v[11:12], v13, v[2:3]
; GISEL-NEXT: v_or_b32_e32 v9, v4, v9
; GISEL-NEXT: v_or_b32_e32 v10, v5, v10
; GISEL-NEXT: v_lshrrev_b64 v[4:5], v14, v[2:3]
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v13
-; GISEL-NEXT: v_add_u32_e32 v15, 55, v8
+; GISEL-NEXT: v_add_u32_e32 v8, 55, v8
; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v9, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v10, vcc
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v13
; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v11, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v12, vcc
-; GISEL-NEXT: v_sub_u32_e32 v12, 64, v15
+; GISEL-NEXT: v_sub_u32_e32 v12, 64, v8
; GISEL-NEXT: v_cndmask_b32_e64 v14, v4, v0, s[4:5]
; GISEL-NEXT: v_cndmask_b32_e64 v9, v5, v1, s[4:5]
-; GISEL-NEXT: v_lshrrev_b64 v[4:5], v15, -1
+; GISEL-NEXT: v_lshrrev_b64 v[4:5], v8, -1
; GISEL-NEXT: v_lshlrev_b64 v[12:13], v12, -1
-; GISEL-NEXT: v_add_u32_e32 v8, -9, v8
+; GISEL-NEXT: v_subrev_u32_e32 v15, 64, v8
; GISEL-NEXT: v_or_b32_e32 v16, v4, v12
; GISEL-NEXT: v_or_b32_e32 v17, v5, v13
-; GISEL-NEXT: v_lshrrev_b64 v[12:13], v8, -1
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15
-; GISEL-NEXT: v_cndmask_b32_e32 v8, v12, v16, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v12, v13, v17, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v15
+; GISEL-NEXT: v_lshrrev_b64 v[12:13], v15, -1
+; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v8
+; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v13, v13, v17, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8
; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v5, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v8, v8, -1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v12, v12, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v8, v12, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v12, v13, -1, s[4:5]
; GISEL-NEXT: v_and_b32_e32 v2, v4, v2
; GISEL-NEXT: v_and_b32_e32 v3, v5, v3
; GISEL-NEXT: v_and_or_b32 v0, v8, v0, v2
@@ -1284,32 +1284,32 @@ define half @sitofp_i128_to_f16(i128 %x) {
; GISEL-NEXT: v_sub_u32_e32 v11, 64, v4
; GISEL-NEXT: v_lshrrev_b64 v[9:10], v4, v[0:1]
; GISEL-NEXT: v_lshlrev_b64 v[11:12], v11, v[2:3]
-; GISEL-NEXT: v_add_u32_e32 v13, 0xffffffc0, v4
+; GISEL-NEXT: v_subrev_u32_e32 v13, 64, v4
; GISEL-NEXT: v_or_b32_e32 v11, v9, v11
; GISEL-NEXT: v_or_b32_e32 v12, v10, v12
; GISEL-NEXT: v_lshrrev_b64 v[9:10], v13, v[2:3]
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v4
-; GISEL-NEXT: v_add_u32_e32 v14, 26, v5
+; GISEL-NEXT: v_add_u32_e32 v5, 26, v5
; GISEL-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v10, v10, v12, vcc
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
-; GISEL-NEXT: v_sub_u32_e32 v11, 64, v14
+; GISEL-NEXT: v_sub_u32_e32 v11, 64, v5
; GISEL-NEXT: v_cndmask_b32_e32 v13, v9, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v4, v10, v1, vcc
-; GISEL-NEXT: v_lshrrev_b64 v[9:10], v14, -1
+; GISEL-NEXT: v_lshrrev_b64 v[9:10], v5, -1
; GISEL-NEXT: v_lshlrev_b64 v[11:12], v11, -1
-; GISEL-NEXT: v_add_u32_e32 v5, 0xffffffda, v5
+; GISEL-NEXT: v_subrev_u32_e32 v14, 64, v5
; GISEL-NEXT: v_or_b32_e32 v15, v9, v11
; GISEL-NEXT: v_or_b32_e32 v16, v10, v12
-; GISEL-NEXT: v_lshrrev_b64 v[11:12], v5, -1
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14
-; GISEL-NEXT: v_cndmask_b32_e32 v5, v11, v15, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v11, v12, v16, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14
+; GISEL-NEXT: v_lshrrev_b64 v[11:12], v14, -1
+; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v5
+; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v15, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5
; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v10, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, -1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v11, v11, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v5, v11, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v11, v12, -1, s[4:5]
; GISEL-NEXT: v_and_b32_e32 v2, v9, v2
; GISEL-NEXT: v_and_b32_e32 v3, v10, v3
; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2
@@ -1531,32 +1531,32 @@ define half @uitofp_i128_to_f16(i128 %x) {
; GISEL-NEXT: v_sub_u32_e32 v10, 64, v4
; GISEL-NEXT: v_lshrrev_b64 v[8:9], v4, v[0:1]
; GISEL-NEXT: v_lshlrev_b64 v[10:11], v10, v[2:3]
-; GISEL-NEXT: v_add_u32_e32 v12, 0xffffffc0, v4
+; GISEL-NEXT: v_subrev_u32_e32 v12, 64, v4
; GISEL-NEXT: v_or_b32_e32 v10, v8, v10
; GISEL-NEXT: v_or_b32_e32 v11, v9, v11
; GISEL-NEXT: v_lshrrev_b64 v[8:9], v12, v[2:3]
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v4
-; GISEL-NEXT: v_add_u32_e32 v13, 26, v5
+; GISEL-NEXT: v_add_u32_e32 v5, 26, v5
; GISEL-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
-; GISEL-NEXT: v_sub_u32_e32 v10, 64, v13
+; GISEL-NEXT: v_sub_u32_e32 v10, 64, v5
; GISEL-NEXT: v_cndmask_b32_e32 v12, v8, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v4, v9, v1, vcc
-; GISEL-NEXT: v_lshrrev_b64 v[8:9], v13, -1
+; GISEL-NEXT: v_lshrrev_b64 v[8:9], v5, -1
; GISEL-NEXT: v_lshlrev_b64 v[10:11], v10, -1
-; GISEL-NEXT: v_add_u32_e32 v5, 0xffffffda, v5
+; GISEL-NEXT: v_subrev_u32_e32 v13, 64, v5
; GISEL-NEXT: v_or_b32_e32 v14, v8, v10
; GISEL-NEXT: v_or_b32_e32 v15, v9, v11
-; GISEL-NEXT: v_lshrrev_b64 v[10:11], v5, -1
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v13
-; GISEL-NEXT: v_cndmask_b32_e32 v5, v10, v14, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v15, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v13
+; GISEL-NEXT: v_lshrrev_b64 v[10:11], v13, -1
+; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v5
+; GISEL-NEXT: v_cndmask_b32_e32 v10, v10, v14, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v15, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5
; GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v8, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, -1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v10, v10, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v5, v10, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v10, v11, -1, s[4:5]
; GISEL-NEXT: v_and_b32_e32 v2, v8, v2
; GISEL-NEXT: v_and_b32_e32 v3, v9, v3
; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
index b2bfc2ea4e0b28c..8d99ec2e1b709f1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
@@ -139,7 +139,7 @@ define amdgpu_kernel void @test_barrier(ptr addrspace(1) %out, i32 %size) #0 {
; VARIANT6-NEXT: s_load_b96 s[0:2], s[2:3], 0x24
; VARIANT6-NEXT: s_wait_kmcnt 0x0
; VARIANT6-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_and_b32 v4, 0x3ff, v0
-; VARIANT6-NEXT: s_add_co_i32 s2, s2, -1
+; VARIANT6-NEXT: s_sub_co_i32 s2, s2, 1
; VARIANT6-NEXT: s_delay_alu instid0(VALU_DEP_1)
; VARIANT6-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_lshlrev_b32 v5, 2, v4
; VARIANT6-NEXT: v_sub_nc_u32_e32 v0, s2, v4
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
index d874418b99dd383..a577fb3d190ab9f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
@@ -395,7 +395,7 @@ define i1 @posnormal_f16(half %x) nounwind {
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v0, v2
-; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v1
+; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v1
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
@@ -459,7 +459,7 @@ define i1 @negnormal_f16(half %x) nounwind {
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
; GFX7GLISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], v0, v2
-; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v1
+; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v1
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
@@ -513,7 +513,7 @@ define i1 @possubnormal_f16(half %x) nounwind {
; GFX7GLISEL-LABEL: possubnormal_f16:
; GFX7GLISEL: ; %bb.0:
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, -1, v0
+; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 1, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x3ff
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
@@ -575,7 +575,7 @@ define i1 @negsubnormal_f16(half %x) nounwind {
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
; GFX7GLISEL-NEXT: v_cmp_ne_u32_e32 vcc, v0, v2
-; GFX7GLISEL-NEXT: v_add_i32_e64 v0, s[4:5], -1, v1
+; GFX7GLISEL-NEXT: v_subrev_i32_e64 v0, s[4:5], 1, v1
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x3ff
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], v0, v1
@@ -1587,7 +1587,7 @@ define i1 @not_issubnormal_or_zero_f16(half %x) {
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00
-; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0
+; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v0
; GFX7GLISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v2
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
@@ -1647,7 +1647,7 @@ define i1 @isnormal_f16(half %x) {
; GFX7GLISEL: ; %bb.0:
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0
+; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
@@ -1780,7 +1780,7 @@ define i1 @not_is_plus_normal_f16(half %x) {
; GFX7GLISEL-NEXT: s_or_b64 s[6:7], vcc, s[6:7]
; GFX7GLISEL-NEXT: v_cmp_gt_u32_e32 vcc, v3, v2
; GFX7GLISEL-NEXT: s_or_b64 s[6:7], s[6:7], vcc
-; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v1
+; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v1
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
@@ -1853,7 +1853,7 @@ define i1 @not_is_neg_normal_f16(half %x) {
; GFX7GLISEL-NEXT: s_or_b64 s[6:7], vcc, s[6:7]
; GFX7GLISEL-NEXT: v_cmp_gt_u32_e32 vcc, v3, v2
; GFX7GLISEL-NEXT: s_or_b64 s[6:7], s[6:7], vcc
-; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v1
+; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v1
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
@@ -1911,7 +1911,7 @@ define i1 @issubnormal_f16(half %x) {
; GFX7GLISEL: ; %bb.0:
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, -1, v0
+; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 1, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x3ff
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
@@ -1974,7 +1974,7 @@ define i1 @not_issubnormal_f16(half %x) {
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
; GFX7GLISEL-NEXT: v_cmp_gt_u32_e32 vcc, v1, v2
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc
-; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0
+; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
@@ -2081,7 +2081,7 @@ define i1 @not_iszero_f16(half %x) {
; GFX7GLISEL: ; %bb.0:
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_add_i32_e32 v1, vcc, -1, v0
+; GFX7GLISEL-NEXT: v_subrev_i32_e32 v1, vcc, 1, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v2
@@ -2091,7 +2091,7 @@ define i1 @not_iszero_f16(half %x) {
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
; GFX7GLISEL-NEXT: v_cmp_gt_u32_e32 vcc, v1, v2
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc
-; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0
+; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
@@ -2568,7 +2568,7 @@ define i1 @not_iszero_or_nan_f16(half %x) {
; GFX7GLISEL: ; %bb.0: ; %entry
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_add_i32_e32 v1, vcc, -1, v0
+; GFX7GLISEL-NEXT: v_subrev_i32_e32 v1, vcc, 1, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v2
@@ -2576,7 +2576,7 @@ define i1 @not_iszero_or_nan_f16(half %x) {
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v2
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0
+; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
@@ -2635,7 +2635,7 @@ define i1 @not_iszero_or_nan_f_daz(half %x) #0 {
; GFX7GLISEL: ; %bb.0: ; %entry
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_add_i32_e32 v1, vcc, -1, v0
+; GFX7GLISEL-NEXT: v_subrev_i32_e32 v1, vcc, 1, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v2
@@ -2643,7 +2643,7 @@ define i1 @not_iszero_or_nan_f_daz(half %x) #0 {
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v2
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0
+; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
@@ -2702,7 +2702,7 @@ define i1 @not_iszero_or_nan_f_maybe_daz(half %x) #1 {
; GFX7GLISEL: ; %bb.0: ; %entry
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_add_i32_e32 v1, vcc, -1, v0
+; GFX7GLISEL-NEXT: v_subrev_i32_e32 v1, vcc, 1, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v2
@@ -2710,7 +2710,7 @@ define i1 @not_iszero_or_nan_f_maybe_daz(half %x) #1 {
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v2
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0
+; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
@@ -2902,7 +2902,7 @@ define i1 @not_iszero_or_qnan_f16(half %x) {
; GFX7GLISEL: ; %bb.0: ; %entry
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_add_i32_e32 v1, vcc, -1, v0
+; GFX7GLISEL-NEXT: v_subrev_i32_e32 v1, vcc, 1, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v2
@@ -2914,7 +2914,7 @@ define i1 @not_iszero_or_qnan_f16(half %x) {
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x1ff
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v2
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc
-; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0
+; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
@@ -2983,7 +2983,7 @@ define i1 @not_iszero_or_snan_f16(half %x) {
; GFX7GLISEL: ; %bb.0: ; %entry
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_add_i32_e32 v1, vcc, -1, v0
+; GFX7GLISEL-NEXT: v_subrev_i32_e32 v1, vcc, 1, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v2
@@ -2994,7 +2994,7 @@ define i1 @not_iszero_or_snan_f16(half %x) {
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
; GFX7GLISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc
-; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0
+; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
diff --git a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
index b3c06756a89872e..6ac04d8bc42bba5 100644
--- a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
+++ b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
@@ -41,7 +41,7 @@ define amdgpu_kernel void @v_test_i32_x_sub_64(ptr addrspace(1) %out, ptr addrsp
; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3]
; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 0xffffffc0, v2
+; SI-GISEL-NEXT: v_subrev_i32_e32 v2, vcc, 64, v2
; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT: s_endpgm
@@ -78,79 +78,44 @@ define amdgpu_kernel void @v_test_i32_x_sub_64(ptr addrspace(1) %out, ptr addrsp
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, 0xffffffc0, v3
+; VI-GISEL-NEXT: v_subrev_u32_e32 v2, vcc, 64, v3
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-SDAG-LABEL: v_test_i32_x_sub_64:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[6:7]
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_subrev_u32_e32 v1, 64, v1
-; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[4:5]
-; GFX9-SDAG-NEXT: s_endpgm
-;
-; GFX9-GISEL-LABEL: v_test_i32_x_sub_64:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[6:7]
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: v_add_u32_e32 v1, 0xffffffc0, v1
-; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[4:5]
-; GFX9-GISEL-NEXT: s_endpgm
-;
-; GFX10-SDAG-LABEL: v_test_i32_x_sub_64:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[6:7]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: v_subrev_nc_u32_e32 v1, 64, v1
-; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[4:5]
-; GFX10-SDAG-NEXT: s_endpgm
-;
-; GFX10-GISEL-LABEL: v_test_i32_x_sub_64:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[6:7]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 0xffffffc0, v1
-; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[4:5]
-; GFX10-GISEL-NEXT: s_endpgm
+; GFX9-LABEL: v_test_i32_x_sub_64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_load_dword v1, v0, s[6:7]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_subrev_u32_e32 v1, 64, v1
+; GFX9-NEXT: global_store_dword v0, v1, s[4:5]
+; GFX9-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: v_test_i32_x_sub_64:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
-; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_subrev_nc_u32_e32 v1, 64, v1
-; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-SDAG-NEXT: s_endpgm
+; GFX10-LABEL: v_test_i32_x_sub_64:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v1, v0, s[6:7]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 64, v1
+; GFX10-NEXT: global_store_dword v0, v1, s[4:5]
+; GFX10-NEXT: s_endpgm
;
-; GFX11-GISEL-LABEL: v_test_i32_x_sub_64:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
-; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v1, 0xffffffc0, v1
-; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-LABEL: v_test_i32_x_sub_64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
+; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 64, v1
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %tid.ext
@@ -198,8 +163,8 @@ define amdgpu_kernel void @v_test_i32_x_sub_64_multi_use(ptr addrspace(1) %out,
; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 glc
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
-; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 0xffffffc0, v2
-; SI-GISEL-NEXT: v_add_i32_e32 v3, vcc, 0xffffffc0, v3
+; SI-GISEL-NEXT: v_subrev_i32_e32 v2, vcc, 64, v2
+; SI-GISEL-NEXT: v_subrev_i32_e32 v3, vcc, 64, v3
; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
; SI-GISEL-NEXT: buffer_store_dword v3, v[0:1], s[0:3], 0 addr64
@@ -246,119 +211,66 @@ define amdgpu_kernel void @v_test_i32_x_sub_64_multi_use(ptr addrspace(1) %out,
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, 0xffffffc0, v3
-; VI-GISEL-NEXT: v_add_u32_e32 v3, vcc, 0xffffffc0, v4
+; VI-GISEL-NEXT: v_subrev_u32_e32 v2, vcc, 64, v3
+; VI-GISEL-NEXT: v_subrev_u32_e32 v3, vcc, 64, v4
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
; VI-GISEL-NEXT: flat_store_dword v[0:1], v3
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-SDAG-LABEL: v_test_i32_x_sub_64_multi_use:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[6:7] glc
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: global_load_dword v2, v0, s[6:7] glc
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_subrev_u32_e32 v1, 64, v1
-; GFX9-SDAG-NEXT: v_subrev_u32_e32 v2, 64, v2
-; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[4:5]
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: global_store_dword v0, v2, s[4:5]
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: s_endpgm
-;
-; GFX9-GISEL-LABEL: v_test_i32_x_sub_64_multi_use:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[6:7] glc
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: global_load_dword v2, v0, s[6:7] glc
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: v_add_u32_e32 v1, 0xffffffc0, v1
-; GFX9-GISEL-NEXT: v_add_u32_e32 v2, 0xffffffc0, v2
-; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[4:5]
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: global_store_dword v0, v2, s[4:5]
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: s_endpgm
-;
-; GFX10-SDAG-LABEL: v_test_i32_x_sub_64_multi_use:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[6:7] glc dlc
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: global_load_dword v2, v0, s[6:7] glc dlc
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: v_subrev_nc_u32_e32 v1, 64, v1
-; GFX10-SDAG-NEXT: v_subrev_nc_u32_e32 v2, 64, v2
-; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[4:5]
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-SDAG-NEXT: global_store_dword v0, v2, s[4:5]
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-SDAG-NEXT: s_endpgm
-;
-; GFX10-GISEL-LABEL: v_test_i32_x_sub_64_multi_use:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[6:7] glc dlc
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: global_load_dword v2, v0, s[6:7] glc dlc
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 0xffffffc0, v1
-; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, 0xffffffc0, v2
-; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[4:5]
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-GISEL-NEXT: global_store_dword v0, v2, s[4:5]
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-GISEL-NEXT: s_endpgm
+; GFX9-LABEL: v_test_i32_x_sub_64_multi_use:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_load_dword v1, v0, s[6:7] glc
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_load_dword v2, v0, s[6:7] glc
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_subrev_u32_e32 v1, 64, v1
+; GFX9-NEXT: v_subrev_u32_e32 v2, 64, v2
+; GFX9-NEXT: global_store_dword v0, v1, s[4:5]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_dword v0, v2, s[4:5]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: v_test_i32_x_sub_64_multi_use:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
-; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: global_load_b32 v2, v0, s[2:3] glc dlc
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_subrev_nc_u32_e32 v1, 64, v1
-; GFX11-SDAG-NEXT: v_subrev_nc_u32_e32 v2, 64, v2
-; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] dlc
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-SDAG-NEXT: global_store_b32 v0, v2, s[0:1] dlc
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-SDAG-NEXT: s_endpgm
+; GFX10-LABEL: v_test_i32_x_sub_64_multi_use:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v1, v0, s[6:7] glc dlc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_load_dword v2, v0, s[6:7] glc dlc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 64, v1
+; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 64, v2
+; GFX10-NEXT: global_store_dword v0, v1, s[4:5]
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: global_store_dword v0, v2, s[4:5]
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: s_endpgm
;
-; GFX11-GISEL-LABEL: v_test_i32_x_sub_64_multi_use:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
-; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: global_load_b32 v2, v0, s[2:3] glc dlc
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v1, 0xffffffc0, v1
-; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v2, 0xffffffc0, v2
-; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1] dlc
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-GISEL-NEXT: global_store_b32 v0, v2, s[0:1] dlc
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-LABEL: v_test_i32_x_sub_64_multi_use:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
+; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: global_load_b32 v2, v0, s[2:3] glc dlc
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 64, v1
+; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 64, v2
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] dlc
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: global_store_b32 v0, v2, s[0:1] dlc
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %tid.ext
@@ -513,7 +425,7 @@ define amdgpu_kernel void @v_test_i32_x_sub_65(ptr addrspace(1) %out, ptr addrsp
; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3]
; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 0xffffffbf, v2
+; SI-GISEL-NEXT: v_subrev_i32_e32 v2, vcc, 0x41, v2
; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT: s_endpgm
@@ -550,44 +462,79 @@ define amdgpu_kernel void @v_test_i32_x_sub_65(ptr addrspace(1) %out, ptr addrsp
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, 0xffffffbf, v3
+; VI-GISEL-NEXT: v_subrev_u32_e32 v2, vcc, 0x41, v3
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-LABEL: v_test_i32_x_sub_65:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_dword v1, v0, s[6:7]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_add_u32_e32 v1, 0xffffffbf, v1
-; GFX9-NEXT: global_store_dword v0, v1, s[4:5]
-; GFX9-NEXT: s_endpgm
+; GFX9-SDAG-LABEL: v_test_i32_x_sub_65:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[6:7]
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT: v_add_u32_e32 v1, 0xffffffbf, v1
+; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[4:5]
+; GFX9-SDAG-NEXT: s_endpgm
;
-; GFX10-LABEL: v_test_i32_x_sub_65:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: global_load_dword v1, v0, s[6:7]
-; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_add_nc_u32_e32 v1, 0xffffffbf, v1
-; GFX10-NEXT: global_store_dword v0, v1, s[4:5]
-; GFX10-NEXT: s_endpgm
+; GFX9-GISEL-LABEL: v_test_i32_x_sub_65:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[6:7]
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: v_subrev_u32_e32 v1, 0x41, v1
+; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[4:5]
+; GFX9-GISEL-NEXT: s_endpgm
;
-; GFX11-LABEL: v_test_i32_x_sub_65:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
-; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_add_nc_u32_e32 v1, 0xffffffbf, v1
-; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT: s_endpgm
+; GFX10-SDAG-LABEL: v_test_i32_x_sub_65:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[6:7]
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, 0xffffffbf, v1
+; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[4:5]
+; GFX10-SDAG-NEXT: s_endpgm
+;
+; GFX10-GISEL-LABEL: v_test_i32_x_sub_65:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[6:7]
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: v_subrev_nc_u32_e32 v1, 0x41, v1
+; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[4:5]
+; GFX10-GISEL-NEXT: s_endpgm
+;
+; GFX11-SDAG-LABEL: v_test_i32_x_sub_65:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
+; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v1, 0xffffffbf, v1
+; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: v_test_i32_x_sub_65:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
+; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: v_subrev_nc_u32_e32 v1, 0x41, v1
+; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %tid.ext
@@ -739,7 +686,7 @@ define amdgpu_kernel void @v_test_i32_x_sub_neg16(ptr addrspace(1) %out, ptr add
; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3]
; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 16, v2
+; SI-GISEL-NEXT: v_subrev_i32_e32 v2, vcc, -16, v2
; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT: s_endpgm
@@ -776,44 +723,79 @@ define amdgpu_kernel void @v_test_i32_x_sub_neg16(ptr addrspace(1) %out, ptr add
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, 16, v3
+; VI-GISEL-NEXT: v_subrev_u32_e32 v2, vcc, -16, v3
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-LABEL: v_test_i32_x_sub_neg16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_dword v1, v0, s[6:7]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_add_u32_e32 v1, 16, v1
-; GFX9-NEXT: global_store_dword v0, v1, s[4:5]
-; GFX9-NEXT: s_endpgm
+; GFX9-SDAG-LABEL: v_test_i32_x_sub_neg16:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[6:7]
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT: v_add_u32_e32 v1, 16, v1
+; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[4:5]
+; GFX9-SDAG-NEXT: s_endpgm
;
-; GFX10-LABEL: v_test_i32_x_sub_neg16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: global_load_dword v1, v0, s[6:7]
-; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_add_nc_u32_e32 v1, 16, v1
-; GFX10-NEXT: global_store_dword v0, v1, s[4:5]
-; GFX10-NEXT: s_endpgm
+; GFX9-GISEL-LABEL: v_test_i32_x_sub_neg16:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[6:7]
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: v_subrev_u32_e32 v1, -16, v1
+; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[4:5]
+; GFX9-GISEL-NEXT: s_endpgm
;
-; GFX11-LABEL: v_test_i32_x_sub_neg16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
-; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_add_nc_u32_e32 v1, 16, v1
-; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT: s_endpgm
+; GFX10-SDAG-LABEL: v_test_i32_x_sub_neg16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[6:7]
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, 16, v1
+; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[4:5]
+; GFX10-SDAG-NEXT: s_endpgm
+;
+; GFX10-GISEL-LABEL: v_test_i32_x_sub_neg16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[6:7]
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: v_subrev_nc_u32_e32 v1, -16, v1
+; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[4:5]
+; GFX10-GISEL-NEXT: s_endpgm
+;
+; GFX11-SDAG-LABEL: v_test_i32_x_sub_neg16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
+; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v1, 16, v1
+; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: v_test_i32_x_sub_neg16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
+; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: v_subrev_nc_u32_e32 v1, -16, v1
+; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %tid.ext
@@ -965,7 +947,7 @@ define amdgpu_kernel void @v_test_i32_x_sub_neg17(ptr addrspace(1) %out, ptr add
; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3]
; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 17, v2
+; SI-GISEL-NEXT: v_subrev_i32_e32 v2, vcc, 0xffffffef, v2
; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT: s_endpgm
@@ -1002,44 +984,79 @@ define amdgpu_kernel void @v_test_i32_x_sub_neg17(ptr addrspace(1) %out, ptr add
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, 17, v3
+; VI-GISEL-NEXT: v_subrev_u32_e32 v2, vcc, 0xffffffef, v3
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-LABEL: v_test_i32_x_sub_neg17:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_dword v1, v0, s[6:7]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_add_u32_e32 v1, 17, v1
-; GFX9-NEXT: global_store_dword v0, v1, s[4:5]
-; GFX9-NEXT: s_endpgm
+; GFX9-SDAG-LABEL: v_test_i32_x_sub_neg17:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[6:7]
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT: v_add_u32_e32 v1, 17, v1
+; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[4:5]
+; GFX9-SDAG-NEXT: s_endpgm
+;
+; GFX9-GISEL-LABEL: v_test_i32_x_sub_neg17:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[6:7]
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: v_subrev_u32_e32 v1, 0xffffffef, v1
+; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[4:5]
+; GFX9-GISEL-NEXT: s_endpgm
+;
+; GFX10-SDAG-LABEL: v_test_i32_x_sub_neg17:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[6:7]
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, 17, v1
+; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[4:5]
+; GFX10-SDAG-NEXT: s_endpgm
;
-; GFX10-LABEL: v_test_i32_x_sub_neg17:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: global_load_dword v1, v0, s[6:7]
-; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_add_nc_u32_e32 v1, 17, v1
-; GFX10-NEXT: global_store_dword v0, v1, s[4:5]
-; GFX10-NEXT: s_endpgm
+; GFX10-GISEL-LABEL: v_test_i32_x_sub_neg17:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[6:7]
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: v_subrev_nc_u32_e32 v1, 0xffffffef, v1
+; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[4:5]
+; GFX10-GISEL-NEXT: s_endpgm
;
-; GFX11-LABEL: v_test_i32_x_sub_neg17:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
-; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_add_nc_u32_e32 v1, 17, v1
-; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT: s_endpgm
+; GFX11-SDAG-LABEL: v_test_i32_x_sub_neg17:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
+; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v1, 17, v1
+; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: v_test_i32_x_sub_neg17:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
+; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: v_subrev_nc_u32_e32 v1, 0xffffffef, v1
+; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %tid.ext
@@ -1246,7 +1263,7 @@ define amdgpu_kernel void @v_test_i16_x_sub_64(ptr addrspace(1) %out, ptr addrsp
; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3]
; SI-GISEL-NEXT: buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 0xffffffc0, v2
+; SI-GISEL-NEXT: v_subrev_i32_e32 v2, vcc, 64, v2
; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
; SI-GISEL-NEXT: buffer_store_short v2, v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT: s_endpgm
@@ -1283,79 +1300,44 @@ define amdgpu_kernel void @v_test_i16_x_sub_64(ptr addrspace(1) %out, ptr addrsp
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT: v_add_u16_e32 v2, 0xffc0, v3
+; VI-GISEL-NEXT: v_subrev_u16_e32 v2, 64, v3
; VI-GISEL-NEXT: flat_store_short v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-SDAG-LABEL: v_test_i16_x_sub_64:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
-; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-SDAG-NEXT: global_load_ushort v1, v0, s[6:7]
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_subrev_u16_e32 v1, 64, v1
-; GFX9-SDAG-NEXT: global_store_short v0, v1, s[4:5]
-; GFX9-SDAG-NEXT: s_endpgm
-;
-; GFX9-GISEL-LABEL: v_test_i16_x_sub_64:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
-; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-GISEL-NEXT: global_load_ushort v1, v0, s[6:7]
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: v_add_u16_e32 v1, 0xffc0, v1
-; GFX9-GISEL-NEXT: global_store_short v0, v1, s[4:5]
-; GFX9-GISEL-NEXT: s_endpgm
-;
-; GFX10-SDAG-LABEL: v_test_i16_x_sub_64:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
-; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT: global_load_ushort v1, v0, s[6:7]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: v_sub_nc_u16 v1, v1, 64
-; GFX10-SDAG-NEXT: global_store_short v0, v1, s[4:5]
-; GFX10-SDAG-NEXT: s_endpgm
-;
-; GFX10-GISEL-LABEL: v_test_i16_x_sub_64:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
-; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT: global_load_ushort v1, v0, s[6:7]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 0xffc0
-; GFX10-GISEL-NEXT: global_store_short v0, v1, s[4:5]
-; GFX10-GISEL-NEXT: s_endpgm
+; GFX9-LABEL: v_test_i16_x_sub_64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_load_ushort v1, v0, s[6:7]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_subrev_u16_e32 v1, 64, v1
+; GFX9-NEXT: global_store_short v0, v1, s[4:5]
+; GFX9-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: v_test_i16_x_sub_64:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
-; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: global_load_u16 v1, v0, s[2:3]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_sub_nc_u16 v1, v1, 64
-; GFX11-SDAG-NEXT: global_store_b16 v0, v1, s[0:1]
-; GFX11-SDAG-NEXT: s_endpgm
+; GFX10-LABEL: v_test_i16_x_sub_64:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_ushort v1, v0, s[6:7]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_sub_nc_u16 v1, v1, 64
+; GFX10-NEXT: global_store_short v0, v1, s[4:5]
+; GFX10-NEXT: s_endpgm
;
-; GFX11-GISEL-LABEL: v_test_i16_x_sub_64:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
-; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: global_load_u16 v1, v0, s[2:3]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 0xffc0
-; GFX11-GISEL-NEXT: global_store_b16 v0, v1, s[0:1]
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-LABEL: v_test_i16_x_sub_64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
+; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_u16 v1, v0, s[2:3]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_sub_nc_u16 v1, v1, 64
+; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds i16, ptr addrspace(1) %in, i64 %tid.ext
@@ -1397,7 +1379,7 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_zext_to_i32(ptr addrspace(1) %out
; SI-GISEL-NEXT: buffer_load_ushort v3, v[1:2], s[4:7], 0 addr64
; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; SI-GISEL-NEXT: v_add_i32_e32 v0, vcc, 0xffffffc0, v3
+; SI-GISEL-NEXT: v_subrev_i32_e32 v0, vcc, 64, v3
; SI-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
; SI-GISEL-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
@@ -1437,91 +1419,50 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_zext_to_i32(ptr addrspace(1) %out
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v3
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT: v_add_u16_e32 v2, 0xffc0, v2
+; VI-GISEL-NEXT: v_subrev_u16_e32 v2, 64, v2
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-SDAG-LABEL: v_test_i16_x_sub_64_zext_to_i32:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v1, 1, v0
-; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-SDAG-NEXT: global_load_ushort v1, v1, s[6:7]
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_subrev_u16_e32 v1, 64, v1
-; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[4:5]
-; GFX9-SDAG-NEXT: s_endpgm
-;
-; GFX9-GISEL-LABEL: v_test_i16_x_sub_64_zext_to_i32:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v0
-; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-GISEL-NEXT: global_load_ushort v1, v1, s[6:7]
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: v_add_u16_e32 v1, 0xffc0, v1
-; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[4:5]
-; GFX9-GISEL-NEXT: s_endpgm
-;
-; GFX10-SDAG-LABEL: v_test_i16_x_sub_64_zext_to_i32:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v1, 1, v0
-; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT: global_load_ushort v1, v1, s[6:7]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: v_sub_nc_u16 v1, v1, 64
-; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[4:5]
-; GFX10-SDAG-NEXT: s_endpgm
-;
-; GFX10-GISEL-LABEL: v_test_i16_x_sub_64_zext_to_i32:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v0
-; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT: global_load_ushort v1, v1, s[6:7]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 0xffc0
-; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[4:5]
-; GFX10-GISEL-NEXT: s_endpgm
+; GFX9-LABEL: v_test_i16_x_sub_64_zext_to_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 1, v0
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_load_ushort v1, v1, s[6:7]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_subrev_u16_e32 v1, 64, v1
+; GFX9-NEXT: global_store_dword v0, v1, s[4:5]
+; GFX9-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: v_test_i16_x_sub_64_zext_to_i32:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
-; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v1, 1, v0
-; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: global_load_u16 v1, v1, s[2:3]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_sub_nc_u16 v1, v1, 64
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-SDAG-NEXT: s_endpgm
+; GFX10-LABEL: v_test_i16_x_sub_64_zext_to_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v1, 1, v0
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_ushort v1, v1, s[6:7]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_sub_nc_u16 v1, v1, 64
+; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX10-NEXT: global_store_dword v0, v1, s[4:5]
+; GFX10-NEXT: s_endpgm
;
-; GFX11-GISEL-LABEL: v_test_i16_x_sub_64_zext_to_i32:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
-; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v0
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: global_load_u16 v1, v1, s[2:3]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 0xffc0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-LABEL: v_test_i16_x_sub_64_zext_to_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
+; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_lshlrev_b32_e32 v1, 1, v0
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_u16 v1, v1, s[2:3]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_sub_nc_u16 v1, v1, 64
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds i16, ptr addrspace(1) %in, i64 %tid.ext
@@ -1570,8 +1511,8 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_multi_use(ptr addrspace(1) %out,
; SI-GISEL-NEXT: buffer_load_ushort v3, v[0:1], s[4:7], 0 addr64 glc
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
-; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 0xffffffc0, v2
-; SI-GISEL-NEXT: v_add_i32_e32 v3, vcc, 0xffffffc0, v3
+; SI-GISEL-NEXT: v_subrev_i32_e32 v2, vcc, 64, v2
+; SI-GISEL-NEXT: v_subrev_i32_e32 v3, vcc, 64, v3
; SI-GISEL-NEXT: buffer_store_short v2, v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
; SI-GISEL-NEXT: buffer_store_short v3, v[0:1], s[0:3], 0 addr64
@@ -1618,119 +1559,66 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_multi_use(ptr addrspace(1) %out,
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-GISEL-NEXT: v_add_u16_e32 v2, 0xffc0, v3
-; VI-GISEL-NEXT: v_add_u16_e32 v3, 0xffc0, v4
+; VI-GISEL-NEXT: v_subrev_u16_e32 v2, 64, v3
+; VI-GISEL-NEXT: v_subrev_u16_e32 v3, 64, v4
; VI-GISEL-NEXT: flat_store_short v[0:1], v2
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
; VI-GISEL-NEXT: flat_store_short v[0:1], v3
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-SDAG-LABEL: v_test_i16_x_sub_64_multi_use:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
-; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-SDAG-NEXT: global_load_ushort v1, v0, s[6:7] glc
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: global_load_ushort v2, v0, s[6:7] glc
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_subrev_u16_e32 v1, 64, v1
-; GFX9-SDAG-NEXT: v_subrev_u16_e32 v2, 64, v2
-; GFX9-SDAG-NEXT: global_store_short v0, v1, s[4:5]
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: global_store_short v0, v2, s[4:5]
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: s_endpgm
-;
-; GFX9-GISEL-LABEL: v_test_i16_x_sub_64_multi_use:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
-; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-GISEL-NEXT: global_load_ushort v1, v0, s[6:7] glc
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: global_load_ushort v2, v0, s[6:7] glc
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: v_add_u16_e32 v1, 0xffc0, v1
-; GFX9-GISEL-NEXT: v_add_u16_e32 v2, 0xffc0, v2
-; GFX9-GISEL-NEXT: global_store_short v0, v1, s[4:5]
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: global_store_short v0, v2, s[4:5]
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: s_endpgm
-;
-; GFX10-SDAG-LABEL: v_test_i16_x_sub_64_multi_use:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
-; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT: global_load_ushort v1, v0, s[6:7] glc dlc
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: global_load_ushort v2, v0, s[6:7] glc dlc
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: v_sub_nc_u16 v1, v1, 64
-; GFX10-SDAG-NEXT: v_sub_nc_u16 v2, v2, 64
-; GFX10-SDAG-NEXT: global_store_short v0, v1, s[4:5]
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-SDAG-NEXT: global_store_short v0, v2, s[4:5]
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-SDAG-NEXT: s_endpgm
-;
-; GFX10-GISEL-LABEL: v_test_i16_x_sub_64_multi_use:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
-; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
-; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT: global_load_ushort v1, v0, s[6:7] glc dlc
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: global_load_ushort v2, v0, s[6:7] glc dlc
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 0xffc0
-; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v2, 0xffc0
-; GFX10-GISEL-NEXT: global_store_short v0, v1, s[4:5]
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-GISEL-NEXT: global_store_short v0, v2, s[4:5]
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-GISEL-NEXT: s_endpgm
+; GFX9-LABEL: v_test_i16_x_sub_64_multi_use:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_load_ushort v1, v0, s[6:7] glc
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_load_ushort v2, v0, s[6:7] glc
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_subrev_u16_e32 v1, 64, v1
+; GFX9-NEXT: v_subrev_u16_e32 v2, 64, v2
+; GFX9-NEXT: global_store_short v0, v1, s[4:5]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_short v0, v2, s[4:5]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: v_test_i16_x_sub_64_multi_use:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
-; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: global_load_u16 v2, v0, s[2:3] glc dlc
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_sub_nc_u16 v1, v1, 64
-; GFX11-SDAG-NEXT: v_sub_nc_u16 v2, v2, 64
-; GFX11-SDAG-NEXT: global_store_b16 v0, v1, s[0:1] dlc
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-SDAG-NEXT: global_store_b16 v0, v2, s[0:1] dlc
-; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-SDAG-NEXT: s_endpgm
+; GFX10-LABEL: v_test_i16_x_sub_64_multi_use:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_ushort v1, v0, s[6:7] glc dlc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_load_ushort v2, v0, s[6:7] glc dlc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_sub_nc_u16 v1, v1, 64
+; GFX10-NEXT: v_sub_nc_u16 v2, v2, 64
+; GFX10-NEXT: global_store_short v0, v1, s[4:5]
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: global_store_short v0, v2, s[4:5]
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: s_endpgm
;
-; GFX11-GISEL-LABEL: v_test_i16_x_sub_64_multi_use:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
-; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: global_load_u16 v2, v0, s[2:3] glc dlc
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 0xffc0
-; GFX11-GISEL-NEXT: v_add_nc_u16 v2, v2, 0xffc0
-; GFX11-GISEL-NEXT: global_store_b16 v0, v1, s[0:1] dlc
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-GISEL-NEXT: global_store_b16 v0, v2, s[0:1] dlc
-; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-LABEL: v_test_i16_x_sub_64_multi_use:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
+; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: global_load_u16 v2, v0, s[2:3] glc dlc
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_sub_nc_u16 v1, v1, 64
+; GFX11-NEXT: v_sub_nc_u16 v2, v2, 64
+; GFX11-NEXT: global_store_b16 v0, v1, s[0:1] dlc
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: global_store_b16 v0, v2, s[0:1] dlc
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds i16, ptr addrspace(1) %in, i64 %tid.ext
@@ -1776,8 +1664,8 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_64_64(ptr addrspace(1) %out, ptr a
; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
; SI-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 0xffffffc0, v2
-; SI-GISEL-NEXT: v_add_i32_e32 v3, vcc, 0xffffffc0, v3
+; SI-GISEL-NEXT: v_subrev_i32_e32 v2, vcc, 64, v2
+; SI-GISEL-NEXT: v_subrev_i32_e32 v3, vcc, 64, v3
; SI-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; SI-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; SI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
@@ -1810,7 +1698,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_64_64(ptr addrspace(1) %out, ptr a
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0
-; VI-GISEL-NEXT: v_not_b32_e32 v4, 63
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 64
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
@@ -1822,8 +1710,8 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_64_64(ptr addrspace(1) %out, ptr a
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT: v_add_u16_e32 v2, 0xffc0, v3
-; VI-GISEL-NEXT: v_add_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-GISEL-NEXT: v_subrev_u16_e32 v2, 64, v3
+; VI-GISEL-NEXT: v_sub_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
@@ -1904,8 +1792,8 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_7_64(ptr addrspace(1) %out, ptr ad
; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
; SI-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, -7, v2
-; SI-GISEL-NEXT: v_add_i32_e32 v3, vcc, 0xffffffc0, v3
+; SI-GISEL-NEXT: v_subrev_i32_e32 v2, vcc, 7, v2
+; SI-GISEL-NEXT: v_subrev_i32_e32 v3, vcc, 64, v3
; SI-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; SI-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; SI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
@@ -1938,7 +1826,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_7_64(ptr addrspace(1) %out, ptr ad
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0
-; VI-GISEL-NEXT: v_not_b32_e32 v4, 63
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 64
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
@@ -1950,8 +1838,8 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_7_64(ptr addrspace(1) %out, ptr ad
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT: v_add_u16_e32 v2, -7, v3
-; VI-GISEL-NEXT: v_add_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-GISEL-NEXT: v_subrev_u16_e32 v2, 7, v3
+; VI-GISEL-NEXT: v_sub_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
@@ -2045,8 +1933,8 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_64_123(ptr addrspace(1) %out, ptr
; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
; SI-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 0xffffffc0, v2
-; SI-GISEL-NEXT: v_add_i32_e32 v3, vcc, 0xffffff85, v3
+; SI-GISEL-NEXT: v_subrev_i32_e32 v2, vcc, 64, v2
+; SI-GISEL-NEXT: v_subrev_i32_e32 v3, vcc, 0x7b, v3
; SI-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; SI-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; SI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
@@ -2079,7 +1967,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_64_123(ptr addrspace(1) %out, ptr
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff85
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7b
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
@@ -2091,8 +1979,8 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_64_123(ptr addrspace(1) %out, ptr
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT: v_add_u16_e32 v2, 0xffc0, v3
-; VI-GISEL-NEXT: v_add_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-GISEL-NEXT: v_subrev_u16_e32 v2, 64, v3
+; VI-GISEL-NEXT: v_sub_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
@@ -2186,7 +2074,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_7_0(ptr addrspace(1) %out, ptr add
; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
; SI-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, -7, v2
+; SI-GISEL-NEXT: v_subrev_i32_e32 v2, vcc, 7, v2
; SI-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; SI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; SI-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
@@ -2229,7 +2117,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_7_0(ptr addrspace(1) %out, ptr add
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
; VI-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v3
-; VI-GISEL-NEXT: v_add_u16_e32 v3, -7, v3
+; VI-GISEL-NEXT: v_subrev_u16_e32 v3, 7, v3
; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; VI-GISEL-NEXT: v_or_b32_e32 v2, v3, v2
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
@@ -2309,7 +2197,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_0_16(ptr addrspace(1) %out, ptr ad
; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
; SI-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; SI-GISEL-NEXT: v_add_i32_e32 v3, vcc, -16, v3
+; SI-GISEL-NEXT: v_subrev_i32_e32 v3, vcc, 16, v3
; SI-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; SI-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; SI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
@@ -2349,11 +2237,11 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_0_16(ptr addrspace(1) %out, ptr ad
; VI-GISEL-NEXT: flat_load_dword v3, v[0:1]
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, -16
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 16
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT: v_add_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-GISEL-NEXT: v_sub_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-GISEL-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
@@ -2431,7 +2319,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_0_1_0(ptr addrspace(1) %out, ptr a
; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
; SI-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; SI-GISEL-NEXT: v_add_i32_e32 v3, vcc, 0x3c00, v3
+; SI-GISEL-NEXT: v_subrev_i32_e32 v3, vcc, 0xffffc400, v3
; SI-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; SI-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; SI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
@@ -2471,11 +2359,11 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_0_1_0(ptr addrspace(1) %out, ptr a
; VI-GISEL-NEXT: flat_load_dword v3, v[0:1]
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3c00
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xffffc400
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT: v_add_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-GISEL-NEXT: v_sub_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-GISEL-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
@@ -2566,7 +2454,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_0_neg1_0(ptr addrspace(1) %out, pt
; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
; SI-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; SI-GISEL-NEXT: v_add_i32_e32 v3, vcc, 0xffffbc00, v3
+; SI-GISEL-NEXT: v_subrev_i32_e32 v3, vcc, 0x4400, v3
; SI-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; SI-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; SI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
@@ -2606,11 +2494,11 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_0_neg1_0(ptr addrspace(1) %out, pt
; VI-GISEL-NEXT: flat_load_dword v3, v[0:1]
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xffffbc00
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4400
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
-; VI-GISEL-NEXT: v_add_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-GISEL-NEXT: v_sub_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-GISEL-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/alu-roundtrip.ll b/llvm/test/CodeGen/RISCV/GlobalISel/alu-roundtrip.ll
index 14ff9e01ab3bc2f..c503d6541b0a577 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/alu-roundtrip.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/alu-roundtrip.ll
@@ -457,19 +457,19 @@ entry:
define i64 @subi_i64(i64 %a) {
; RV32IM-LABEL: subi_i64:
; RV32IM: # %bb.0: # %entry
-; RV32IM-NEXT: lui a2, 1048275
-; RV32IM-NEXT: addi a2, a2, -1548
-; RV32IM-NEXT: add a0, a0, a2
-; RV32IM-NEXT: sltu a2, a0, a2
-; RV32IM-NEXT: addi a1, a1, -1
-; RV32IM-NEXT: add a1, a1, a2
+; RV32IM-NEXT: lui a2, 301
+; RV32IM-NEXT: addi a3, a2, 1548
+; RV32IM-NEXT: sub a2, a0, a3
+; RV32IM-NEXT: sltu a0, a0, a3
+; RV32IM-NEXT: sub a1, a1, a0
+; RV32IM-NEXT: mv a0, a2
; RV32IM-NEXT: ret
;
; RV64IM-LABEL: subi_i64:
; RV64IM: # %bb.0: # %entry
-; RV64IM-NEXT: lui a1, 1048275
-; RV64IM-NEXT: addiw a1, a1, -1548
-; RV64IM-NEXT: add a0, a0, a1
+; RV64IM-NEXT: lui a1, 301
+; RV64IM-NEXT: addiw a1, a1, 1548
+; RV64IM-NEXT: sub a0, a0, a1
; RV64IM-NEXT: ret
entry:
%0 = sub i64 %a, 1234444
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/alu-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/alu-rv32.mir
index 39d0ee7c382dfc3..2ef5de501ee7116 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/alu-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/alu-rv32.mir
@@ -200,9 +200,8 @@ body: |
; RV32I: liveins: $x10
; RV32I-NEXT: {{ $}}
; RV32I-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
- ; RV32I-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, -1234
- ; RV32I-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY]], [[ADDI]]
- ; RV32I-NEXT: $x10 = COPY [[SUB]]
+ ; RV32I-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI [[COPY]], 1234
+ ; RV32I-NEXT: $x10 = COPY [[ADDI]]
; RV32I-NEXT: PseudoRET implicit $x10
%0:gprb(s32) = COPY $x10
%1:gprb(s32) = G_CONSTANT i32 -1234
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/alu-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/alu-rv64.mir
index 527036d8b750fcd..be12333e1499b27 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/alu-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/alu-rv64.mir
@@ -188,9 +188,8 @@ body: |
; RV64I: liveins: $x10
; RV64I-NEXT: {{ $}}
; RV64I-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
- ; RV64I-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, -1234
- ; RV64I-NEXT: [[SUBW:%[0-9]+]]:gpr = SUBW [[COPY]], [[ADDI]]
- ; RV64I-NEXT: $x10 = COPY [[SUBW]]
+ ; RV64I-NEXT: [[ADDIW:%[0-9]+]]:gpr = ADDIW [[COPY]], 1234
+ ; RV64I-NEXT: $x10 = COPY [[ADDIW]]
; RV64I-NEXT: PseudoRET implicit $x10
%0:gprb(s64) = COPY $x10
%1:gprb(s32) = G_TRUNC %0(s64)
@@ -441,9 +440,8 @@ body: |
; RV64I: liveins: $x10
; RV64I-NEXT: {{ $}}
; RV64I-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
- ; RV64I-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, -1234
- ; RV64I-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY]], [[ADDI]]
- ; RV64I-NEXT: $x10 = COPY [[SUB]]
+ ; RV64I-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI [[COPY]], 1234
+ ; RV64I-NEXT: $x10 = COPY [[ADDI]]
; RV64I-NEXT: PseudoRET implicit $x10
%0:gprb(s64) = COPY $x10
%1:gprb(s64) = G_CONSTANT i64 -1234
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-medium-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-medium-rv64.mir
index d0237892d132f3a..5d980e7721458e9 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-medium-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-medium-rv64.mir
@@ -111,8 +111,8 @@ body: |
%2:gprb(s64) = G_ASSERT_SEXT %1, 32
%7:gprb(s64) = G_CONSTANT i64 5
%3:gprb(s64) = G_SEXT_INREG %2, 32
- %4:gprb(s64) = G_CONSTANT i64 -1
- %5:gprb(s64) = G_ADD %3, %4
+ %4:gprb(s64) = G_CONSTANT i64 1
+ %5:gprb(s64) = G_SUB %3, %4
%26:gprb(s64) = G_ICMP intpred(ugt), %5(s64), %7
G_BRCOND %26(s64), %bb.8
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-pic-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-pic-rv32.mir
index 396421a4ba739a2..27fe465ccf696bf 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-pic-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-pic-rv32.mir
@@ -115,8 +115,8 @@ body: |
%12:gprb(s32) = G_CONSTANT i32 3
%13:gprb(s32) = G_CONSTANT i32 4
%14:gprb(s32) = G_CONSTANT i32 1000
- %1:gprb(s32) = G_CONSTANT i32 -1
- %2:gprb(s32) = G_ADD %0, %1
+ %1:gprb(s32) = G_CONSTANT i32 1
+ %2:gprb(s32) = G_SUB %0, %1
%16:gprb(s32) = G_ICMP intpred(ugt), %2(s32), %4
G_BRCOND %16(s32), %bb.8
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-pic-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-pic-rv64.mir
index 0a08586bc1af4f1..77156b913c5e8b4 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-pic-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-pic-rv64.mir
@@ -112,8 +112,8 @@ body: |
%2:gprb(s64) = G_ASSERT_SEXT %1, 32
%7:gprb(s64) = G_CONSTANT i64 5
%3:gprb(s64) = G_SEXT_INREG %2, 32
- %4:gprb(s64) = G_CONSTANT i64 -1
- %5:gprb(s64) = G_ADD %3, %4
+ %4:gprb(s64) = G_CONSTANT i64 1
+ %5:gprb(s64) = G_SUB %3, %4
%26:gprb(s64) = G_ICMP intpred(ugt), %5(s64), %7
G_BRCOND %26(s64), %bb.8
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-rv32.mir
index efa1a6c86027db7..388c238b86eb6f4 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-rv32.mir
@@ -171,8 +171,8 @@ body: |
%12:gprb(s32) = G_CONSTANT i32 3
%13:gprb(s32) = G_CONSTANT i32 4
%14:gprb(s32) = G_CONSTANT i32 1000
- %1:gprb(s32) = G_CONSTANT i32 -1
- %2:gprb(s32) = G_ADD %0, %1
+ %1:gprb(s32) = G_CONSTANT i32 1
+ %2:gprb(s32) = G_SUB %0, %1
%16:gprb(s32) = G_ICMP intpred(ugt), %2(s32), %4
G_BRCOND %16(s32), %bb.8
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-small-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-small-rv64.mir
index 12b1517e2cfb541..09a855105c26275 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-small-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/jump-table-brjt-small-rv64.mir
@@ -112,8 +112,8 @@ body: |
%2:gprb(s64) = G_ASSERT_SEXT %1, 32
%7:gprb(s64) = G_CONSTANT i64 5
%3:gprb(s64) = G_SEXT_INREG %2, 32
- %4:gprb(s64) = G_CONSTANT i64 -1
- %5:gprb(s64) = G_ADD %3, %4
+ %4:gprb(s64) = G_CONSTANT i64 1
+ %5:gprb(s64) = G_SUB %3, %4
%26:gprb(s64) = G_ICMP intpred(ugt), %5(s64), %7
G_BRCOND %26(s64), %bb.8
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll
index b9d1ebc54db7474..1b323fe35b8e385 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll
@@ -143,7 +143,7 @@ define i64 @rol_i64(i64 %a, i64 %b) nounwind {
; CHECK-NEXT: bltu a6, a4, .LBB7_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a3, 0
-; CHECK-NEXT: sub a5, a6, a4
+; CHECK-NEXT: addi a5, a6, -32
; CHECK-NEXT: sll a7, a0, a5
; CHECK-NEXT: j .LBB7_3
; CHECK-NEXT: .LBB7_2:
@@ -162,7 +162,7 @@ define i64 @rol_i64(i64 %a, i64 %b) nounwind {
; CHECK-NEXT: andi a6, a5, 63
; CHECK-NEXT: bltu a6, a4, .LBB7_7
; CHECK-NEXT: # %bb.6:
-; CHECK-NEXT: sub a7, a6, a4
+; CHECK-NEXT: addi a7, a6, -32
; CHECK-NEXT: srl a7, a1, a7
; CHECK-NEXT: bnez a6, .LBB7_8
; CHECK-NEXT: j .LBB7_9
@@ -220,7 +220,7 @@ define i64 @ror_i64(i64 %a, i64 %b) nounwind {
; CHECK-NEXT: li a4, 32
; CHECK-NEXT: bltu a5, a4, .LBB9_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: sub a3, a5, a4
+; CHECK-NEXT: addi a3, a5, -32
; CHECK-NEXT: srl a6, a1, a3
; CHECK-NEXT: mv a3, a0
; CHECK-NEXT: bnez a5, .LBB9_3
@@ -235,33 +235,33 @@ define i64 @ror_i64(i64 %a, i64 %b) nounwind {
; CHECK-NEXT: .LBB9_3:
; CHECK-NEXT: mv a3, a6
; CHECK-NEXT: .LBB9_4:
-; CHECK-NEXT: neg a7, a2
+; CHECK-NEXT: neg a6, a2
; CHECK-NEXT: bltu a5, a4, .LBB9_7
; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: andi a5, a7, 63
+; CHECK-NEXT: andi a5, a6, 63
; CHECK-NEXT: bgeu a5, a4, .LBB9_8
; CHECK-NEXT: .LBB9_6:
-; CHECK-NEXT: sll a6, a0, a7
-; CHECK-NEXT: neg a4, a5
-; CHECK-NEXT: srl a0, a0, a4
-; CHECK-NEXT: sll a4, a1, a7
-; CHECK-NEXT: or a0, a0, a4
+; CHECK-NEXT: sll a4, a0, a6
+; CHECK-NEXT: neg a7, a5
+; CHECK-NEXT: srl a0, a0, a7
+; CHECK-NEXT: sll a6, a1, a6
+; CHECK-NEXT: or a0, a0, a6
; CHECK-NEXT: bnez a5, .LBB9_9
; CHECK-NEXT: j .LBB9_10
; CHECK-NEXT: .LBB9_7:
; CHECK-NEXT: srl a2, a1, a2
-; CHECK-NEXT: andi a5, a7, 63
+; CHECK-NEXT: andi a5, a6, 63
; CHECK-NEXT: bltu a5, a4, .LBB9_6
; CHECK-NEXT: .LBB9_8:
-; CHECK-NEXT: li a6, 0
-; CHECK-NEXT: sub a4, a5, a4
-; CHECK-NEXT: sll a0, a0, a4
+; CHECK-NEXT: li a4, 0
+; CHECK-NEXT: addi a6, a5, -32
+; CHECK-NEXT: sll a0, a0, a6
; CHECK-NEXT: beqz a5, .LBB9_10
; CHECK-NEXT: .LBB9_9:
; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: .LBB9_10:
-; CHECK-NEXT: or a0, a3, a6
+; CHECK-NEXT: or a0, a3, a4
; CHECK-NEXT: or a1, a2, a1
; CHECK-NEXT: ret
%or = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %b)
More information about the llvm-commits
mailing list