[llvm] 4033aa1 - AMDGPU/GlobalISel: Sign extend integer constants

Matt Arsenault via llvm-commits <llvm-commits@lists.llvm.org>
Sun Jul 26 06:41:11 PDT 2020


Author: Matt Arsenault
Date: 2020-07-26T09:30:14-04:00
New Revision: 4033aa1467d6ea0cb5289ed9db2f4e3cc015eac1

URL: https://github.com/llvm/llvm-project/commit/4033aa1467d6ea0cb5289ed9db2f4e3cc015eac1
DIFF: https://github.com/llvm/llvm-project/commit/4033aa1467d6ea0cb5289ed9db2f4e3cc015eac1.diff

LOG: AMDGPU/GlobalISel: Sign extend integer constants

This matches the SelectionDAG behavior and fixes immediate folding for
negative constants.
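
To see why the widening convention matters for folding, consider the i32
constant -64 (0xffffffc0). MachineOperand stores immediates as int64_t, and
folding checks such as isInt<16>() test that widened value. A minimal
standalone sketch (plain C++ mirroring APInt::getZExtValue/getSExtValue; the
IsInt16 helper is illustrative, standing in for the check used when selecting
s_movk_i32):

  #include <cassert>
  #include <cstdint>

  int main() {
    int32_t Imm = -64;                            // 0xffffffc0 as i32
    uint64_t Zext = static_cast<uint32_t>(Imm);   // 4294967232
    int64_t Sext = Imm;                           // -64

    // 16-bit signed-immediate check, as for s_movk_i32.
    auto IsInt16 = [](int64_t X) { return X >= INT16_MIN && X <= INT16_MAX; };

    assert(!IsInt16(static_cast<int64_t>(Zext))); // zero-extended: fold rejected
    assert(IsInt16(Sext));                        // sign-extended: fold succeeds
    return 0;
  }

With the sign-extended form, -64 passes the 16-bit check, which is why several
tests below now select s_movk_i32 0xffc0 where s_mov_b32 0xffffffc0 was
emitted before.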

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 4740a5851999..a126ed1daf17 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1952,7 +1952,7 @@ bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
     const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
     ImmOp.ChangeToImmediate(Imm.getZExtValue());
   } else if (ImmOp.isCImm()) {
-    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
+    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getSExtValue());
   }
 
   Register DstReg = I.getOperand(0).getReg();

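The one-line change above swaps the value passed to ChangeToImmediate(int64_t)
from the zero-extended to the sign-extended reading of the ConstantInt. A small
demonstration of the two APInt accessors (assumes LLVM headers; an
illustration, not code from the patch):

  #include "llvm/ADT/APInt.h"
  #include <cassert>

  int main() {
    // The value a ConstantInt holds for i32 -64.
    llvm::APInt C(/*numBits=*/32, /*val=*/-64, /*isSigned=*/true);
    assert(C.getZExtValue() == 0xffffffc0u); // old behavior: 4294967232
    assert(C.getSExtValue() == -64);         // new behavior: -64 preserved
  }

The remaining hunks are test updates reflecting the new immediate rendering.
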
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll
index ccf846a933ab..c6c0eb7c4a93 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll
@@ -91,7 +91,7 @@ define <2 x i16> @v_add_v2i16_neg_inline_imm_splat(<2 x i16> %a) {
 ; GFX9-LABEL: v_add_v2i16_neg_inline_imm_splat:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s4, 0xffffffc0
+; GFX9-NEXT:    s_movk_i32 s4, 0xffc0
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s4, s4, s4
 ; GFX9-NEXT:    v_pk_add_u16 v0, v0, s4
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
@@ -99,7 +99,7 @@ define <2 x i16> @v_add_v2i16_neg_inline_imm_splat(<2 x i16> %a) {
 ; GFX8-LABEL: v_add_v2i16_neg_inline_imm_splat:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_mov_b32 s4, 0xffc0
+; GFX8-NEXT:    s_movk_i32 s4, 0xffc0
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX8-NEXT:    v_add_u16_e32 v1, s4, v0
 ; GFX8-NEXT:    v_add_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
@@ -140,7 +140,7 @@ define <2 x i16> @v_add_v2i16_neg_inline_imm_hi(<2 x i16> %a) {
 ; GFX8-LABEL: v_add_v2i16_neg_inline_imm_hi:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v1, 0xffc0
+; GFX8-NEXT:    v_mov_b32_e32 v1, 0xffffffc0
 ; GFX8-NEXT:    v_add_u16_e32 v2, 4, v0
 ; GFX8-NEXT:    v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
@@ -152,7 +152,7 @@ define <2 x i16> @v_add_v2i16_neg_inline_imm_hi(<2 x i16> %a) {
 define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_splat(<2 x i16> inreg %a) {
 ; GFX9-LABEL: s_add_v2i16_neg_inline_imm_splat:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s1, 0xffffffc0
+; GFX9-NEXT:    s_movk_i32 s1, 0xffc0
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s1, s1, s1
 ; GFX9-NEXT:    s_lshr_b32 s2, s0, 16
 ; GFX9-NEXT:    s_lshr_b32 s3, s1, 16

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
index 2695952bfd19..1f9c3bc60876 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
@@ -135,24 +135,24 @@ define void @constrained_if_register_class() {
 ; CHECK-NEXT:    s_load_dword s4, s[4:5], 0x0
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
 ; CHECK-NEXT:    s_cmp_lg_u32 s4, 0
-; CHECK-NEXT:    s_cselect_b32 s4, 1, 0
-; CHECK-NEXT:    s_xor_b32 s4, s4, 1
-; CHECK-NEXT:    s_and_b32 s4, s4, 1
-; CHECK-NEXT:    s_cmp_lg_u32 s4, 0
+; CHECK-NEXT:    s_cselect_b32 s5, 1, 0
+; CHECK-NEXT:    s_xor_b32 s5, s5, -1
+; CHECK-NEXT:    s_and_b32 s5, s5, 1
+; CHECK-NEXT:    s_mov_b32 s4, -1
+; CHECK-NEXT:    s_cmp_lg_u32 s5, 0
 ; CHECK-NEXT:    s_cbranch_scc0 BB4_6
 ; CHECK-NEXT:  ; %bb.1: ; %bb2
-; CHECK-NEXT:    s_getpc_b64 s[4:5]
-; CHECK-NEXT:    s_add_u32 s4, s4, const.ptr@gotpcrel32@lo+4
-; CHECK-NEXT:    s_addc_u32 s5, s5, const.ptr@gotpcrel32@hi+4
-; CHECK-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
-; CHECK-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, 1
+; CHECK-NEXT:    s_getpc_b64 s[6:7]
+; CHECK-NEXT:    s_add_u32 s6, s6, const.ptr@gotpcrel32@lo+4
+; CHECK-NEXT:    s_addc_u32 s7, s7, const.ptr@gotpcrel32@hi+4
+; CHECK-NEXT:    s_load_dwordx2 s[6:7], s[6:7], 0x0
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; CHECK-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; CHECK-NEXT:    s_load_dwordx2 s[6:7], s[6:7], 0x0
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; CHECK-NEXT:    v_mov_b32_e32 v0, s4
-; CHECK-NEXT:    v_mov_b32_e32 v1, s5
+; CHECK-NEXT:    v_mov_b32_e32 v0, s6
+; CHECK-NEXT:    v_mov_b32_e32 v1, s7
 ; CHECK-NEXT:    flat_load_dword v0, v[0:1]
-; CHECK-NEXT:    s_mov_b32 s4, -1
+; CHECK-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, 1
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    v_cmp_gt_f32_e32 vcc, 1.0, v0
 ; CHECK-NEXT:    s_xor_b64 s[8:9], vcc, s[6:7]

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir
index 51a116a944ad..baed490c0758 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir
@@ -94,7 +94,7 @@ body: |
     ; GFX6-LABEL: name: add_neg_inline_const_64_to_sub_s32_v
     ; GFX6: liveins: $vgpr0
     ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967232, implicit $exec
+    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -64, implicit $exec
     ; GFX6: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
     ; GFX6: S_ENDPGM 0, implicit %2
     ; GFX9-LABEL: name: add_neg_inline_const_64_to_sub_s32_v

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir
index ecfb9b618f5e..81437acbbbc5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir
@@ -20,7 +20,7 @@ body: |
     ; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
     ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
     ; WAVE64: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
-    ; WAVE64: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]]
+    ; WAVE64: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc
     ; WAVE64: S_ENDPGM 0, implicit [[S_AND_B64_]]
     ; WAVE32-LABEL: name: and_s1_vcc_vcc_vcc
     ; WAVE32: liveins: $vgpr0, $vgpr1
@@ -30,7 +30,7 @@ body: |
     ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
     ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
     ; WAVE32: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
-    ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]]
+    ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc
     ; WAVE32: S_ENDPGM 0, implicit [[S_AND_B32_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
@@ -386,7 +386,7 @@ body: |
     ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
     ; WAVE64: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY1]], implicit $exec
     ; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec
-    ; WAVE64: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
+    ; WAVE64: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
     ; WAVE64: S_ENDPGM 0, implicit [[S_AND_B64_]]
     ; WAVE32-LABEL: name: and_s1_vcc_copy_to_vcc
     ; WAVE32: liveins: $vgpr0, $vgpr1
@@ -397,7 +397,7 @@ body: |
     ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
     ; WAVE32: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY1]], implicit $exec
     ; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec
-    ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
+    ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
     ; WAVE32: S_ENDPGM 0, implicit [[S_AND_B32_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
@@ -425,24 +425,24 @@ body:             |
     ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave32
     ; WAVE64: liveins: $vgpr0
     ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+    ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
     ; WAVE64: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec
     ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
     ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc
     ; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
-    ; WAVE64: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
+    ; WAVE64: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
     ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B64_]]
     ; WAVE64: S_ENDPGM 0, implicit [[COPY1]]
     ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave32
     ; WAVE32: liveins: $vgpr0
     ; WAVE32: $vcc_hi = IMPLICIT_DEF
     ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+    ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
     ; WAVE32: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec
     ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
     ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc
     ; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
-    ; WAVE32: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
+    ; WAVE32: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
     ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_1]]
     ; WAVE32: S_ENDPGM 0, implicit [[COPY1]]
     %1:vgpr(s32) = COPY $vgpr0
@@ -471,24 +471,24 @@ body:             |
     ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave64
     ; WAVE64: liveins: $vgpr0
     ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+    ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
     ; WAVE64: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec
     ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
     ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc
     ; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
-    ; WAVE64: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
+    ; WAVE64: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
     ; WAVE64: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_AND_B64_]]
     ; WAVE64: S_ENDPGM 0, implicit [[COPY1]]
     ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave64
     ; WAVE32: liveins: $vgpr0
     ; WAVE32: $vcc_hi = IMPLICIT_DEF
     ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+    ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
     ; WAVE32: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec
     ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
     ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc
     ; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
-    ; WAVE32: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
+    ; WAVE32: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
     ; WAVE32: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_AND_B32_1]]
     ; WAVE32: S_ENDPGM 0, implicit [[COPY1]]
     %1:vgpr(s32) = COPY $vgpr0

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir
index 2f2c305cfac7..c8762c0d578e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir
@@ -12,8 +12,8 @@ body: |
     ; GCN-LABEL: name: constant_v_s32
     ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
     ; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    ; GCN: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
-    ; GCN: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967242, implicit $exec
+    ; GCN: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GCN: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec
     ; GCN: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec
     ; GCN: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]]
     %0:vgpr(s32) = G_CONSTANT i32 0
@@ -35,8 +35,8 @@ body: |
     ; GCN-LABEL: name: constant_s_s32
     ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
     ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
-    ; GCN: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
-    ; GCN: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967242
+    ; GCN: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
+    ; GCN: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54
     ; GCN: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27
     ; GCN: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]]
     %0:sgpr(s32) = G_CONSTANT i32 0

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir
index da730b0c9fa9..10e4cbdc1467 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir
@@ -315,7 +315,7 @@ body: |
     ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_m1
     ; MOVREL: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
     ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
-    ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
+    ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
     ; MOVREL: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
     ; MOVREL: $m0 = COPY [[S_ADD_I32_]]
     ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
@@ -323,7 +323,7 @@ body: |
     ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_m1
     ; GPRIDX: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
     ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
-    ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
+    ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
     ; GPRIDX: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
     ; GPRIDX: $m0 = COPY [[S_ADD_I32_]]
     ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
@@ -468,7 +468,7 @@ body: |
     ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_m1
     ; MOVREL: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
     ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
-    ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
+    ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
     ; MOVREL: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
     ; MOVREL: $m0 = COPY [[S_ADD_I32_]]
     ; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]]
@@ -476,7 +476,7 @@ body: |
     ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_m1
     ; GPRIDX: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
     ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
-    ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
+    ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
     ; GPRIDX: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
     ; GPRIDX: $m0 = COPY [[S_ADD_I32_]]
     ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]]
@@ -699,7 +699,7 @@ body: |
     ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_m1
     ; MOVREL: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
-    ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
+    ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
     ; MOVREL: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
     ; MOVREL: $m0 = COPY [[S_ADD_I32_]]
     ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
@@ -707,7 +707,7 @@ body: |
     ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_m1
     ; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
-    ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
+    ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
     ; GPRIDX: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
     ; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_I32_]], 1, implicit-def $m0, implicit-def $mode, implicit $m0, implicit $mode
     ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir
index a80ad208b589..45d74ad38e53 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir
@@ -681,7 +681,7 @@ body: |
     ; GFX7-LABEL: name: load_local_s32_from_1_gep_m1
     ; GFX7: liveins: $vgpr0
     ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
+    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
     ; GFX7: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
     ; GFX7: $m0 = S_MOV_B32 -1
     ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
@@ -689,14 +689,14 @@ body: |
     ; GFX9-LABEL: name: load_local_s32_from_1_gep_m1
     ; GFX9: liveins: $vgpr0
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
     ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
     ; GFX9: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load 1, addrspace 3)
     ; GFX9: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
     ; GFX6-LABEL: name: load_local_s32_from_1_gep_m1
     ; GFX6: liveins: $vgpr0
     ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
+    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
     ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
     ; GFX6: $m0 = S_MOV_B32 -1
     ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir
index 162dd01de66d..9f6d10722143 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir
@@ -311,14 +311,14 @@ body: |
     ; GFX6-LABEL: name: load_private_s32_from_1_gep_m2047
     ; GFX6: liveins: $vgpr0
     ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec
+    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2047, implicit $exec
     ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
     ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
     ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
     ; GFX9-LABEL: name: load_private_s32_from_1_gep_m2047
     ; GFX9: liveins: $vgpr0
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2047, implicit $exec
     ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
     ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
     ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
@@ -347,14 +347,14 @@ body: |
     ; GFX6-LABEL: name: load_private_s32_from_1_gep_m2048
     ; GFX6: liveins: $vgpr0
     ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
+    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2048, implicit $exec
     ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
     ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
     ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
     ; GFX9-LABEL: name: load_private_s32_from_1_gep_m2048
     ; GFX9: liveins: $vgpr0
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2048, implicit $exec
     ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
     ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
     ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
@@ -453,14 +453,14 @@ body: |
     ; GFX6-LABEL: name: load_private_s32_from_1_gep_m4095
     ; GFX6: liveins: $vgpr0
     ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec
+    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4095, implicit $exec
     ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
     ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
     ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
     ; GFX9-LABEL: name: load_private_s32_from_1_gep_m4095
     ; GFX9: liveins: $vgpr0
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4095, implicit $exec
     ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
     ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
     ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
@@ -489,14 +489,14 @@ body: |
     ; GFX6-LABEL: name: load_private_s32_from_1_gep_m4096
     ; GFX6: liveins: $vgpr0
     ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec
+    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4096, implicit $exec
     ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
     ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
     ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
     ; GFX9-LABEL: name: load_private_s32_from_1_gep_m4096
     ; GFX9: liveins: $vgpr0
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4096, implicit $exec
     ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
     ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
     ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
@@ -597,14 +597,14 @@ body: |
     ; GFX6-LABEL: name: load_private_s32_from_1_gep_m8191
     ; GFX6: liveins: $vgpr0
     ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec
+    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8191, implicit $exec
     ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
     ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
     ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
     ; GFX9-LABEL: name: load_private_s32_from_1_gep_m8191
     ; GFX9: liveins: $vgpr0
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8191, implicit $exec
     ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
     ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
     ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
@@ -633,14 +633,14 @@ body: |
     ; GFX6-LABEL: name: load_private_s32_from_1_gep_m8192
     ; GFX6: liveins: $vgpr0
     ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec
+    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8192, implicit $exec
     ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
     ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
     ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
     ; GFX9-LABEL: name: load_private_s32_from_1_gep_m8192
     ; GFX9: liveins: $vgpr0
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8192, implicit $exec
     ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
     ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
     ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
@@ -860,11 +860,11 @@ body: |
   bb.0:
 
     ; GFX6-LABEL: name: load_private_s32_from_neg1
-    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
+    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
     ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
     ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
     ; GFX9-LABEL: name: load_private_s32_from_neg1
-    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
     ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
     ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
     %0:vgpr(p5) = G_CONSTANT i32 -1

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir
index 40b7b69f83b7..7f1f52d2c522 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir
@@ -20,7 +20,7 @@ body: |
     ; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
     ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
     ; WAVE64: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
-    ; WAVE64: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]]
+    ; WAVE64: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc
     ; WAVE64: S_ENDPGM 0, implicit [[S_OR_B64_]]
     ; WAVE32-LABEL: name: or_s1_vcc_vcc_vcc
     ; WAVE32: liveins: $vgpr0, $vgpr1
@@ -30,7 +30,7 @@ body: |
     ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
     ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
     ; WAVE32: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
-    ; WAVE32: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]]
+    ; WAVE32: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc
     ; WAVE32: S_ENDPGM 0, implicit [[S_OR_B32_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
@@ -386,7 +386,7 @@ body: |
     ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
     ; WAVE64: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY1]], implicit $exec
     ; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec
-    ; WAVE64: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
+    ; WAVE64: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
     ; WAVE64: S_ENDPGM 0, implicit [[S_OR_B64_]]
     ; WAVE32-LABEL: name: or_s1_vcc_copy_to_vcc
     ; WAVE32: liveins: $vgpr0, $vgpr1
@@ -397,7 +397,7 @@ body: |
     ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
     ; WAVE32: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY1]], implicit $exec
     ; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec
-    ; WAVE32: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
+    ; WAVE32: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
     ; WAVE32: S_ENDPGM 0, implicit [[S_OR_B32_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
@@ -425,24 +425,24 @@ body:             |
     ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave32
     ; WAVE64: liveins: $vgpr0
     ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+    ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
     ; WAVE64: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec
     ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
     ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc
     ; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
-    ; WAVE64: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
+    ; WAVE64: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
     ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_OR_B64_]]
     ; WAVE64: S_ENDPGM 0, implicit [[COPY1]]
     ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave32
     ; WAVE32: liveins: $vgpr0
     ; WAVE32: $vcc_hi = IMPLICIT_DEF
     ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+    ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
     ; WAVE32: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec
     ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
     ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc
     ; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
-    ; WAVE32: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
+    ; WAVE32: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
     ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_OR_B32_]]
     ; WAVE32: S_ENDPGM 0, implicit [[COPY1]]
     %1:vgpr(s32) = COPY $vgpr0
@@ -471,24 +471,24 @@ body:             |
     ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave64
     ; WAVE64: liveins: $vgpr0
     ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+    ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
     ; WAVE64: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec
     ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
     ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc
     ; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
-    ; WAVE64: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
+    ; WAVE64: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
     ; WAVE64: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_OR_B64_]]
     ; WAVE64: S_ENDPGM 0, implicit [[COPY1]]
     ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave64
     ; WAVE32: liveins: $vgpr0
     ; WAVE32: $vcc_hi = IMPLICIT_DEF
     ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+    ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
     ; WAVE32: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec
     ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
     ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc
     ; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
-    ; WAVE32: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
+    ; WAVE32: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
     ; WAVE32: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_OR_B32_]]
     ; WAVE32: S_ENDPGM 0, implicit [[COPY1]]
     %1:vgpr(s32) = COPY $vgpr0

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir
index bbb885c705ed..a7f875fcdd42 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir
@@ -33,7 +33,7 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_0xf0f0f0f0
     ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; CHECK: %const:sreg_32 = S_MOV_B32 4042322160
+    ; CHECK: %const:sreg_32 = S_MOV_B32 -252645136
     ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc
     ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]]
     %0:sgpr(p3) = COPY $sgpr0
@@ -54,7 +54,7 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_0xffffffff
     ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; CHECK: %const:sreg_32 = S_MOV_B32 4294967295
+    ; CHECK: %const:sreg_32 = S_MOV_B32 -1
     ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc
     ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]]
     %0:sgpr(p3) = COPY $sgpr0
@@ -96,7 +96,7 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearhi1
     ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; CHECK: %const:sreg_32 = S_MOV_B32 2147483648
+    ; CHECK: %const:sreg_32 = S_MOV_B32 -2147483648
     ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc
     ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]]
     %0:sgpr(p3) = COPY $sgpr0
@@ -117,7 +117,7 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearhi2
     ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; CHECK: %const:sreg_32 = S_MOV_B32 3221225472
+    ; CHECK: %const:sreg_32 = S_MOV_B32 -1073741824
     ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc
     ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]]
     %0:sgpr(p3) = COPY $sgpr0
@@ -138,7 +138,7 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearlo1
     ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; CHECK: %const:sreg_32 = S_MOV_B32 4294967294
+    ; CHECK: %const:sreg_32 = S_MOV_B32 -2
     ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc
     ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]]
     %0:sgpr(p3) = COPY $sgpr0
@@ -159,7 +159,7 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearlo2
     ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; CHECK: %const:sreg_32 = S_MOV_B32 4294967292
+    ; CHECK: %const:sreg_32 = S_MOV_B32 -4
     ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc
     ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]]
     %0:sgpr(p3) = COPY $sgpr0
@@ -180,7 +180,7 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearlo3
     ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; CHECK: %const:sreg_32 = S_MOV_B32 4294967288
+    ; CHECK: %const:sreg_32 = S_MOV_B32 -8
     ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc
     ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]]
     %0:sgpr(p3) = COPY $sgpr0
@@ -201,7 +201,7 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearlo4
     ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; CHECK: %const:sreg_32 = S_MOV_B32 4294967280
+    ; CHECK: %const:sreg_32 = S_MOV_B32 -16
     ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc
     ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]]
     %0:sgpr(p3) = COPY $sgpr0
@@ -222,7 +222,7 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearlo29
     ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; CHECK: %const:sreg_32 = S_MOV_B32 3758096384
+    ; CHECK: %const:sreg_32 = S_MOV_B32 -536870912
     ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc
     ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]]
     %0:sgpr(p3) = COPY $sgpr0
@@ -560,7 +560,7 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_0xf0f0f0f0
     ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; CHECK: %const:vgpr_32 = V_MOV_B32_e32 4042322160, implicit $exec
+    ; CHECK: %const:vgpr_32 = V_MOV_B32_e32 -252645136, implicit $exec
     ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec
     ; CHECK: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
     %0:vgpr(p3) = COPY $vgpr0
@@ -581,7 +581,7 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_clearlo1
     ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; CHECK: %const:vgpr_32 = V_MOV_B32_e32 4294967294, implicit $exec
+    ; CHECK: %const:vgpr_32 = V_MOV_B32_e32 -2, implicit $exec
     ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec
     ; CHECK: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
     %0:vgpr(p3) = COPY $vgpr0
@@ -602,7 +602,7 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_clearlo2
     ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; CHECK: %const:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec
+    ; CHECK: %const:vgpr_32 = V_MOV_B32_e32 -4, implicit $exec
     ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec
     ; CHECK: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
     %0:vgpr(p3) = COPY $vgpr0
@@ -623,7 +623,7 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_clearlo3
     ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; CHECK: %const:vgpr_32 = V_MOV_B32_e32 4294967288, implicit $exec
+    ; CHECK: %const:vgpr_32 = V_MOV_B32_e32 -8, implicit $exec
     ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec
     ; CHECK: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
     %0:vgpr(p3) = COPY $vgpr0
@@ -644,7 +644,7 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_clearlo4
     ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; CHECK: %const:vgpr_32 = V_MOV_B32_e32 4294967280, implicit $exec
+    ; CHECK: %const:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec
     ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec
     ; CHECK: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
     %0:vgpr(p3) = COPY $vgpr0
@@ -665,7 +665,7 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_clearlo29
     ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; CHECK: %const:vgpr_32 = V_MOV_B32_e32 3758096384, implicit $exec
+    ; CHECK: %const:vgpr_32 = V_MOV_B32_e32 -536870912, implicit $exec
     ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec
     ; CHECK: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
     %0:vgpr(p3) = COPY $vgpr0

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir
index 1c03557cc495..f923a4c9f02b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir
@@ -20,7 +20,7 @@ body: |
     ; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
     ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
     ; WAVE64: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
-    ; WAVE64: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]]
+    ; WAVE64: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc
     ; WAVE64: S_ENDPGM 0, implicit [[S_XOR_B64_]]
     ; WAVE32-LABEL: name: xor_s1_vcc_vcc_vcc
     ; WAVE32: liveins: $vgpr0, $vgpr1
@@ -30,7 +30,7 @@ body: |
     ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
     ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
     ; WAVE32: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
-    ; WAVE32: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]]
+    ; WAVE32: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc
     ; WAVE32: S_ENDPGM 0, implicit [[S_XOR_B32_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
@@ -387,7 +387,7 @@ body: |
     ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
     ; WAVE64: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY1]], implicit $exec
     ; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec
-    ; WAVE64: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
+    ; WAVE64: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
     ; WAVE64: S_ENDPGM 0, implicit [[S_XOR_B64_]]
     ; WAVE32-LABEL: name: xor_s1_vcc_copy_to_vcc
     ; WAVE32: liveins: $vgpr0, $vgpr1
@@ -398,7 +398,7 @@ body: |
     ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
     ; WAVE32: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY1]], implicit $exec
     ; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec
-    ; WAVE32: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
+    ; WAVE32: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
     ; WAVE32: S_ENDPGM 0, implicit [[S_XOR_B32_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
@@ -426,24 +426,24 @@ body:             |
     ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave32
     ; WAVE64: liveins: $vgpr0
     ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+    ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
     ; WAVE64: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec
     ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
     ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc
     ; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
-    ; WAVE64: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
+    ; WAVE64: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
     ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_XOR_B64_]]
     ; WAVE64: S_ENDPGM 0, implicit [[COPY1]]
     ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave32
     ; WAVE32: liveins: $vgpr0
     ; WAVE32: $vcc_hi = IMPLICIT_DEF
     ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+    ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
     ; WAVE32: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec
     ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
     ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc
     ; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
-    ; WAVE32: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
+    ; WAVE32: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
     ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_XOR_B32_]]
     ; WAVE32: S_ENDPGM 0, implicit [[COPY1]]
     %1:vgpr(s32) = COPY $vgpr0
@@ -472,24 +472,24 @@ body:             |
     ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave64
     ; WAVE64: liveins: $vgpr0
     ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+    ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
     ; WAVE64: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec
     ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
     ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc
     ; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
-    ; WAVE64: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
+    ; WAVE64: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
     ; WAVE64: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_XOR_B64_]]
     ; WAVE64: S_ENDPGM 0, implicit [[COPY1]]
     ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave64
     ; WAVE32: liveins: $vgpr0
     ; WAVE32: $vcc_hi = IMPLICIT_DEF
     ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+    ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
     ; WAVE32: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec
     ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
     ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc
     ; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
-    ; WAVE32: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
+    ; WAVE32: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
     ; WAVE32: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_XOR_B32_]]
     ; WAVE32: S_ENDPGM 0, implicit [[COPY1]]
     %1:vgpr(s32) = COPY $vgpr0

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
index 66425c27a19f..aba47890f61c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
@@ -973,7 +973,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg1(<4 x i32> inreg %desc) {
   ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
   ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
   ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
-  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
+  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
   ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
   ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
   ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
@@ -987,7 +987,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg1(<4 x i32> inreg %desc) {
   ; GFX7:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
   ; GFX7:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
   ; GFX7:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
-  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
+  ; GFX7:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
   ; GFX7:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
   ; GFX7:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
   ; GFX7:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
@@ -1001,7 +1001,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg1(<4 x i32> inreg %desc) {
   ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
   ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
   ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
-  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
+  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
   ; GFX8:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
   ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
   ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
@@ -1020,7 +1020,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg4(<4 x i32> inreg %desc) {
   ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
   ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
   ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
-  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967292
+  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4
   ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
   ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
   ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
@@ -1047,7 +1047,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg4(<4 x i32> inreg %desc) {
   ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
   ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
   ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
-  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967292
+  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4
   ; GFX8:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
   ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
   ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
@@ -1066,7 +1066,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg8(<4 x i32> inreg %desc) {
   ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
   ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
   ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
-  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967288
+  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8
   ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
   ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
   ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
@@ -1093,7 +1093,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg8(<4 x i32> inreg %desc) {
   ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
   ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
   ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
-  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967288
+  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8
   ; GFX8:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
   ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
   ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
@@ -1112,7 +1112,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit31(<4 x i32> inreg %desc) {
   ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
   ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
   ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
-  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
   ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
   ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
   ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
@@ -1139,7 +1139,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit31(<4 x i32> inreg %desc) {
   ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
   ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
   ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
-  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
   ; GFX8:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
   ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
   ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
@@ -1342,7 +1342,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit20(<4 x i32> inreg %desc)
   ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
   ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
   ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
-  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4293918720
+  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576
   ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
   ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
   ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
@@ -1369,7 +1369,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit20(<4 x i32> inreg %desc)
   ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
   ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
   ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
-  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4293918720
+  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576
   ; GFX8:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
   ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
   ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
@@ -1433,7 +1433,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit19(<4 x i32> inreg %desc)
   ; GFX6:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
   ; GFX6:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
   ; GFX6:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
-  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294443008
+  ; GFX6:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288
   ; GFX6:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
   ; GFX6:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
   ; GFX6:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
@@ -1460,7 +1460,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit19(<4 x i32> inreg %desc)
   ; GFX8:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
   ; GFX8:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
   ; GFX8:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
-  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294443008
+  ; GFX8:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288
   ; GFX8:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0 :: (dereferenceable invariant load 4)
   ; GFX8:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
   ; GFX8:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec

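The test updates above are mechanical: with the selector now taking the signed
value of the ConstantInt, a 32-bit constant that used to print as 4294967295
prints and encodes as -1, which the scalar ALU can fold as an inline immediate
instead of carrying a 32-bit literal. A minimal standalone sketch of that
sign-extension step (just the arithmetic, not the actual APInt implementation):

  #include <cstdint>
  #include <iostream>

  // Sign-extend the low 'Bits' bits of 'Value' to 64 bits, matching what
  // getSExtValue() returns for a constant narrower than 64 bits.
  // Assumes 1 <= Bits <= 63.
  int64_t signExtend(uint64_t Value, unsigned Bits) {
    uint64_t SignBit = uint64_t(1) << (Bits - 1);
    Value &= (SignBit << 1) - 1;  // keep only the low 'Bits' bits
    return int64_t(Value ^ SignBit) - int64_t(SignBit);
  }

  int main() {
    std::cout << signExtend(0xffffffff, 32) << '\n';  // -1, not 4294967295
    std::cout << signExtend(0xfffffff8, 32) << '\n';  // -8
    std::cout << signExtend(0x80000000, 32) << '\n';  // -2147483648
  }
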
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll
index 7d116f8e8925..e5d26476e942 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll
@@ -414,8 +414,8 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out)
 ; GFX6-LABEL: bfe_i32_constant_fold_test_4:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX6-NEXT:    s_bfe_i32 s3, -1, 0x10000
-; GFX6-NEXT:    v_mov_b32_e32 v0, s3
+; GFX6-NEXT:    s_bfe_i32 s2, -1, 0x10000
+; GFX6-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
@@ -617,8 +617,8 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out)
 ; GFX6-LABEL: bfe_i32_constant_fold_test_16:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX6-NEXT:    s_bfe_i32 s3, -1, 0x70001
-; GFX6-NEXT:    v_mov_b32_e32 v0, s3
+; GFX6-NEXT:    s_bfe_i32 s2, -1, 0x70001
+; GFX6-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll
index a5737e8233af..6fc1cd575308 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll
@@ -694,8 +694,8 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out)
 ; GFX6-LABEL: bfe_u32_constant_fold_test_4:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX6-NEXT:    s_bfe_u32 s3, -1, 0x10000
-; GFX6-NEXT:    v_mov_b32_e32 v0, s3
+; GFX6-NEXT:    s_bfe_u32 s2, -1, 0x10000
+; GFX6-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
@@ -897,8 +897,8 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out)
 ; GFX6-LABEL: bfe_u32_constant_fold_test_16:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
-; GFX6-NEXT:    s_bfe_u32 s3, -1, 0x70001
-; GFX6-NEXT:    v_mov_b32_e32 v0, s3
+; GFX6-NEXT:    s_bfe_u32 s2, -1, 0x70001
+; GFX6-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
index 06bf7f794930..23398f8ecf6e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
@@ -8,9 +8,9 @@ define amdgpu_kernel void @localize_constants(i1 %cond) {
 ; GFX9-LABEL: localize_constants:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_load_dword s1, s[4:5], 0x0
-; GFX9-NEXT:    s_mov_b32 s0, 1
+; GFX9-NEXT:    s_mov_b32 s0, -1
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_xor_b32 s1, s1, 1
+; GFX9-NEXT:    s_xor_b32 s1, s1, -1
 ; GFX9-NEXT:    s_and_b32 s1, s1, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s1, 0
 ; GFX9-NEXT:    s_cbranch_scc0 BB0_2
@@ -83,9 +83,9 @@ define amdgpu_kernel void @localize_globals(i1 %cond) {
 ; GFX9-LABEL: localize_globals:
 ; GFX9:       ; %bb.0: ; %entry
 ; GFX9-NEXT:    s_load_dword s1, s[4:5], 0x0
-; GFX9-NEXT:    s_mov_b32 s0, 1
+; GFX9-NEXT:    s_mov_b32 s0, -1
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_xor_b32 s1, s1, 1
+; GFX9-NEXT:    s_xor_b32 s1, s1, -1
 ; GFX9-NEXT:    s_and_b32 s1, s1, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s1, 0
 ; GFX9-NEXT:    s_cbranch_scc0 BB1_2

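The saddsat changes below show the payoff in the ISA: a sign-extended 16-bit
constant can be materialized with s_movk_i32, whose 16-bit literal is
sign-extended in hardware (0x8000 becomes 0xffff8000), and 0x80000000 can be
materialized with s_brev_b32 / v_bfrev_b32 from the inline immediate 1,
avoiding the extra literal dword in both cases. A small sketch modeling the
constant semantics of those two instructions (an illustration of the
encodings, not an emulator; the helper names are made up):

  #include <cassert>
  #include <cstdint>

  // s_movk_i32: the 16-bit literal is sign-extended to 32 bits.
  int32_t movk(uint16_t Imm) { return int32_t(int16_t(Imm)); }

  // s_brev_b32 / v_bfrev_b32: 32-bit bit reversal of the source operand.
  uint32_t brev32(uint32_t V) {
    uint32_t R = 0;
    for (int I = 0; I < 32; ++I) {
      R = (R << 1) | (V & 1);
      V >>= 1;
    }
    return R;
  }

  int main() {
    assert(movk(0x8000) == int32_t(0xffff8000));  // -32768
    assert(brev32(1) == 0x80000000u);             // sign-bit-only pattern
  }
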
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
index 80ff9bb2b575..b2e7f1ea326f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
@@ -101,7 +101,7 @@ define amdgpu_ps i7 @s_saddsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
 ; GFX8-NEXT:    s_sub_i32 s5, 0x7fff, s5
 ; GFX8-NEXT:    s_cmp_lt_i32 s3, s4
 ; GFX8-NEXT:    s_cselect_b32 s3, s3, s4
-; GFX8-NEXT:    s_sub_i32 s3, 0x8000, s3
+; GFX8-NEXT:    s_sub_i32 s3, 0xffff8000, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_cmp_gt_i32 s3, s1
@@ -127,7 +127,7 @@ define amdgpu_ps i7 @s_saddsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
 ; GFX9-NEXT:    s_sub_i32 s5, 0x7fff, s5
 ; GFX9-NEXT:    s_cmp_lt_i32 s3, s4
 ; GFX9-NEXT:    s_cselect_b32 s3, s3, s4
-; GFX9-NEXT:    s_sub_i32 s3, 0x8000, s3
+; GFX9-NEXT:    s_sub_i32 s3, 0xffff8000, s3
 ; GFX9-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX9-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX9-NEXT:    s_cmp_gt_i32 s3, s1
@@ -155,7 +155,7 @@ define amdgpu_ps i7 @s_saddsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
 ; GFX10-NEXT:    s_sub_i32 s5, 0x7fff, s5
 ; GFX10-NEXT:    s_cmp_lt_i32 s3, s4
 ; GFX10-NEXT:    s_cselect_b32 s3, s3, s4
-; GFX10-NEXT:    s_sub_i32 s3, 0x8000, s3
+; GFX10-NEXT:    s_sub_i32 s3, 0xffff8000, s3
 ; GFX10-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX10-NEXT:    s_cmp_gt_i32 s3, s1
 ; GFX10-NEXT:    s_cselect_b32 s1, s3, s1
@@ -268,7 +268,7 @@ define amdgpu_ps i8 @s_saddsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
 ; GFX8-NEXT:    s_sub_i32 s5, 0x7fff, s5
 ; GFX8-NEXT:    s_cmp_lt_i32 s3, s4
 ; GFX8-NEXT:    s_cselect_b32 s3, s3, s4
-; GFX8-NEXT:    s_sub_i32 s3, 0x8000, s3
+; GFX8-NEXT:    s_sub_i32 s3, 0xffff8000, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_cmp_gt_i32 s3, s1
@@ -294,7 +294,7 @@ define amdgpu_ps i8 @s_saddsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
 ; GFX9-NEXT:    s_sub_i32 s5, 0x7fff, s5
 ; GFX9-NEXT:    s_cmp_lt_i32 s3, s4
 ; GFX9-NEXT:    s_cselect_b32 s3, s3, s4
-; GFX9-NEXT:    s_sub_i32 s3, 0x8000, s3
+; GFX9-NEXT:    s_sub_i32 s3, 0xffff8000, s3
 ; GFX9-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX9-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX9-NEXT:    s_cmp_gt_i32 s3, s1
@@ -322,7 +322,7 @@ define amdgpu_ps i8 @s_saddsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
 ; GFX10-NEXT:    s_sub_i32 s5, 0x7fff, s5
 ; GFX10-NEXT:    s_cmp_lt_i32 s3, s4
 ; GFX10-NEXT:    s_cselect_b32 s3, s3, s4
-; GFX10-NEXT:    s_sub_i32 s3, 0x8000, s3
+; GFX10-NEXT:    s_sub_i32 s3, 0xffff8000, s3
 ; GFX10-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX10-NEXT:    s_cmp_gt_i32 s3, s1
 ; GFX10-NEXT:    s_cselect_b32 s1, s3, s1
@@ -344,7 +344,7 @@ define i16 @v_saddsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 8, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
-; GFX6-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s5, 1
 ; GFX6-NEXT:    v_min_i32_e32 v5, 0, v0
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 8, v1
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
@@ -379,7 +379,7 @@ define i16 @v_saddsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
 ; GFX8-NEXT:    v_mov_b32_e32 v2, 8
 ; GFX8-NEXT:    v_lshrrev_b32_sdwa v3, v2, v0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
-; GFX8-NEXT:    s_mov_b32 s5, 0x8000
+; GFX8-NEXT:    s_movk_i32 s5, 0x8000
 ; GFX8-NEXT:    v_min_i16_e32 v5, 0, v0
 ; GFX8-NEXT:    v_lshrrev_b32_sdwa v2, v2, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
@@ -409,7 +409,7 @@ define i16 @v_saddsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
 ; GFX9-NEXT:    s_mov_b32 s4, 8
 ; GFX9-NEXT:    v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
-; GFX9-NEXT:    s_mov_b32 s5, 0x8000
+; GFX9-NEXT:    s_movk_i32 s5, 0x8000
 ; GFX9-NEXT:    v_min_i16_e32 v5, 0, v0
 ; GFX9-NEXT:    v_lshrrev_b32_sdwa v3, s4, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX9-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
@@ -440,7 +440,7 @@ define i16 @v_saddsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
 ; GFX10-NEXT:    s_mov_b32 s4, 8
 ; GFX10-NEXT:    v_lshlrev_b16_e64 v2, 8, v0
 ; GFX10-NEXT:    v_lshrrev_b32_sdwa v0, s4, v0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX10-NEXT:    s_mov_b32 s5, 0x8000
+; GFX10-NEXT:    s_movk_i32 s5, 0x8000
 ; GFX10-NEXT:    v_lshrrev_b32_sdwa v3, s4, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX10-NEXT:    v_lshlrev_b16_e64 v1, 8, v1
 ; GFX10-NEXT:    v_min_i16_e64 v4, v2, 0
@@ -483,7 +483,7 @@ define amdgpu_ps i16 @s_saddsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
 ; GFX6-NEXT:    s_cselect_b32 s6, s0, 0
 ; GFX6-NEXT:    s_sub_i32 s6, s4, s6
 ; GFX6-NEXT:    s_cmp_lt_i32 s0, 0
-; GFX6-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s5, 1
 ; GFX6-NEXT:    s_cselect_b32 s7, s0, 0
 ; GFX6-NEXT:    s_sub_i32 s7, s5, s7
 ; GFX6-NEXT:    s_cmp_gt_i32 s7, s1
@@ -527,7 +527,7 @@ define amdgpu_ps i16 @s_saddsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_cselect_b32 s9, s7, s8
 ; GFX8-NEXT:    s_sub_i32 s9, s5, s9
 ; GFX8-NEXT:    s_cmp_lt_i32 s7, s8
-; GFX8-NEXT:    s_mov_b32 s6, 0x8000
+; GFX8-NEXT:    s_movk_i32 s6, 0x8000
 ; GFX8-NEXT:    s_cselect_b32 s7, s7, s8
 ; GFX8-NEXT:    s_sub_i32 s7, s6, s7
 ; GFX8-NEXT:    s_sext_i32_i16 s7, s7
@@ -582,7 +582,7 @@ define amdgpu_ps i16 @s_saddsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
 ; GFX9-NEXT:    s_cselect_b32 s9, s7, s8
 ; GFX9-NEXT:    s_sub_i32 s9, s5, s9
 ; GFX9-NEXT:    s_cmp_lt_i32 s7, s8
-; GFX9-NEXT:    s_mov_b32 s6, 0x8000
+; GFX9-NEXT:    s_movk_i32 s6, 0x8000
 ; GFX9-NEXT:    s_cselect_b32 s7, s7, s8
 ; GFX9-NEXT:    s_sub_i32 s7, s6, s7
 ; GFX9-NEXT:    s_sext_i32_i16 s7, s7
@@ -635,7 +635,7 @@ define amdgpu_ps i16 @s_saddsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
 ; GFX10-NEXT:    s_cmp_gt_i32 s5, s6
 ; GFX10-NEXT:    s_movk_i32 s7, 0x7fff
 ; GFX10-NEXT:    s_cselect_b32 s8, s5, s6
-; GFX10-NEXT:    s_mov_b32 s9, 0x8000
+; GFX10-NEXT:    s_movk_i32 s9, 0x8000
 ; GFX10-NEXT:    s_sub_i32 s8, s7, s8
 ; GFX10-NEXT:    s_cmp_lt_i32 s5, s6
 ; GFX10-NEXT:    s_sext_i32_i16 s1, s1
@@ -693,7 +693,7 @@ define i32 @v_saddsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
-; GFX6-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s5, 1
 ; GFX6-NEXT:    v_min_i32_e32 v10, 0, v0
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
@@ -726,7 +726,7 @@ define i32 @v_saddsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
 ; GFX6-NEXT:    v_min_i32_e32 v3, v3, v5
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 24, v4
-; GFX6-NEXT:    v_mov_b32_e32 v11, 0x80000000
+; GFX6-NEXT:    v_bfrev_b32_e32 v11, 1
 ; GFX6-NEXT:    v_min_i32_e32 v6, 0, v3
 ; GFX6-NEXT:    v_max_i32_e32 v5, 0, v3
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 24, v1
@@ -760,7 +760,7 @@ define i32 @v_saddsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 24, v0
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
-; GFX8-NEXT:    s_mov_b32 s5, 0x8000
+; GFX8-NEXT:    s_movk_i32 s5, 0x8000
 ; GFX8-NEXT:    v_min_i16_e32 v10, 0, v0
 ; GFX8-NEXT:    v_lshrrev_b32_sdwa v2, v2, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
@@ -818,7 +818,7 @@ define i32 @v_saddsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
 ; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
 ; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
 ; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
-; GFX9-NEXT:    s_mov_b32 s5, 0x8000
+; GFX9-NEXT:    s_movk_i32 s5, 0x8000
 ; GFX9-NEXT:    v_min_i16_e32 v10, 0, v0
 ; GFX9-NEXT:    v_lshrrev_b32_sdwa v5, s4, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
@@ -878,7 +878,7 @@ define i32 @v_saddsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
 ; GFX10-NEXT:    v_lshrrev_b32_sdwa v3, s5, v0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX10-NEXT:    v_min_i16_e64 v8, v4, 0
 ; GFX10-NEXT:    v_lshrrev_b32_sdwa v6, s5, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX10-NEXT:    s_mov_b32 s5, 0x8000
+; GFX10-NEXT:    s_movk_i32 s5, 0x8000
 ; GFX10-NEXT:    v_min_i16_e64 v9, v2, 0
 ; GFX10-NEXT:    v_lshlrev_b16_e64 v7, 8, v1
 ; GFX10-NEXT:    v_sub_nc_u16_e64 v8, s5, v8
@@ -946,7 +946,7 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX6-NEXT:    s_cselect_b32 s10, s0, 0
 ; GFX6-NEXT:    s_sub_i32 s10, s8, s10
 ; GFX6-NEXT:    s_cmp_lt_i32 s0, 0
-; GFX6-NEXT:    s_mov_b32 s9, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s9, 1
 ; GFX6-NEXT:    s_cselect_b32 s11, s0, 0
 ; GFX6-NEXT:    s_sub_i32 s11, s9, s11
 ; GFX6-NEXT:    s_cmp_gt_i32 s11, s1
@@ -1028,7 +1028,7 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_cselect_b32 s13, s11, s12
 ; GFX8-NEXT:    s_sub_i32 s13, s9, s13
 ; GFX8-NEXT:    s_cmp_lt_i32 s11, s12
-; GFX8-NEXT:    s_mov_b32 s10, 0x8000
+; GFX8-NEXT:    s_movk_i32 s10, 0x8000
 ; GFX8-NEXT:    s_cselect_b32 s11, s11, s12
 ; GFX8-NEXT:    s_sub_i32 s11, s10, s11
 ; GFX8-NEXT:    s_sext_i32_i16 s11, s11
@@ -1133,7 +1133,7 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX9-NEXT:    s_cselect_b32 s13, s11, s12
 ; GFX9-NEXT:    s_sub_i32 s13, s9, s13
 ; GFX9-NEXT:    s_cmp_lt_i32 s11, s12
-; GFX9-NEXT:    s_mov_b32 s10, 0x8000
+; GFX9-NEXT:    s_movk_i32 s10, 0x8000
 ; GFX9-NEXT:    s_cselect_b32 s11, s11, s12
 ; GFX9-NEXT:    s_sub_i32 s11, s10, s11
 ; GFX9-NEXT:    s_sext_i32_i16 s11, s11
@@ -1236,7 +1236,7 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX10-NEXT:    s_cmp_gt_i32 s9, s10
 ; GFX10-NEXT:    s_movk_i32 s11, 0x7fff
 ; GFX10-NEXT:    s_cselect_b32 s12, s9, s10
-; GFX10-NEXT:    s_mov_b32 s13, 0x8000
+; GFX10-NEXT:    s_movk_i32 s13, 0x8000
 ; GFX10-NEXT:    s_sub_i32 s12, s11, s12
 ; GFX10-NEXT:    s_cmp_lt_i32 s9, s10
 ; GFX10-NEXT:    s_sext_i32_i16 s1, s1
@@ -1723,7 +1723,7 @@ define <2 x i32> @v_saddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; GFX6-LABEL: v_saddsat_v2i32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s5, 1
 ; GFX6-NEXT:    v_min_i32_e32 v5, 0, v0
 ; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, s5, v5
 ; GFX6-NEXT:    s_brev_b32 s4, -2
@@ -1744,7 +1744,7 @@ define <2 x i32> @v_saddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; GFX8-LABEL: v_saddsat_v2i32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX8-NEXT:    s_brev_b32 s5, 1
 ; GFX8-NEXT:    v_min_i32_e32 v5, 0, v0
 ; GFX8-NEXT:    v_sub_u32_e32 v5, vcc, s5, v5
 ; GFX8-NEXT:    s_brev_b32 s4, -2
@@ -1765,7 +1765,7 @@ define <2 x i32> @v_saddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; GFX9-LABEL: v_saddsat_v2i32:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX9-NEXT:    s_brev_b32 s5, 1
 ; GFX9-NEXT:    v_min_i32_e32 v5, 0, v0
 ; GFX9-NEXT:    v_sub_u32_e32 v5, s5, v5
 ; GFX9-NEXT:    s_brev_b32 s4, -2
@@ -1789,7 +1789,7 @@ define <2 x i32> @v_saddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    v_min_i32_e32 v4, 0, v0
 ; GFX10-NEXT:    v_min_i32_e32 v5, 0, v1
-; GFX10-NEXT:    s_mov_b32 s4, 0x80000000
+; GFX10-NEXT:    s_brev_b32 s4, 1
 ; GFX10-NEXT:    v_max_i32_e32 v6, 0, v0
 ; GFX10-NEXT:    v_max_i32_e32 v7, 0, v1
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v4, s4, v4
@@ -1817,7 +1817,7 @@ define amdgpu_ps <2 x i32> @s_saddsat_v2i32(<2 x i32> inreg %lhs, <2 x i32> inre
 ; GFX6-NEXT:    s_cselect_b32 s6, s0, 0
 ; GFX6-NEXT:    s_sub_i32 s6, s4, s6
 ; GFX6-NEXT:    s_cmp_lt_i32 s0, 0
-; GFX6-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s5, 1
 ; GFX6-NEXT:    s_cselect_b32 s7, s0, 0
 ; GFX6-NEXT:    s_sub_i32 s7, s5, s7
 ; GFX6-NEXT:    s_cmp_gt_i32 s7, s2
@@ -1845,7 +1845,7 @@ define amdgpu_ps <2 x i32> @s_saddsat_v2i32(<2 x i32> inreg %lhs, <2 x i32> inre
 ; GFX8-NEXT:    s_cselect_b32 s6, s0, 0
 ; GFX8-NEXT:    s_sub_i32 s6, s4, s6
 ; GFX8-NEXT:    s_cmp_lt_i32 s0, 0
-; GFX8-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX8-NEXT:    s_brev_b32 s5, 1
 ; GFX8-NEXT:    s_cselect_b32 s7, s0, 0
 ; GFX8-NEXT:    s_sub_i32 s7, s5, s7
 ; GFX8-NEXT:    s_cmp_gt_i32 s7, s2
@@ -1873,7 +1873,7 @@ define amdgpu_ps <2 x i32> @s_saddsat_v2i32(<2 x i32> inreg %lhs, <2 x i32> inre
 ; GFX9-NEXT:    s_cselect_b32 s6, s0, 0
 ; GFX9-NEXT:    s_sub_i32 s6, s4, s6
 ; GFX9-NEXT:    s_cmp_lt_i32 s0, 0
-; GFX9-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX9-NEXT:    s_brev_b32 s5, 1
 ; GFX9-NEXT:    s_cselect_b32 s7, s0, 0
 ; GFX9-NEXT:    s_sub_i32 s7, s5, s7
 ; GFX9-NEXT:    s_cmp_gt_i32 s7, s2
@@ -1899,7 +1899,7 @@ define amdgpu_ps <2 x i32> @s_saddsat_v2i32(<2 x i32> inreg %lhs, <2 x i32> inre
 ; GFX10-NEXT:    s_cmp_gt_i32 s0, 0
 ; GFX10-NEXT:    s_brev_b32 s4, -2
 ; GFX10-NEXT:    s_cselect_b32 s5, s0, 0
-; GFX10-NEXT:    s_mov_b32 s6, 0x80000000
+; GFX10-NEXT:    s_brev_b32 s6, 1
 ; GFX10-NEXT:    s_sub_i32 s5, s4, s5
 ; GFX10-NEXT:    s_cmp_lt_i32 s0, 0
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
@@ -1930,7 +1930,7 @@ define <3 x i32> @v_saddsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
 ; GFX6-LABEL: v_saddsat_v3i32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s5, 1
 ; GFX6-NEXT:    v_min_i32_e32 v7, 0, v0
 ; GFX6-NEXT:    v_sub_i32_e32 v7, vcc, s5, v7
 ; GFX6-NEXT:    s_brev_b32 s4, -2
@@ -1958,7 +1958,7 @@ define <3 x i32> @v_saddsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
 ; GFX8-LABEL: v_saddsat_v3i32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX8-NEXT:    s_brev_b32 s5, 1
 ; GFX8-NEXT:    v_min_i32_e32 v7, 0, v0
 ; GFX8-NEXT:    v_sub_u32_e32 v7, vcc, s5, v7
 ; GFX8-NEXT:    s_brev_b32 s4, -2
@@ -1986,7 +1986,7 @@ define <3 x i32> @v_saddsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
 ; GFX9-LABEL: v_saddsat_v3i32:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX9-NEXT:    s_brev_b32 s5, 1
 ; GFX9-NEXT:    v_min_i32_e32 v7, 0, v0
 ; GFX9-NEXT:    v_sub_u32_e32 v7, s5, v7
 ; GFX9-NEXT:    s_brev_b32 s4, -2
@@ -2018,7 +2018,7 @@ define <3 x i32> @v_saddsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
 ; GFX10-NEXT:    v_min_i32_e32 v7, 0, v0
 ; GFX10-NEXT:    v_min_i32_e32 v8, 0, v1
 ; GFX10-NEXT:    v_min_i32_e32 v9, 0, v2
-; GFX10-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX10-NEXT:    s_brev_b32 s5, 1
 ; GFX10-NEXT:    v_max_i32_e32 v6, 0, v0
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v14, s5, v7
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v15, s5, v8
@@ -2052,7 +2052,7 @@ define amdgpu_ps <3 x i32> @s_saddsat_v3i32(<3 x i32> inreg %lhs, <3 x i32> inre
 ; GFX6-NEXT:    s_cselect_b32 s8, s0, 0
 ; GFX6-NEXT:    s_sub_i32 s8, s6, s8
 ; GFX6-NEXT:    s_cmp_lt_i32 s0, 0
-; GFX6-NEXT:    s_mov_b32 s7, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s7, 1
 ; GFX6-NEXT:    s_cselect_b32 s9, s0, 0
 ; GFX6-NEXT:    s_sub_i32 s9, s7, s9
 ; GFX6-NEXT:    s_cmp_gt_i32 s9, s3
@@ -2091,7 +2091,7 @@ define amdgpu_ps <3 x i32> @s_saddsat_v3i32(<3 x i32> inreg %lhs, <3 x i32> inre
 ; GFX8-NEXT:    s_cselect_b32 s8, s0, 0
 ; GFX8-NEXT:    s_sub_i32 s8, s6, s8
 ; GFX8-NEXT:    s_cmp_lt_i32 s0, 0
-; GFX8-NEXT:    s_mov_b32 s7, 0x80000000
+; GFX8-NEXT:    s_brev_b32 s7, 1
 ; GFX8-NEXT:    s_cselect_b32 s9, s0, 0
 ; GFX8-NEXT:    s_sub_i32 s9, s7, s9
 ; GFX8-NEXT:    s_cmp_gt_i32 s9, s3
@@ -2130,7 +2130,7 @@ define amdgpu_ps <3 x i32> @s_saddsat_v3i32(<3 x i32> inreg %lhs, <3 x i32> inre
 ; GFX9-NEXT:    s_cselect_b32 s8, s0, 0
 ; GFX9-NEXT:    s_sub_i32 s8, s6, s8
 ; GFX9-NEXT:    s_cmp_lt_i32 s0, 0
-; GFX9-NEXT:    s_mov_b32 s7, 0x80000000
+; GFX9-NEXT:    s_brev_b32 s7, 1
 ; GFX9-NEXT:    s_cselect_b32 s9, s0, 0
 ; GFX9-NEXT:    s_sub_i32 s9, s7, s9
 ; GFX9-NEXT:    s_cmp_gt_i32 s9, s3
@@ -2167,7 +2167,7 @@ define amdgpu_ps <3 x i32> @s_saddsat_v3i32(<3 x i32> inreg %lhs, <3 x i32> inre
 ; GFX10-NEXT:    s_cmp_gt_i32 s0, 0
 ; GFX10-NEXT:    s_brev_b32 s6, -2
 ; GFX10-NEXT:    s_cselect_b32 s7, s0, 0
-; GFX10-NEXT:    s_mov_b32 s8, 0x80000000
+; GFX10-NEXT:    s_brev_b32 s8, 1
 ; GFX10-NEXT:    s_sub_i32 s7, s6, s7
 ; GFX10-NEXT:    s_cmp_lt_i32 s0, 0
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
@@ -2209,7 +2209,7 @@ define <4 x i32> @v_saddsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
 ; GFX6-LABEL: v_saddsat_v4i32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s5, 1
 ; GFX6-NEXT:    v_min_i32_e32 v9, 0, v0
 ; GFX6-NEXT:    v_sub_i32_e32 v9, vcc, s5, v9
 ; GFX6-NEXT:    s_brev_b32 s4, -2
@@ -2244,7 +2244,7 @@ define <4 x i32> @v_saddsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
 ; GFX8-LABEL: v_saddsat_v4i32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX8-NEXT:    s_brev_b32 s5, 1
 ; GFX8-NEXT:    v_min_i32_e32 v9, 0, v0
 ; GFX8-NEXT:    v_sub_u32_e32 v9, vcc, s5, v9
 ; GFX8-NEXT:    s_brev_b32 s4, -2
@@ -2279,7 +2279,7 @@ define <4 x i32> @v_saddsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
 ; GFX9-LABEL: v_saddsat_v4i32:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX9-NEXT:    s_brev_b32 s5, 1
 ; GFX9-NEXT:    v_min_i32_e32 v9, 0, v0
 ; GFX9-NEXT:    v_sub_u32_e32 v9, s5, v9
 ; GFX9-NEXT:    s_brev_b32 s4, -2
@@ -2316,7 +2316,7 @@ define <4 x i32> @v_saddsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    v_min_i32_e32 v8, 0, v0
-; GFX10-NEXT:    s_mov_b32 s4, 0x80000000
+; GFX10-NEXT:    s_brev_b32 s4, 1
 ; GFX10-NEXT:    v_min_i32_e32 v11, 0, v1
 ; GFX10-NEXT:    v_min_i32_e32 v12, 0, v3
 ; GFX10-NEXT:    v_max_i32_e32 v9, 0, v0
@@ -2359,7 +2359,7 @@ define amdgpu_ps <4 x i32> @s_saddsat_v4i32(<4 x i32> inreg %lhs, <4 x i32> inre
 ; GFX6-NEXT:    s_cselect_b32 s10, s0, 0
 ; GFX6-NEXT:    s_sub_i32 s10, s8, s10
 ; GFX6-NEXT:    s_cmp_lt_i32 s0, 0
-; GFX6-NEXT:    s_mov_b32 s9, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s9, 1
 ; GFX6-NEXT:    s_cselect_b32 s11, s0, 0
 ; GFX6-NEXT:    s_sub_i32 s11, s9, s11
 ; GFX6-NEXT:    s_cmp_gt_i32 s11, s4
@@ -2409,7 +2409,7 @@ define amdgpu_ps <4 x i32> @s_saddsat_v4i32(<4 x i32> inreg %lhs, <4 x i32> inre
 ; GFX8-NEXT:    s_cselect_b32 s10, s0, 0
 ; GFX8-NEXT:    s_sub_i32 s10, s8, s10
 ; GFX8-NEXT:    s_cmp_lt_i32 s0, 0
-; GFX8-NEXT:    s_mov_b32 s9, 0x80000000
+; GFX8-NEXT:    s_brev_b32 s9, 1
 ; GFX8-NEXT:    s_cselect_b32 s11, s0, 0
 ; GFX8-NEXT:    s_sub_i32 s11, s9, s11
 ; GFX8-NEXT:    s_cmp_gt_i32 s11, s4
@@ -2459,7 +2459,7 @@ define amdgpu_ps <4 x i32> @s_saddsat_v4i32(<4 x i32> inreg %lhs, <4 x i32> inre
 ; GFX9-NEXT:    s_cselect_b32 s10, s0, 0
 ; GFX9-NEXT:    s_sub_i32 s10, s8, s10
 ; GFX9-NEXT:    s_cmp_lt_i32 s0, 0
-; GFX9-NEXT:    s_mov_b32 s9, 0x80000000
+; GFX9-NEXT:    s_brev_b32 s9, 1
 ; GFX9-NEXT:    s_cselect_b32 s11, s0, 0
 ; GFX9-NEXT:    s_sub_i32 s11, s9, s11
 ; GFX9-NEXT:    s_cmp_gt_i32 s11, s4
@@ -2507,7 +2507,7 @@ define amdgpu_ps <4 x i32> @s_saddsat_v4i32(<4 x i32> inreg %lhs, <4 x i32> inre
 ; GFX10-NEXT:    s_cmp_gt_i32 s0, 0
 ; GFX10-NEXT:    s_brev_b32 s8, -2
 ; GFX10-NEXT:    s_cselect_b32 s9, s0, 0
-; GFX10-NEXT:    s_mov_b32 s10, 0x80000000
+; GFX10-NEXT:    s_brev_b32 s10, 1
 ; GFX10-NEXT:    s_sub_i32 s9, s8, s9
 ; GFX10-NEXT:    s_cmp_lt_i32 s0, 0
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
@@ -2560,7 +2560,7 @@ define <5 x i32> @v_saddsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) {
 ; GFX6-LABEL: v_saddsat_v5i32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s5, 1
 ; GFX6-NEXT:    v_min_i32_e32 v12, 0, v0
 ; GFX6-NEXT:    v_sub_i32_e32 v12, vcc, s5, v12
 ; GFX6-NEXT:    s_brev_b32 s4, -2
@@ -2582,7 +2582,7 @@ define <5 x i32> @v_saddsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) {
 ; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, s4, v5
 ; GFX6-NEXT:    v_max_i32_e32 v6, v6, v7
 ; GFX6-NEXT:    v_min_i32_e32 v5, v6, v5
-; GFX6-NEXT:    v_mov_b32_e32 v13, 0x80000000
+; GFX6-NEXT:    v_bfrev_b32_e32 v13, 1
 ; GFX6-NEXT:    v_min_i32_e32 v6, 0, v3
 ; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, v13, v6
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
@@ -2604,7 +2604,7 @@ define <5 x i32> @v_saddsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) {
 ; GFX8-LABEL: v_saddsat_v5i32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX8-NEXT:    s_brev_b32 s5, 1
 ; GFX8-NEXT:    v_min_i32_e32 v12, 0, v0
 ; GFX8-NEXT:    v_sub_u32_e32 v12, vcc, s5, v12
 ; GFX8-NEXT:    s_brev_b32 s4, -2
@@ -2626,7 +2626,7 @@ define <5 x i32> @v_saddsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) {
 ; GFX8-NEXT:    v_sub_u32_e32 v5, vcc, s4, v5
 ; GFX8-NEXT:    v_max_i32_e32 v6, v6, v7
 ; GFX8-NEXT:    v_min_i32_e32 v5, v6, v5
-; GFX8-NEXT:    v_mov_b32_e32 v13, 0x80000000
+; GFX8-NEXT:    v_bfrev_b32_e32 v13, 1
 ; GFX8-NEXT:    v_min_i32_e32 v6, 0, v3
 ; GFX8-NEXT:    v_sub_u32_e32 v6, vcc, v13, v6
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v5
@@ -2648,7 +2648,7 @@ define <5 x i32> @v_saddsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) {
 ; GFX9-LABEL: v_saddsat_v5i32:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX9-NEXT:    s_brev_b32 s5, 1
 ; GFX9-NEXT:    v_min_i32_e32 v12, 0, v0
 ; GFX9-NEXT:    v_sub_u32_e32 v12, s5, v12
 ; GFX9-NEXT:    s_brev_b32 s4, -2
@@ -2670,7 +2670,7 @@ define <5 x i32> @v_saddsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) {
 ; GFX9-NEXT:    v_sub_u32_e32 v5, s4, v5
 ; GFX9-NEXT:    v_max_i32_e32 v6, v6, v7
 ; GFX9-NEXT:    v_min_i32_e32 v5, v6, v5
-; GFX9-NEXT:    v_mov_b32_e32 v13, 0x80000000
+; GFX9-NEXT:    v_bfrev_b32_e32 v13, 1
 ; GFX9-NEXT:    v_min_i32_e32 v6, 0, v3
 ; GFX9-NEXT:    v_sub_u32_e32 v6, v13, v6
 ; GFX9-NEXT:    v_add_u32_e32 v2, v2, v5
@@ -2694,10 +2694,10 @@ define <5 x i32> @v_saddsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    v_min_i32_e32 v13, 0, v1
-; GFX10-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX10-NEXT:    s_brev_b32 s5, 1
 ; GFX10-NEXT:    v_min_i32_e32 v10, 0, v0
 ; GFX10-NEXT:    v_min_i32_e32 v16, 0, v2
-; GFX10-NEXT:    v_mov_b32_e32 v15, 0x80000000
+; GFX10-NEXT:    v_bfrev_b32_e32 v15, 1
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v13, s5, v13
 ; GFX10-NEXT:    v_min_i32_e32 v17, 0, v4
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v10, s5, v10
@@ -2746,7 +2746,7 @@ define amdgpu_ps <5 x i32> @s_saddsat_v5i32(<5 x i32> inreg %lhs, <5 x i32> inre
 ; GFX6-NEXT:    s_cselect_b32 s12, s0, 0
 ; GFX6-NEXT:    s_sub_i32 s12, s10, s12
 ; GFX6-NEXT:    s_cmp_lt_i32 s0, 0
-; GFX6-NEXT:    s_mov_b32 s11, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s11, 1
 ; GFX6-NEXT:    s_cselect_b32 s13, s0, 0
 ; GFX6-NEXT:    s_sub_i32 s13, s11, s13
 ; GFX6-NEXT:    s_cmp_gt_i32 s13, s5
@@ -2807,7 +2807,7 @@ define amdgpu_ps <5 x i32> @s_saddsat_v5i32(<5 x i32> inreg %lhs, <5 x i32> inre
 ; GFX8-NEXT:    s_cselect_b32 s12, s0, 0
 ; GFX8-NEXT:    s_sub_i32 s12, s10, s12
 ; GFX8-NEXT:    s_cmp_lt_i32 s0, 0
-; GFX8-NEXT:    s_mov_b32 s11, 0x80000000
+; GFX8-NEXT:    s_brev_b32 s11, 1
 ; GFX8-NEXT:    s_cselect_b32 s13, s0, 0
 ; GFX8-NEXT:    s_sub_i32 s13, s11, s13
 ; GFX8-NEXT:    s_cmp_gt_i32 s13, s5
@@ -2868,7 +2868,7 @@ define amdgpu_ps <5 x i32> @s_saddsat_v5i32(<5 x i32> inreg %lhs, <5 x i32> inre
 ; GFX9-NEXT:    s_cselect_b32 s12, s0, 0
 ; GFX9-NEXT:    s_sub_i32 s12, s10, s12
 ; GFX9-NEXT:    s_cmp_lt_i32 s0, 0
-; GFX9-NEXT:    s_mov_b32 s11, 0x80000000
+; GFX9-NEXT:    s_brev_b32 s11, 1
 ; GFX9-NEXT:    s_cselect_b32 s13, s0, 0
 ; GFX9-NEXT:    s_sub_i32 s13, s11, s13
 ; GFX9-NEXT:    s_cmp_gt_i32 s13, s5
@@ -2927,7 +2927,7 @@ define amdgpu_ps <5 x i32> @s_saddsat_v5i32(<5 x i32> inreg %lhs, <5 x i32> inre
 ; GFX10-NEXT:    s_cmp_gt_i32 s0, 0
 ; GFX10-NEXT:    s_brev_b32 s10, -2
 ; GFX10-NEXT:    s_cselect_b32 s11, s0, 0
-; GFX10-NEXT:    s_mov_b32 s12, 0x80000000
+; GFX10-NEXT:    s_brev_b32 s12, 1
 ; GFX10-NEXT:    s_sub_i32 s11, s10, s11
 ; GFX10-NEXT:    s_cmp_lt_i32 s0, 0
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
@@ -2991,7 +2991,7 @@ define <16 x i32> @v_saddsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
 ; GFX6-LABEL: v_saddsat_v16i32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    s_mov_b32 s4, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s4, 1
 ; GFX6-NEXT:    v_min_i32_e32 v32, 0, v0
 ; GFX6-NEXT:    v_sub_i32_e32 v32, vcc, s4, v32
 ; GFX6-NEXT:    v_max_i32_e32 v16, v32, v16
@@ -3014,7 +3014,7 @@ define <16 x i32> @v_saddsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
 ; GFX6-NEXT:    v_sub_i32_e32 v17, vcc, s5, v17
 ; GFX6-NEXT:    v_min_i32_e32 v16, v16, v17
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v16
-; GFX6-NEXT:    v_mov_b32_e32 v16, 0x80000000
+; GFX6-NEXT:    v_bfrev_b32_e32 v16, 1
 ; GFX6-NEXT:    v_min_i32_e32 v17, 0, v3
 ; GFX6-NEXT:    v_sub_i32_e32 v17, vcc, v16, v17
 ; GFX6-NEXT:    v_max_i32_e32 v17, v17, v19
@@ -3112,7 +3112,7 @@ define <16 x i32> @v_saddsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
 ; GFX8-LABEL: v_saddsat_v16i32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_mov_b32 s4, 0x80000000
+; GFX8-NEXT:    s_brev_b32 s4, 1
 ; GFX8-NEXT:    v_min_i32_e32 v32, 0, v0
 ; GFX8-NEXT:    v_sub_u32_e32 v32, vcc, s4, v32
 ; GFX8-NEXT:    v_max_i32_e32 v16, v32, v16
@@ -3135,7 +3135,7 @@ define <16 x i32> @v_saddsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
 ; GFX8-NEXT:    v_sub_u32_e32 v17, vcc, s5, v17
 ; GFX8-NEXT:    v_min_i32_e32 v16, v16, v17
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v16
-; GFX8-NEXT:    v_mov_b32_e32 v16, 0x80000000
+; GFX8-NEXT:    v_bfrev_b32_e32 v16, 1
 ; GFX8-NEXT:    v_min_i32_e32 v17, 0, v3
 ; GFX8-NEXT:    v_sub_u32_e32 v17, vcc, v16, v17
 ; GFX8-NEXT:    v_max_i32_e32 v17, v17, v19
@@ -3233,7 +3233,7 @@ define <16 x i32> @v_saddsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
 ; GFX9-LABEL: v_saddsat_v16i32:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s4, 0x80000000
+; GFX9-NEXT:    s_brev_b32 s4, 1
 ; GFX9-NEXT:    v_min_i32_e32 v32, 0, v0
 ; GFX9-NEXT:    v_sub_u32_e32 v32, s4, v32
 ; GFX9-NEXT:    v_max_i32_e32 v16, v32, v16
@@ -3256,7 +3256,7 @@ define <16 x i32> @v_saddsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
 ; GFX9-NEXT:    v_sub_u32_e32 v17, s5, v17
 ; GFX9-NEXT:    v_min_i32_e32 v16, v16, v17
 ; GFX9-NEXT:    v_add_u32_e32 v2, v2, v16
-; GFX9-NEXT:    v_mov_b32_e32 v16, 0x80000000
+; GFX9-NEXT:    v_bfrev_b32_e32 v16, 1
 ; GFX9-NEXT:    v_min_i32_e32 v17, 0, v3
 ; GFX9-NEXT:    v_sub_u32_e32 v17, v16, v17
 ; GFX9-NEXT:    v_max_i32_e32 v17, v17, v19
@@ -3356,7 +3356,7 @@ define <16 x i32> @v_saddsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    v_min_i32_e32 v32, 0, v0
-; GFX10-NEXT:    s_mov_b32 s4, 0x80000000
+; GFX10-NEXT:    s_brev_b32 s4, 1
 ; GFX10-NEXT:    v_max_i32_e32 v33, 0, v0
 ; GFX10-NEXT:    s_brev_b32 s5, -2
 ; GFX10-NEXT:    v_min_i32_e32 v36, 0, v2
@@ -3367,7 +3367,7 @@ define <16 x i32> @v_saddsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v36, s4, v36
 ; GFX10-NEXT:    v_max_i32_e32 v16, v35, v16
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v32, s4, v32
-; GFX10-NEXT:    v_mov_b32_e32 v35, 0x80000000
+; GFX10-NEXT:    v_bfrev_b32_e32 v35, 1
 ; GFX10-NEXT:    v_min_i32_e32 v38, 0, v3
 ; GFX10-NEXT:    v_max_i32_e32 v18, v36, v18
 ; GFX10-NEXT:    v_min_i32_e32 v16, v16, v33
@@ -3485,7 +3485,7 @@ define amdgpu_ps <16 x i32> @s_saddsat_v16i32(<16 x i32> inreg %lhs, <16 x i32>
 ; GFX6-NEXT:    s_cselect_b32 s34, s0, 0
 ; GFX6-NEXT:    s_sub_i32 s34, s32, s34
 ; GFX6-NEXT:    s_cmp_lt_i32 s0, 0
-; GFX6-NEXT:    s_mov_b32 s33, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s33, 1
 ; GFX6-NEXT:    s_cselect_b32 s35, s0, 0
 ; GFX6-NEXT:    s_sub_i32 s35, s33, s35
 ; GFX6-NEXT:    s_cmp_gt_i32 s35, s16
@@ -3667,7 +3667,7 @@ define amdgpu_ps <16 x i32> @s_saddsat_v16i32(<16 x i32> inreg %lhs, <16 x i32>
 ; GFX8-NEXT:    s_cselect_b32 s34, s0, 0
 ; GFX8-NEXT:    s_sub_i32 s34, s32, s34
 ; GFX8-NEXT:    s_cmp_lt_i32 s0, 0
-; GFX8-NEXT:    s_mov_b32 s33, 0x80000000
+; GFX8-NEXT:    s_brev_b32 s33, 1
 ; GFX8-NEXT:    s_cselect_b32 s35, s0, 0
 ; GFX8-NEXT:    s_sub_i32 s35, s33, s35
 ; GFX8-NEXT:    s_cmp_gt_i32 s35, s16
@@ -3849,7 +3849,7 @@ define amdgpu_ps <16 x i32> @s_saddsat_v16i32(<16 x i32> inreg %lhs, <16 x i32>
 ; GFX9-NEXT:    s_cselect_b32 s34, s0, 0
 ; GFX9-NEXT:    s_sub_i32 s34, s32, s34
 ; GFX9-NEXT:    s_cmp_lt_i32 s0, 0
-; GFX9-NEXT:    s_mov_b32 s33, 0x80000000
+; GFX9-NEXT:    s_brev_b32 s33, 1
 ; GFX9-NEXT:    s_cselect_b32 s35, s0, 0
 ; GFX9-NEXT:    s_sub_i32 s35, s33, s35
 ; GFX9-NEXT:    s_cmp_gt_i32 s35, s16
@@ -4029,7 +4029,7 @@ define amdgpu_ps <16 x i32> @s_saddsat_v16i32(<16 x i32> inreg %lhs, <16 x i32>
 ; GFX10-NEXT:    s_cmp_gt_i32 s0, 0
 ; GFX10-NEXT:    s_brev_b32 s32, -2
 ; GFX10-NEXT:    s_cselect_b32 s33, s0, 0
-; GFX10-NEXT:    s_mov_b32 s34, 0x80000000
+; GFX10-NEXT:    s_brev_b32 s34, 1
 ; GFX10-NEXT:    s_sub_i32 s46, s32, s33
 ; GFX10-NEXT:    s_cmp_lt_i32 s0, 0
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
@@ -4295,7 +4295,7 @@ define amdgpu_ps i16 @s_saddsat_i16(i16 inreg %lhs, i16 inreg %rhs) {
 ; GFX8-NEXT:    s_sub_i32 s4, 0x7fff, s4
 ; GFX8-NEXT:    s_cmp_lt_i32 s2, s3
 ; GFX8-NEXT:    s_cselect_b32 s2, s2, s3
-; GFX8-NEXT:    s_sub_i32 s2, 0x8000, s2
+; GFX8-NEXT:    s_sub_i32 s2, 0xffff8000, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_cmp_gt_i32 s2, s1
@@ -4316,7 +4316,7 @@ define amdgpu_ps i16 @s_saddsat_i16(i16 inreg %lhs, i16 inreg %rhs) {
 ; GFX9-NEXT:    s_sub_i32 s4, 0x7fff, s4
 ; GFX9-NEXT:    s_cmp_lt_i32 s2, s3
 ; GFX9-NEXT:    s_cselect_b32 s2, s2, s3
-; GFX9-NEXT:    s_sub_i32 s2, 0x8000, s2
+; GFX9-NEXT:    s_sub_i32 s2, 0xffff8000, s2
 ; GFX9-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX9-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX9-NEXT:    s_cmp_gt_i32 s2, s1
@@ -4339,7 +4339,7 @@ define amdgpu_ps i16 @s_saddsat_i16(i16 inreg %lhs, i16 inreg %rhs) {
 ; GFX10-NEXT:    s_sub_i32 s4, 0x7fff, s4
 ; GFX10-NEXT:    s_cmp_lt_i32 s3, s2
 ; GFX10-NEXT:    s_cselect_b32 s2, s3, s2
-; GFX10-NEXT:    s_sub_i32 s2, 0x8000, s2
+; GFX10-NEXT:    s_sub_i32 s2, 0xffff8000, s2
 ; GFX10-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX10-NEXT:    s_cmp_gt_i32 s2, s1
 ; GFX10-NEXT:    s_cselect_b32 s1, s2, s1
@@ -4379,7 +4379,7 @@ define amdgpu_ps half @saddsat_i16_sv(i16 inreg %lhs, i16 %rhs) {
 ; GFX8-NEXT:    s_sub_i32 s3, 0x7fff, s3
 ; GFX8-NEXT:    s_cmp_lt_i32 s1, s2
 ; GFX8-NEXT:    s_cselect_b32 s1, s1, s2
-; GFX8-NEXT:    s_sub_i32 s1, 0x8000, s1
+; GFX8-NEXT:    s_sub_i32 s1, 0xffff8000, s1
 ; GFX8-NEXT:    v_max_i16_e32 v0, s1, v0
 ; GFX8-NEXT:    v_min_i16_e32 v0, s3, v0
 ; GFX8-NEXT:    v_add_u16_e32 v0, s0, v0
@@ -4394,7 +4394,7 @@ define amdgpu_ps half @saddsat_i16_sv(i16 inreg %lhs, i16 %rhs) {
 ; GFX9-NEXT:    s_sub_i32 s3, 0x7fff, s3
 ; GFX9-NEXT:    s_cmp_lt_i32 s1, s2
 ; GFX9-NEXT:    s_cselect_b32 s1, s1, s2
-; GFX9-NEXT:    s_sub_i32 s1, 0x8000, s1
+; GFX9-NEXT:    s_sub_i32 s1, 0xffff8000, s1
 ; GFX9-NEXT:    v_max_i16_e32 v0, s1, v0
 ; GFX9-NEXT:    v_min_i16_e32 v0, s3, v0
 ; GFX9-NEXT:    v_add_u16_e32 v0, s0, v0
@@ -4410,7 +4410,7 @@ define amdgpu_ps half @saddsat_i16_sv(i16 inreg %lhs, i16 %rhs) {
 ; GFX10-NEXT:    s_sub_i32 s3, 0x7fff, s3
 ; GFX10-NEXT:    s_cmp_lt_i32 s1, s2
 ; GFX10-NEXT:    s_cselect_b32 s1, s1, s2
-; GFX10-NEXT:    s_sub_i32 s1, 0x8000, s1
+; GFX10-NEXT:    s_sub_i32 s1, 0xffff8000, s1
 ; GFX10-NEXT:    v_max_i16_e64 v0, s1, v0
 ; GFX10-NEXT:    v_min_i16_e64 v0, v0, s3
 ; GFX10-NEXT:    v_add_nc_u16_e64 v0, s0, v0
@@ -4478,7 +4478,7 @@ define <2 x i16> @v_saddsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX6-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s5, 1
 ; GFX6-NEXT:    v_min_i32_e32 v5, 0, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
 ; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, s5, v5
@@ -4504,7 +4504,7 @@ define <2 x i16> @v_saddsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
 ; GFX8-LABEL: v_saddsat_v2i16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_mov_b32 s5, 0x8000
+; GFX8-NEXT:    s_movk_i32 s5, 0x8000
 ; GFX8-NEXT:    v_min_i16_e32 v4, 0, v0
 ; GFX8-NEXT:    v_sub_u16_e32 v4, s5, v4
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
@@ -4527,7 +4527,7 @@ define <2 x i16> @v_saddsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
 ; GFX9-LABEL: v_saddsat_v2i16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s5, 0xffff8000
+; GFX9-NEXT:    s_movk_i32 s5, 0x8000
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s6, 0, 0
 ; GFX9-NEXT:    s_movk_i32 s4, 0x7fff
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s5, s5, s5
@@ -4546,7 +4546,7 @@ define <2 x i16> @v_saddsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s4, 0, 0
-; GFX10-NEXT:    s_mov_b32 s5, 0xffff8000
+; GFX10-NEXT:    s_movk_i32 s5, 0x8000
 ; GFX10-NEXT:    v_pk_min_i16 v2, v0, s4
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s5, s5, s5
 ; GFX10-NEXT:    v_pk_max_i16 v3, v0, s4
@@ -4573,7 +4573,7 @@ define amdgpu_ps i32 @s_saddsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
 ; GFX6-NEXT:    s_cselect_b32 s6, s0, 0
 ; GFX6-NEXT:    s_sub_i32 s6, s4, s6
 ; GFX6-NEXT:    s_cmp_lt_i32 s0, 0
-; GFX6-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s5, 1
 ; GFX6-NEXT:    s_cselect_b32 s7, s0, 0
 ; GFX6-NEXT:    s_sub_i32 s7, s5, s7
 ; GFX6-NEXT:    s_cmp_gt_i32 s7, s2
@@ -4614,7 +4614,7 @@ define amdgpu_ps i32 @s_saddsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
 ; GFX8-NEXT:    s_cselect_b32 s8, s6, s7
 ; GFX8-NEXT:    s_sub_i32 s8, s4, s8
 ; GFX8-NEXT:    s_cmp_lt_i32 s6, s7
-; GFX8-NEXT:    s_mov_b32 s5, 0x8000
+; GFX8-NEXT:    s_movk_i32 s5, 0x8000
 ; GFX8-NEXT:    s_cselect_b32 s6, s6, s7
 ; GFX8-NEXT:    s_sub_i32 s6, s5, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s6
@@ -4669,7 +4669,7 @@ define amdgpu_ps i32 @s_saddsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
 ; GFX9-NEXT:    s_cmp_lt_i32 s5, s7
 ; GFX9-NEXT:    s_cselect_b32 s5, s5, s7
 ; GFX9-NEXT:    s_cmp_lt_i32 s6, s4
-; GFX9-NEXT:    s_mov_b32 s3, 0xffff8000
+; GFX9-NEXT:    s_movk_i32 s3, 0x8000
 ; GFX9-NEXT:    s_cselect_b32 s4, s6, s4
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s4, s5, s4
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s3, s3, s3
@@ -4726,7 +4726,7 @@ define amdgpu_ps i32 @s_saddsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
 ; GFX10-NEXT:    s_cmp_lt_i32 s3, s5
 ; GFX10-NEXT:    s_cselect_b32 s3, s3, s5
 ; GFX10-NEXT:    s_cmp_lt_i32 s4, s2
-; GFX10-NEXT:    s_mov_b32 s5, 0xffff8000
+; GFX10-NEXT:    s_movk_i32 s5, 0x8000
 ; GFX10-NEXT:    s_cselect_b32 s2, s4, s2
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s4, s5, s5
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s2, s3, s2
@@ -4774,7 +4774,7 @@ define amdgpu_ps float @saddsat_v2i16_sv(<2 x i16> inreg %lhs, <2 x i16> %rhs) {
 ; GFX6-NEXT:    s_cselect_b32 s4, s0, 0
 ; GFX6-NEXT:    s_sub_i32 s4, s2, s4
 ; GFX6-NEXT:    s_cmp_lt_i32 s0, 0
-; GFX6-NEXT:    s_mov_b32 s3, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s3, 1
 ; GFX6-NEXT:    s_cselect_b32 s5, s0, 0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX6-NEXT:    s_sub_i32 s5, s3, s5
@@ -4811,7 +4811,7 @@ define amdgpu_ps float @saddsat_v2i16_sv(<2 x i16> inreg %lhs, <2 x i16> %rhs) {
 ; GFX8-NEXT:    s_cselect_b32 s6, s4, s5
 ; GFX8-NEXT:    s_sub_i32 s6, s2, s6
 ; GFX8-NEXT:    s_cmp_lt_i32 s4, s5
-; GFX8-NEXT:    s_mov_b32 s3, 0x8000
+; GFX8-NEXT:    s_movk_i32 s3, 0x8000
 ; GFX8-NEXT:    s_cselect_b32 s4, s4, s5
 ; GFX8-NEXT:    s_sub_i32 s4, s3, s4
 ; GFX8-NEXT:    v_max_i16_e32 v1, s4, v0
@@ -4853,7 +4853,7 @@ define amdgpu_ps float @saddsat_v2i16_sv(<2 x i16> inreg %lhs, <2 x i16> %rhs) {
 ; GFX9-NEXT:    s_cmp_lt_i32 s4, s6
 ; GFX9-NEXT:    s_cselect_b32 s4, s4, s6
 ; GFX9-NEXT:    s_cmp_lt_i32 s5, s3
-; GFX9-NEXT:    s_mov_b32 s2, 0xffff8000
+; GFX9-NEXT:    s_movk_i32 s2, 0x8000
 ; GFX9-NEXT:    s_cselect_b32 s3, s5, s3
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s3, s4, s3
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s2, s2, s2
@@ -4890,7 +4890,7 @@ define amdgpu_ps float @saddsat_v2i16_sv(<2 x i16> inreg %lhs, <2 x i16> %rhs) {
 ; GFX10-NEXT:    s_cmp_lt_i32 s2, s4
 ; GFX10-NEXT:    s_cselect_b32 s2, s2, s4
 ; GFX10-NEXT:    s_cmp_lt_i32 s3, s1
-; GFX10-NEXT:    s_mov_b32 s4, 0xffff8000
+; GFX10-NEXT:    s_movk_i32 s4, 0x8000
 ; GFX10-NEXT:    s_cselect_b32 s1, s3, s1
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s3, s4, s4
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s1, s2, s1
@@ -4913,7 +4913,7 @@ define amdgpu_ps float @saddsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) {
 ; GFX6-LABEL: saddsat_v2i16_vs:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX6-NEXT:    s_mov_b32 s3, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s3, 1
 ; GFX6-NEXT:    v_min_i32_e32 v3, 0, v0
 ; GFX6-NEXT:    s_lshl_b32 s0, s0, 16
 ; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, s3, v3
@@ -4943,7 +4943,7 @@ define amdgpu_ps float @saddsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) {
 ;
 ; GFX8-LABEL: saddsat_v2i16_vs:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_mov_b32 s3, 0x8000
+; GFX8-NEXT:    s_movk_i32 s3, 0x8000
 ; GFX8-NEXT:    v_min_i16_e32 v3, 0, v0
 ; GFX8-NEXT:    v_sub_u16_e32 v3, s3, v3
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
@@ -4966,7 +4966,7 @@ define amdgpu_ps float @saddsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) {
 ;
 ; GFX9-LABEL: saddsat_v2i16_vs:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s2, 0xffff8000
+; GFX9-NEXT:    s_movk_i32 s2, 0x8000
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s3, 0, 0
 ; GFX9-NEXT:    s_movk_i32 s1, 0x7fff
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s2, s2, s2
@@ -4983,7 +4983,7 @@ define amdgpu_ps float @saddsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) {
 ; GFX10-LABEL: saddsat_v2i16_vs:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s1, 0, 0
-; GFX10-NEXT:    s_mov_b32 s2, 0xffff8000
+; GFX10-NEXT:    s_movk_i32 s2, 0x8000
 ; GFX10-NEXT:    v_pk_min_i16 v1, v0, s1
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s2, s2, s2
 ; GFX10-NEXT:    v_pk_max_i16 v2, v0, s1
@@ -5017,7 +5017,7 @@ define <2 x float> @v_saddsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX6-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s5, 1
 ; GFX6-NEXT:    v_min_i32_e32 v10, 0, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
 ; GFX6-NEXT:    v_sub_i32_e32 v10, vcc, s5, v10
@@ -5046,7 +5046,7 @@ define <2 x float> @v_saddsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
 ; GFX6-NEXT:    v_max_i32_e32 v4, v6, v4
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
 ; GFX6-NEXT:    v_min_i32_e32 v4, v4, v5
-; GFX6-NEXT:    v_mov_b32_e32 v11, 0x80000000
+; GFX6-NEXT:    v_bfrev_b32_e32 v11, 1
 ; GFX6-NEXT:    v_min_i32_e32 v6, 0, v3
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
 ; GFX6-NEXT:    v_max_i32_e32 v5, 0, v3
@@ -5074,7 +5074,7 @@ define <2 x float> @v_saddsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
 ; GFX8-LABEL: v_saddsat_v4i16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_mov_b32 s5, 0x8000
+; GFX8-NEXT:    s_movk_i32 s5, 0x8000
 ; GFX8-NEXT:    v_min_i16_e32 v7, 0, v0
 ; GFX8-NEXT:    v_sub_u16_e32 v7, s5, v7
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
@@ -5113,7 +5113,7 @@ define <2 x float> @v_saddsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
 ; GFX9-LABEL: v_saddsat_v4i16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s5, 0xffff8000
+; GFX9-NEXT:    s_movk_i32 s5, 0x8000
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s6, 0, 0
 ; GFX9-NEXT:    s_movk_i32 s4, 0x7fff
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s5, s5, s5
@@ -5139,7 +5139,7 @@ define <2 x float> @v_saddsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s5, 0, 0
-; GFX10-NEXT:    s_mov_b32 s6, 0xffff8000
+; GFX10-NEXT:    s_movk_i32 s6, 0x8000
 ; GFX10-NEXT:    v_pk_min_i16 v4, v0, s5
 ; GFX10-NEXT:    v_pk_min_i16 v5, v1, s5
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s6, s6, s6
@@ -5174,7 +5174,7 @@ define amdgpu_ps <2 x i32> @s_saddsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
 ; GFX6-NEXT:    s_cselect_b32 s10, s0, 0
 ; GFX6-NEXT:    s_sub_i32 s10, s8, s10
 ; GFX6-NEXT:    s_cmp_lt_i32 s0, 0
-; GFX6-NEXT:    s_mov_b32 s9, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s9, 1
 ; GFX6-NEXT:    s_cselect_b32 s11, s0, 0
 ; GFX6-NEXT:    s_sub_i32 s11, s9, s11
 ; GFX6-NEXT:    s_cmp_gt_i32 s11, s4
@@ -5249,7 +5249,7 @@ define amdgpu_ps <2 x i32> @s_saddsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
 ; GFX8-NEXT:    s_cselect_b32 s12, s10, s11
 ; GFX8-NEXT:    s_sub_i32 s12, s8, s12
 ; GFX8-NEXT:    s_cmp_lt_i32 s10, s11
-; GFX8-NEXT:    s_mov_b32 s9, 0x8000
+; GFX8-NEXT:    s_movk_i32 s9, 0x8000
 ; GFX8-NEXT:    s_cselect_b32 s10, s10, s11
 ; GFX8-NEXT:    s_sub_i32 s10, s9, s10
 ; GFX8-NEXT:    s_sext_i32_i16 s10, s10
@@ -5340,7 +5340,7 @@ define amdgpu_ps <2 x i32> @s_saddsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
 ; GFX9-NEXT:    s_cmp_lt_i32 s7, s9
 ; GFX9-NEXT:    s_cselect_b32 s7, s7, s9
 ; GFX9-NEXT:    s_cmp_lt_i32 s8, s6
-; GFX9-NEXT:    s_mov_b32 s5, 0xffff8000
+; GFX9-NEXT:    s_movk_i32 s5, 0x8000
 ; GFX9-NEXT:    s_cselect_b32 s8, s8, s6
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s5, s5, s5
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s7, s7, s8
@@ -5431,7 +5431,7 @@ define amdgpu_ps <2 x i32> @s_saddsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
 ; GFX10-NEXT:    s_cmp_gt_i32 s6, s4
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s9, s9, s9
 ; GFX10-NEXT:    s_cselect_b32 s10, s6, s4
-; GFX10-NEXT:    s_mov_b32 s12, 0xffff8000
+; GFX10-NEXT:    s_movk_i32 s12, 0x8000
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s8, s8, s10
 ; GFX10-NEXT:    s_lshr_b32 s10, s9, 16
 ; GFX10-NEXT:    s_lshr_b32 s11, s8, 16
@@ -5538,7 +5538,7 @@ define <3 x float> @v_saddsat_v6i16(<6 x i16> %lhs, <6 x i16> %rhs) {
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX6-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s5, 1
 ; GFX6-NEXT:    v_min_i32_e32 v14, 0, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v6, 16, v6
 ; GFX6-NEXT:    v_sub_i32_e32 v14, vcc, s5, v14
@@ -5567,7 +5567,7 @@ define <3 x float> @v_saddsat_v6i16(<6 x i16> %lhs, <6 x i16> %rhs) {
 ; GFX6-NEXT:    v_max_i32_e32 v6, v8, v6
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
 ; GFX6-NEXT:    v_min_i32_e32 v6, v6, v7
-; GFX6-NEXT:    v_mov_b32_e32 v15, 0x80000000
+; GFX6-NEXT:    v_bfrev_b32_e32 v15, 1
 ; GFX6-NEXT:    v_min_i32_e32 v8, 0, v3
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
 ; GFX6-NEXT:    v_max_i32_e32 v7, 0, v3
@@ -5619,7 +5619,7 @@ define <3 x float> @v_saddsat_v6i16(<6 x i16> %lhs, <6 x i16> %rhs) {
 ; GFX8-LABEL: v_saddsat_v6i16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_mov_b32 s5, 0x8000
+; GFX8-NEXT:    s_movk_i32 s5, 0x8000
 ; GFX8-NEXT:    v_min_i16_e32 v11, 0, v0
 ; GFX8-NEXT:    v_sub_u16_e32 v11, s5, v11
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
@@ -5646,7 +5646,7 @@ define <3 x float> @v_saddsat_v6i16(<6 x i16> %lhs, <6 x i16> %rhs) {
 ; GFX8-NEXT:    v_sub_u16_e32 v14, s5, v14
 ; GFX8-NEXT:    v_max_i16_sdwa v4, v14, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX8-NEXT:    v_sub_u16_e32 v13, s4, v13
-; GFX8-NEXT:    v_mov_b32_e32 v12, 0x8000
+; GFX8-NEXT:    v_mov_b32_e32 v12, 0xffff8000
 ; GFX8-NEXT:    v_min_i16_e32 v14, 0, v2
 ; GFX8-NEXT:    v_sub_u16_e32 v14, v12, v14
 ; GFX8-NEXT:    v_min_i16_e32 v4, v4, v13
@@ -5676,7 +5676,7 @@ define <3 x float> @v_saddsat_v6i16(<6 x i16> %lhs, <6 x i16> %rhs) {
 ; GFX9-LABEL: v_saddsat_v6i16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s5, 0xffff8000
+; GFX9-NEXT:    s_movk_i32 s5, 0x8000
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s6, 0, 0
 ; GFX9-NEXT:    s_movk_i32 s4, 0x7fff
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s5, s5, s5
@@ -5709,7 +5709,7 @@ define <3 x float> @v_saddsat_v6i16(<6 x i16> %lhs, <6 x i16> %rhs) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s5, 0, 0
-; GFX10-NEXT:    s_mov_b32 s6, 0xffff8000
+; GFX10-NEXT:    s_movk_i32 s6, 0x8000
 ; GFX10-NEXT:    v_pk_min_i16 v7, v0, s5
 ; GFX10-NEXT:    v_pk_min_i16 v8, v1, s5
 ; GFX10-NEXT:    v_pk_min_i16 v9, v2, s5
@@ -5751,7 +5751,7 @@ define amdgpu_ps <3 x i32> @s_saddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 ; GFX6-NEXT:    s_cselect_b32 s14, s0, 0
 ; GFX6-NEXT:    s_sub_i32 s14, s12, s14
 ; GFX6-NEXT:    s_cmp_lt_i32 s0, 0
-; GFX6-NEXT:    s_mov_b32 s13, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s13, 1
 ; GFX6-NEXT:    s_cselect_b32 s15, s0, 0
 ; GFX6-NEXT:    s_sub_i32 s15, s13, s15
 ; GFX6-NEXT:    s_cmp_gt_i32 s15, s6
@@ -5860,7 +5860,7 @@ define amdgpu_ps <3 x i32> @s_saddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 ; GFX8-NEXT:    s_cselect_b32 s16, s14, s15
 ; GFX8-NEXT:    s_sub_i32 s16, s12, s16
 ; GFX8-NEXT:    s_cmp_lt_i32 s14, s15
-; GFX8-NEXT:    s_mov_b32 s13, 0x8000
+; GFX8-NEXT:    s_movk_i32 s13, 0x8000
 ; GFX8-NEXT:    s_cselect_b32 s14, s14, s15
 ; GFX8-NEXT:    s_sub_i32 s14, s13, s14
 ; GFX8-NEXT:    s_sext_i32_i16 s14, s14
@@ -5987,7 +5987,7 @@ define amdgpu_ps <3 x i32> @s_saddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 ; GFX9-NEXT:    s_cmp_lt_i32 s9, s11
 ; GFX9-NEXT:    s_cselect_b32 s9, s9, s11
 ; GFX9-NEXT:    s_cmp_lt_i32 s10, s8
-; GFX9-NEXT:    s_mov_b32 s7, 0xffff8000
+; GFX9-NEXT:    s_movk_i32 s7, 0x8000
 ; GFX9-NEXT:    s_cselect_b32 s10, s10, s8
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s7, s7, s7
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s9, s9, s10
@@ -6121,7 +6121,7 @@ define amdgpu_ps <3 x i32> @s_saddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 ; GFX10-NEXT:    s_cmp_gt_i32 s8, s6
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s11, s11, s11
 ; GFX10-NEXT:    s_cselect_b32 s12, s8, s6
-; GFX10-NEXT:    s_mov_b32 s14, 0xffff8000
+; GFX10-NEXT:    s_movk_i32 s14, 0x8000
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s10, s10, s12
 ; GFX10-NEXT:    s_lshr_b32 s12, s11, 16
 ; GFX10-NEXT:    s_lshr_b32 s13, s10, 16
@@ -6260,7 +6260,7 @@ define <4 x float> @v_saddsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX6-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s5, 1
 ; GFX6-NEXT:    v_min_i32_e32 v18, 0, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v8, 16, v8
 ; GFX6-NEXT:    v_sub_i32_e32 v18, vcc, s5, v18
@@ -6289,7 +6289,7 @@ define <4 x float> @v_saddsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
 ; GFX6-NEXT:    v_max_i32_e32 v8, v10, v8
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
 ; GFX6-NEXT:    v_min_i32_e32 v8, v8, v9
-; GFX6-NEXT:    v_mov_b32_e32 v19, 0x80000000
+; GFX6-NEXT:    v_bfrev_b32_e32 v19, 1
 ; GFX6-NEXT:    v_min_i32_e32 v10, 0, v3
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v8
 ; GFX6-NEXT:    v_max_i32_e32 v9, 0, v3
@@ -6365,7 +6365,7 @@ define <4 x float> @v_saddsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
 ; GFX8-LABEL: v_saddsat_v8i16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_mov_b32 s5, 0x8000
+; GFX8-NEXT:    s_movk_i32 s5, 0x8000
 ; GFX8-NEXT:    v_min_i16_e32 v14, 0, v0
 ; GFX8-NEXT:    v_sub_u16_e32 v14, s5, v14
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v8, 16, v0
@@ -6392,7 +6392,7 @@ define <4 x float> @v_saddsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
 ; GFX8-NEXT:    v_sub_u16_e32 v17, s5, v17
 ; GFX8-NEXT:    v_max_i16_sdwa v5, v17, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX8-NEXT:    v_sub_u16_e32 v16, s4, v16
-; GFX8-NEXT:    v_mov_b32_e32 v15, 0x8000
+; GFX8-NEXT:    v_mov_b32_e32 v15, 0xffff8000
 ; GFX8-NEXT:    v_min_i16_e32 v17, 0, v2
 ; GFX8-NEXT:    v_sub_u16_e32 v17, v15, v17
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
@@ -6438,7 +6438,7 @@ define <4 x float> @v_saddsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
 ; GFX9-LABEL: v_saddsat_v8i16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s5, 0xffff8000
+; GFX9-NEXT:    s_movk_i32 s5, 0x8000
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s6, 0, 0
 ; GFX9-NEXT:    s_movk_i32 s4, 0x7fff
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s5, s5, s5
@@ -6478,7 +6478,7 @@ define <4 x float> @v_saddsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s4, 0, 0
-; GFX10-NEXT:    s_mov_b32 s5, 0xffff8000
+; GFX10-NEXT:    s_movk_i32 s5, 0x8000
 ; GFX10-NEXT:    v_pk_min_i16 v8, v0, s4
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s5, s5, s5
 ; GFX10-NEXT:    v_pk_min_i16 v11, v1, s4
@@ -6527,7 +6527,7 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX6-NEXT:    s_cselect_b32 s18, s0, 0
 ; GFX6-NEXT:    s_sub_i32 s18, s16, s18
 ; GFX6-NEXT:    s_cmp_lt_i32 s0, 0
-; GFX6-NEXT:    s_mov_b32 s17, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s17, 1
 ; GFX6-NEXT:    s_cselect_b32 s19, s0, 0
 ; GFX6-NEXT:    s_sub_i32 s19, s17, s19
 ; GFX6-NEXT:    s_cmp_gt_i32 s19, s8
@@ -6670,7 +6670,7 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX8-NEXT:    s_cselect_b32 s20, s18, s19
 ; GFX8-NEXT:    s_sub_i32 s20, s16, s20
 ; GFX8-NEXT:    s_cmp_lt_i32 s18, s19
-; GFX8-NEXT:    s_mov_b32 s17, 0x8000
+; GFX8-NEXT:    s_movk_i32 s17, 0x8000
 ; GFX8-NEXT:    s_cselect_b32 s18, s18, s19
 ; GFX8-NEXT:    s_sub_i32 s18, s17, s18
 ; GFX8-NEXT:    s_sext_i32_i16 s18, s18
@@ -6833,7 +6833,7 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX9-NEXT:    s_cmp_lt_i32 s11, s13
 ; GFX9-NEXT:    s_cselect_b32 s11, s11, s13
 ; GFX9-NEXT:    s_cmp_lt_i32 s12, s10
-; GFX9-NEXT:    s_mov_b32 s9, 0xffff8000
+; GFX9-NEXT:    s_movk_i32 s9, 0x8000
 ; GFX9-NEXT:    s_cselect_b32 s12, s12, s10
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s9, s9, s9
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s11, s11, s12
@@ -7010,7 +7010,7 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX10-NEXT:    s_cmp_gt_i32 s10, s8
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s13, s13, s13
 ; GFX10-NEXT:    s_cselect_b32 s14, s10, s8
-; GFX10-NEXT:    s_mov_b32 s16, 0xffff8000
+; GFX10-NEXT:    s_movk_i32 s16, 0x8000
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s12, s12, s14
 ; GFX10-NEXT:    s_lshr_b32 s14, s13, 16
 ; GFX10-NEXT:    s_lshr_b32 s15, s12, 16

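The recurring s_mov_b32 -> s_movk_i32 rewrites above and below all follow from one encoding rule: s_movk_i32 carries a 16-bit immediate that the hardware sign-extends to 32 bits, so once constants are selected sign-extended, values such as 0xffff8000 (or 0xfffff000 in the pow2k-denominator tests) fit the shorter SOPK form, and an all-ones i16 constant becomes the inline operand -1. A minimal standalone sketch of that arithmetic in ordinary two's-complement C++ (illustrative only; none of these names come from the patch):

  #include <cassert>
  #include <cstdint>

  int main() {
    // s_movk_i32 semantics: the simm16 payload is sign-extended to 32 bits.
    int32_t FromMovk = static_cast<int16_t>(0x8000);
    assert(static_cast<uint32_t>(FromMovk) == 0xffff8000u);

    // Same rule for the 0x1000 power-of-two denominator tests: -0x1000
    // has the simm16 encoding 0xf000.
    int32_t NegPow2 = static_cast<int16_t>(0xf000);
    assert(static_cast<uint32_t>(NegPow2) == 0xfffff000u);

    // An all-ones i16 constant sign-extends to 32-bit -1, which is an
    // inline operand on AMDGPU, so no scalar register is needed at all.
    assert(static_cast<int32_t>(static_cast<int16_t>(0xffff)) == -1);
    return 0;
  }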
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
index f68465faf61c..57737aeb886f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
@@ -399,7 +399,7 @@ define <2 x i32> @v_sdiv_v2i32_pow2k_denom(<2 x i32> %num) {
 ; CGP-NEXT:    s_movk_i32 s4, 0x1000
 ; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
 ; CGP-NEXT:    v_mov_b32_e32 v3, 0x1000
-; CGP-NEXT:    s_mov_b32 s5, 0xfffff000
+; CGP-NEXT:    s_movk_i32 s5, 0xf000
 ; CGP-NEXT:    v_mov_b32_e32 v4, 0xfffff000
 ; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
 ; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
index 1813c33019ae..f6565fe1b6e2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
@@ -200,13 +200,12 @@ declare i32 @llvm.amdgcn.readfirstlane(i32)
 define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-LABEL: s_sdiv_i64:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_or_b64 s[0:1], s[2:3], s[4:5]
-; CHECK-NEXT:    s_mov_b32 s6, 0
-; CHECK-NEXT:    s_mov_b32 s7, -1
-; CHECK-NEXT:    s_and_b64 s[0:1], s[0:1], s[6:7]
-; CHECK-NEXT:    v_cmp_eq_u64_e64 s[6:7], s[0:1], 0
+; CHECK-NEXT:    s_or_b64 s[6:7], s[2:3], s[4:5]
+; CHECK-NEXT:    s_mov_b32 s0, 0
+; CHECK-NEXT:    s_mov_b32 s1, -1
+; CHECK-NEXT:    s_and_b64 s[6:7], s[6:7], s[0:1]
+; CHECK-NEXT:    v_cmp_eq_u64_e64 s[6:7], s[6:7], 0
 ; CHECK-NEXT:    v_cmp_ne_u32_e64 s[8:9], 0, 1
-; CHECK-NEXT:    s_mov_b32 s0, 1
 ; CHECK-NEXT:    s_xor_b64 vcc, s[6:7], s[8:9]
 ; CHECK-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; CHECK-NEXT:    s_cbranch_vccz BB1_2
@@ -354,13 +353,13 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
 ; CHECK-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
 ; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; CHECK-NEXT:    s_xor_b64 s[0:1], s[6:7], s[8:9]
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; CHECK-NEXT:    v_xor_b32_e32 v0, s0, v0
 ; CHECK-NEXT:    v_subrev_i32_e32 v0, vcc, s0, v0
-; CHECK-NEXT:    s_mov_b32 s0, 0
+; CHECK-NEXT:    s_mov_b32 s1, 0
 ; CHECK-NEXT:  BB1_2: ; %Flow
-; CHECK-NEXT:    s_and_b32 s0, s0, 1
+; CHECK-NEXT:    s_and_b32 s0, s1, 1
 ; CHECK-NEXT:    s_cmp_lg_u32 s0, 0
 ; CHECK-NEXT:    s_cbranch_scc0 BB1_4
 ; CHECK-NEXT:  ; %bb.3:

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
index 43f79f4b207d..320d814be8a9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
@@ -369,7 +369,7 @@ define <2 x i32> @v_srem_v2i32_pow2k_denom(<2 x i32> %num) {
 ; CGP-NEXT:    s_movk_i32 s4, 0x1000
 ; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
 ; CGP-NEXT:    v_mov_b32_e32 v3, 0x1000
-; CGP-NEXT:    s_mov_b32 s5, 0xfffff000
+; CGP-NEXT:    s_movk_i32 s5, 0xf000
 ; CGP-NEXT:    v_mov_b32_e32 v4, 0xfffff000
 ; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
 ; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
index 438388ebf713..06d46321a59b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
@@ -196,13 +196,12 @@ declare i32 @llvm.amdgcn.readfirstlane(i32)
 define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-LABEL: s_srem_i64:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_or_b64 s[0:1], s[2:3], s[4:5]
-; CHECK-NEXT:    s_mov_b32 s6, 0
-; CHECK-NEXT:    s_mov_b32 s7, -1
-; CHECK-NEXT:    s_and_b64 s[0:1], s[0:1], s[6:7]
-; CHECK-NEXT:    v_cmp_eq_u64_e64 s[6:7], s[0:1], 0
+; CHECK-NEXT:    s_or_b64 s[6:7], s[2:3], s[4:5]
+; CHECK-NEXT:    s_mov_b32 s0, 0
+; CHECK-NEXT:    s_mov_b32 s1, -1
+; CHECK-NEXT:    s_and_b64 s[6:7], s[6:7], s[0:1]
+; CHECK-NEXT:    v_cmp_eq_u64_e64 s[6:7], s[6:7], 0
 ; CHECK-NEXT:    v_cmp_ne_u32_e64 s[8:9], 0, 1
-; CHECK-NEXT:    s_mov_b32 s0, 1
 ; CHECK-NEXT:    s_xor_b64 vcc, s[6:7], s[8:9]
 ; CHECK-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; CHECK-NEXT:    s_cbranch_vccz BB1_2
@@ -352,9 +351,9 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; CHECK-NEXT:    v_xor_b32_e32 v0, s6, v0
 ; CHECK-NEXT:    v_subrev_i32_e32 v0, vcc, s6, v0
-; CHECK-NEXT:    s_mov_b32 s0, 0
+; CHECK-NEXT:    s_mov_b32 s1, 0
 ; CHECK-NEXT:  BB1_2: ; %Flow
-; CHECK-NEXT:    s_and_b32 s0, s0, 1
+; CHECK-NEXT:    s_and_b32 s0, s1, 1
 ; CHECK-NEXT:    s_cmp_lg_u32 s0, 0
 ; CHECK-NEXT:    s_cbranch_scc0 BB1_4
 ; CHECK-NEXT:  ; %bb.3:

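In the ssubsat changes below, the INT32_MIN literal is likewise rematerialized: s_brev_b32 s5, 1 and v_bfrev_b32_e32 v11, 1 bit-reverse the inline constant 1, producing 0x80000000 without spending a 32-bit literal in the instruction stream. A short illustrative C++ sketch of the bit reversal (again, names are mine, not from the patch):

  #include <cassert>
  #include <cstdint>

  // Bit-reverse a 32-bit value, mirroring what s_brev_b32/v_bfrev_b32 compute.
  uint32_t brev32(uint32_t X) {
    uint32_t R = 0;
    for (int I = 0; I < 32; ++I)
      R |= ((X >> I) & 1u) << (31 - I);
    return R;
  }

  int main() {
    // A bit-reversed 1 is the sign bit, i.e. INT32_MIN.
    assert(brev32(1) == 0x80000000u);
    // The s_brev_b32 s4, -2 form in these tests gives INT32_MAX.
    assert(brev32(0xfffffffeu) == 0x7fffffffu);
    return 0;
  }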
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
index a7154131e3c0..3e1778bcb881 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
@@ -24,9 +24,8 @@ define i7 @v_ssubsat_i7(i7 %lhs, i7 %rhs) {
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 9, v0
-; GFX8-NEXT:    s_mov_b32 s4, 0xffff
-; GFX8-NEXT:    v_max_i16_e32 v2, s4, v0
-; GFX8-NEXT:    v_min_i16_e32 v3, s4, v0
+; GFX8-NEXT:    v_max_i16_e32 v2, -1, v0
+; GFX8-NEXT:    v_min_i16_e32 v3, -1, v0
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 9, v1
 ; GFX8-NEXT:    v_subrev_u16_e32 v2, 0x7fff, v2
 ; GFX8-NEXT:    v_subrev_u16_e32 v3, 0x8000, v3
@@ -40,9 +39,8 @@ define i7 @v_ssubsat_i7(i7 %lhs, i7 %rhs) {
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 9, v0
-; GFX9-NEXT:    s_mov_b32 s4, 0xffff
-; GFX9-NEXT:    v_max_i16_e32 v2, s4, v0
-; GFX9-NEXT:    v_min_i16_e32 v3, s4, v0
+; GFX9-NEXT:    v_max_i16_e32 v2, -1, v0
+; GFX9-NEXT:    v_min_i16_e32 v3, -1, v0
 ; GFX9-NEXT:    v_lshlrev_b16_e32 v1, 9, v1
 ; GFX9-NEXT:    v_subrev_u16_e32 v2, 0x7fff, v2
 ; GFX9-NEXT:    v_subrev_u16_e32 v3, 0x8000, v3
@@ -57,11 +55,10 @@ define i7 @v_ssubsat_i7(i7 %lhs, i7 %rhs) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    v_lshlrev_b16_e64 v0, 9, v0
-; GFX10-NEXT:    s_mov_b32 s4, 0xffff
 ; GFX10-NEXT:    v_lshlrev_b16_e64 v1, 9, v1
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    v_max_i16_e64 v2, v0, s4
-; GFX10-NEXT:    v_min_i16_e64 v3, v0, s4
+; GFX10-NEXT:    v_max_i16_e64 v2, v0, -1
+; GFX10-NEXT:    v_min_i16_e64 v3, v0, -1
 ; GFX10-NEXT:    v_sub_nc_u16_e64 v2, v2, 0x7fff
 ; GFX10-NEXT:    v_sub_nc_u16_e64 v3, v3, 0x8000
 ; GFX10-NEXT:    v_max_i16_e64 v1, v2, v1
@@ -98,13 +95,13 @@ define amdgpu_ps i7 @s_ssubsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
 ; GFX8-NEXT:    s_lshl_b32 s0, s0, s2
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s0
-; GFX8-NEXT:    s_sext_i32_i16 s4, 0xffff
+; GFX8-NEXT:    s_sext_i32_i16 s4, -1
 ; GFX8-NEXT:    s_cmp_gt_i32 s3, s4
 ; GFX8-NEXT:    s_cselect_b32 s5, s3, s4
 ; GFX8-NEXT:    s_sub_i32 s5, s5, 0x7fff
 ; GFX8-NEXT:    s_cmp_lt_i32 s3, s4
 ; GFX8-NEXT:    s_cselect_b32 s3, s3, s4
-; GFX8-NEXT:    s_sub_i32 s3, s3, 0x8000
+; GFX8-NEXT:    s_sub_i32 s3, s3, 0xffff8000
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s5
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_cmp_gt_i32 s4, s1
@@ -124,13 +121,13 @@ define amdgpu_ps i7 @s_ssubsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
 ; GFX9-NEXT:    s_lshl_b32 s0, s0, s2
 ; GFX9-NEXT:    s_lshl_b32 s1, s1, s2
 ; GFX9-NEXT:    s_sext_i32_i16 s3, s0
-; GFX9-NEXT:    s_sext_i32_i16 s4, 0xffff
+; GFX9-NEXT:    s_sext_i32_i16 s4, -1
 ; GFX9-NEXT:    s_cmp_gt_i32 s3, s4
 ; GFX9-NEXT:    s_cselect_b32 s5, s3, s4
 ; GFX9-NEXT:    s_sub_i32 s5, s5, 0x7fff
 ; GFX9-NEXT:    s_cmp_lt_i32 s3, s4
 ; GFX9-NEXT:    s_cselect_b32 s3, s3, s4
-; GFX9-NEXT:    s_sub_i32 s3, s3, 0x8000
+; GFX9-NEXT:    s_sub_i32 s3, s3, 0xffff8000
 ; GFX9-NEXT:    s_sext_i32_i16 s4, s5
 ; GFX9-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX9-NEXT:    s_cmp_gt_i32 s4, s1
@@ -147,7 +144,7 @@ define amdgpu_ps i7 @s_ssubsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
 ; GFX10-LABEL: s_ssubsat_i7:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_bfe_u32 s2, 9, 0x100000
-; GFX10-NEXT:    s_sext_i32_i16 s4, 0xffff
+; GFX10-NEXT:    s_sext_i32_i16 s4, -1
 ; GFX10-NEXT:    s_lshl_b32 s0, s0, s2
 ; GFX10-NEXT:    s_lshl_b32 s1, s1, s2
 ; GFX10-NEXT:    s_sext_i32_i16 s3, s0
@@ -159,7 +156,7 @@ define amdgpu_ps i7 @s_ssubsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
 ; GFX10-NEXT:    s_cmp_lt_i32 s3, s4
 ; GFX10-NEXT:    s_cselect_b32 s3, s3, s4
 ; GFX10-NEXT:    s_sext_i32_i16 s4, s5
-; GFX10-NEXT:    s_sub_i32 s3, s3, 0x8000
+; GFX10-NEXT:    s_sub_i32 s3, s3, 0xffff8000
 ; GFX10-NEXT:    s_cmp_gt_i32 s4, s1
 ; GFX10-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX10-NEXT:    s_cselect_b32 s1, s4, s1
@@ -194,9 +191,8 @@ define i8 @v_ssubsat_i8(i8 %lhs, i8 %rhs) {
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
-; GFX8-NEXT:    s_mov_b32 s4, 0xffff
-; GFX8-NEXT:    v_max_i16_e32 v2, s4, v0
-; GFX8-NEXT:    v_min_i16_e32 v3, s4, v0
+; GFX8-NEXT:    v_max_i16_e32 v2, -1, v0
+; GFX8-NEXT:    v_min_i16_e32 v3, -1, v0
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
 ; GFX8-NEXT:    v_subrev_u16_e32 v2, 0x7fff, v2
 ; GFX8-NEXT:    v_subrev_u16_e32 v3, 0x8000, v3
@@ -210,9 +206,8 @@ define i8 @v_ssubsat_i8(i8 %lhs, i8 %rhs) {
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
-; GFX9-NEXT:    s_mov_b32 s4, 0xffff
-; GFX9-NEXT:    v_max_i16_e32 v2, s4, v0
-; GFX9-NEXT:    v_min_i16_e32 v3, s4, v0
+; GFX9-NEXT:    v_max_i16_e32 v2, -1, v0
+; GFX9-NEXT:    v_min_i16_e32 v3, -1, v0
 ; GFX9-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
 ; GFX9-NEXT:    v_subrev_u16_e32 v2, 0x7fff, v2
 ; GFX9-NEXT:    v_subrev_u16_e32 v3, 0x8000, v3
@@ -227,11 +222,10 @@ define i8 @v_ssubsat_i8(i8 %lhs, i8 %rhs) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    v_lshlrev_b16_e64 v0, 8, v0
-; GFX10-NEXT:    s_mov_b32 s4, 0xffff
 ; GFX10-NEXT:    v_lshlrev_b16_e64 v1, 8, v1
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    v_max_i16_e64 v2, v0, s4
-; GFX10-NEXT:    v_min_i16_e64 v3, v0, s4
+; GFX10-NEXT:    v_max_i16_e64 v2, v0, -1
+; GFX10-NEXT:    v_min_i16_e64 v3, v0, -1
 ; GFX10-NEXT:    v_sub_nc_u16_e64 v2, v2, 0x7fff
 ; GFX10-NEXT:    v_sub_nc_u16_e64 v3, v3, 0x8000
 ; GFX10-NEXT:    v_max_i16_e64 v1, v2, v1
@@ -268,13 +262,13 @@ define amdgpu_ps i8 @s_ssubsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
 ; GFX8-NEXT:    s_lshl_b32 s0, s0, s2
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s0
-; GFX8-NEXT:    s_sext_i32_i16 s4, 0xffff
+; GFX8-NEXT:    s_sext_i32_i16 s4, -1
 ; GFX8-NEXT:    s_cmp_gt_i32 s3, s4
 ; GFX8-NEXT:    s_cselect_b32 s5, s3, s4
 ; GFX8-NEXT:    s_sub_i32 s5, s5, 0x7fff
 ; GFX8-NEXT:    s_cmp_lt_i32 s3, s4
 ; GFX8-NEXT:    s_cselect_b32 s3, s3, s4
-; GFX8-NEXT:    s_sub_i32 s3, s3, 0x8000
+; GFX8-NEXT:    s_sub_i32 s3, s3, 0xffff8000
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s5
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_cmp_gt_i32 s4, s1
@@ -294,13 +288,13 @@ define amdgpu_ps i8 @s_ssubsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
 ; GFX9-NEXT:    s_lshl_b32 s0, s0, s2
 ; GFX9-NEXT:    s_lshl_b32 s1, s1, s2
 ; GFX9-NEXT:    s_sext_i32_i16 s3, s0
-; GFX9-NEXT:    s_sext_i32_i16 s4, 0xffff
+; GFX9-NEXT:    s_sext_i32_i16 s4, -1
 ; GFX9-NEXT:    s_cmp_gt_i32 s3, s4
 ; GFX9-NEXT:    s_cselect_b32 s5, s3, s4
 ; GFX9-NEXT:    s_sub_i32 s5, s5, 0x7fff
 ; GFX9-NEXT:    s_cmp_lt_i32 s3, s4
 ; GFX9-NEXT:    s_cselect_b32 s3, s3, s4
-; GFX9-NEXT:    s_sub_i32 s3, s3, 0x8000
+; GFX9-NEXT:    s_sub_i32 s3, s3, 0xffff8000
 ; GFX9-NEXT:    s_sext_i32_i16 s4, s5
 ; GFX9-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX9-NEXT:    s_cmp_gt_i32 s4, s1
@@ -317,7 +311,7 @@ define amdgpu_ps i8 @s_ssubsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
 ; GFX10-LABEL: s_ssubsat_i8:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_bfe_u32 s2, 8, 0x100000
-; GFX10-NEXT:    s_sext_i32_i16 s4, 0xffff
+; GFX10-NEXT:    s_sext_i32_i16 s4, -1
 ; GFX10-NEXT:    s_lshl_b32 s0, s0, s2
 ; GFX10-NEXT:    s_lshl_b32 s1, s1, s2
 ; GFX10-NEXT:    s_sext_i32_i16 s3, s0
@@ -329,7 +323,7 @@ define amdgpu_ps i8 @s_ssubsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
 ; GFX10-NEXT:    s_cmp_lt_i32 s3, s4
 ; GFX10-NEXT:    s_cselect_b32 s3, s3, s4
 ; GFX10-NEXT:    s_sext_i32_i16 s4, s5
-; GFX10-NEXT:    s_sub_i32 s3, s3, 0x8000
+; GFX10-NEXT:    s_sub_i32 s3, s3, 0xffff8000
 ; GFX10-NEXT:    s_cmp_gt_i32 s4, s1
 ; GFX10-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX10-NEXT:    s_cselect_b32 s1, s4, s1
@@ -355,7 +349,7 @@ define i16 @v_ssubsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 8, v1
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
 ; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v4
-; GFX6-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s5, 1
 ; GFX6-NEXT:    v_min_i32_e32 v5, -1, v0
 ; GFX6-NEXT:    v_subrev_i32_e32 v5, vcc, s5, v5
 ; GFX6-NEXT:    v_max_i32_e32 v1, v4, v1
@@ -385,21 +379,20 @@ define i16 @v_ssubsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
 ; GFX8-NEXT:    v_mov_b32_e32 v2, 8
 ; GFX8-NEXT:    v_lshrrev_b32_sdwa v3, v2, v0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
-; GFX8-NEXT:    s_mov_b32 s6, 0xffff
 ; GFX8-NEXT:    s_movk_i32 s4, 0x7fff
-; GFX8-NEXT:    v_max_i16_e32 v4, s6, v0
+; GFX8-NEXT:    v_max_i16_e32 v4, -1, v0
 ; GFX8-NEXT:    v_lshrrev_b32_sdwa v2, v2, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
 ; GFX8-NEXT:    v_subrev_u16_e32 v4, s4, v4
-; GFX8-NEXT:    s_mov_b32 s5, 0x8000
-; GFX8-NEXT:    v_min_i16_e32 v5, s6, v0
+; GFX8-NEXT:    s_movk_i32 s5, 0x8000
+; GFX8-NEXT:    v_min_i16_e32 v5, -1, v0
 ; GFX8-NEXT:    v_max_i16_e32 v1, v4, v1
 ; GFX8-NEXT:    v_subrev_u16_e32 v5, s5, v5
 ; GFX8-NEXT:    v_min_i16_e32 v1, v1, v5
 ; GFX8-NEXT:    v_sub_u16_e32 v0, v0, v1
-; GFX8-NEXT:    v_max_i16_e32 v1, s6, v3
+; GFX8-NEXT:    v_max_i16_e32 v1, -1, v3
 ; GFX8-NEXT:    v_subrev_u16_e32 v1, s4, v1
-; GFX8-NEXT:    v_min_i16_e32 v4, s6, v3
+; GFX8-NEXT:    v_min_i16_e32 v4, -1, v3
 ; GFX8-NEXT:    v_max_i16_e32 v1, v1, v2
 ; GFX8-NEXT:    v_subrev_u16_e32 v4, s5, v4
 ; GFX8-NEXT:    v_min_i16_e32 v1, v1, v4
@@ -416,21 +409,20 @@ define i16 @v_ssubsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
 ; GFX9-NEXT:    s_mov_b32 s4, 8
 ; GFX9-NEXT:    v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
-; GFX9-NEXT:    s_mov_b32 s6, 0xffff
 ; GFX9-NEXT:    v_lshrrev_b32_sdwa v3, s4, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX9-NEXT:    s_movk_i32 s4, 0x7fff
-; GFX9-NEXT:    v_max_i16_e32 v4, s6, v0
+; GFX9-NEXT:    v_max_i16_e32 v4, -1, v0
 ; GFX9-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
 ; GFX9-NEXT:    v_subrev_u16_e32 v4, s4, v4
-; GFX9-NEXT:    s_mov_b32 s5, 0x8000
-; GFX9-NEXT:    v_min_i16_e32 v5, s6, v0
+; GFX9-NEXT:    s_movk_i32 s5, 0x8000
+; GFX9-NEXT:    v_min_i16_e32 v5, -1, v0
 ; GFX9-NEXT:    v_max_i16_e32 v1, v4, v1
 ; GFX9-NEXT:    v_subrev_u16_e32 v5, s5, v5
 ; GFX9-NEXT:    v_min_i16_e32 v1, v1, v5
 ; GFX9-NEXT:    v_sub_u16_e32 v0, v0, v1
-; GFX9-NEXT:    v_max_i16_e32 v1, s6, v2
+; GFX9-NEXT:    v_max_i16_e32 v1, -1, v2
 ; GFX9-NEXT:    v_subrev_u16_e32 v1, s4, v1
-; GFX9-NEXT:    v_min_i16_e32 v4, s6, v2
+; GFX9-NEXT:    v_min_i16_e32 v4, -1, v2
 ; GFX9-NEXT:    v_subrev_u16_e32 v4, s5, v4
 ; GFX9-NEXT:    v_max_i16_e32 v1, v1, v3
 ; GFX9-NEXT:    v_min_i16_e32 v1, v1, v4
@@ -448,23 +440,22 @@ define i16 @v_ssubsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
 ; GFX10-NEXT:    s_mov_b32 s4, 8
 ; GFX10-NEXT:    v_lshlrev_b16_e64 v2, 8, v0
 ; GFX10-NEXT:    v_lshrrev_b32_sdwa v0, s4, v0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX10-NEXT:    s_mov_b32 s6, 0xffff
 ; GFX10-NEXT:    s_movk_i32 s5, 0x7fff
 ; GFX10-NEXT:    v_lshrrev_b32_sdwa v3, s4, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX10-NEXT:    v_max_i16_e64 v4, v2, s6
-; GFX10-NEXT:    v_max_i16_e64 v5, v0, s6
 ; GFX10-NEXT:    v_lshlrev_b16_e64 v1, 8, v1
-; GFX10-NEXT:    v_min_i16_e64 v6, v2, s6
-; GFX10-NEXT:    v_min_i16_e64 v7, v0, s6
+; GFX10-NEXT:    v_max_i16_e64 v4, v2, -1
+; GFX10-NEXT:    v_max_i16_e64 v5, v0, -1
+; GFX10-NEXT:    v_min_i16_e64 v6, v2, -1
+; GFX10-NEXT:    v_min_i16_e64 v7, v0, -1
+; GFX10-NEXT:    s_movk_i32 s4, 0x8000
 ; GFX10-NEXT:    v_sub_nc_u16_e64 v4, v4, s5
 ; GFX10-NEXT:    v_sub_nc_u16_e64 v5, v5, s5
-; GFX10-NEXT:    s_mov_b32 s4, 0x8000
-; GFX10-NEXT:    ; implicit-def: $vcc_hi
 ; GFX10-NEXT:    v_sub_nc_u16_e64 v6, v6, s4
-; GFX10-NEXT:    v_max_i16_e64 v1, v4, v1
 ; GFX10-NEXT:    v_sub_nc_u16_e64 v7, v7, s4
-; GFX10-NEXT:    v_max_i16_e64 v10, v5, v3
 ; GFX10-NEXT:    s_movk_i32 s4, 0xff
+; GFX10-NEXT:    v_max_i16_e64 v1, v4, v1
+; GFX10-NEXT:    v_max_i16_e64 v10, v5, v3
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
 ; GFX10-NEXT:    v_min_i16_e64 v1, v1, v6
 ; GFX10-NEXT:    v_min_i16_e64 v3, v10, v7
 ; GFX10-NEXT:    v_sub_nc_u16_e64 v1, v2, v1
@@ -492,7 +483,7 @@ define amdgpu_ps i16 @s_ssubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
 ; GFX6-NEXT:    s_cselect_b32 s6, s0, -1
 ; GFX6-NEXT:    s_sub_i32 s6, s6, s4
 ; GFX6-NEXT:    s_cmp_lt_i32 s0, -1
-; GFX6-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s5, 1
 ; GFX6-NEXT:    s_cselect_b32 s7, s0, -1
 ; GFX6-NEXT:    s_sub_i32 s7, s7, s5
 ; GFX6-NEXT:    s_cmp_gt_i32 s6, s1
@@ -530,13 +521,13 @@ define amdgpu_ps i16 @s_ssubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_lshr_b32 s3, s1, 8
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s7, s0
-; GFX8-NEXT:    s_sext_i32_i16 s8, 0xffff
+; GFX8-NEXT:    s_sext_i32_i16 s8, -1
 ; GFX8-NEXT:    s_cmp_gt_i32 s7, s8
 ; GFX8-NEXT:    s_movk_i32 s5, 0x7fff
 ; GFX8-NEXT:    s_cselect_b32 s9, s7, s8
 ; GFX8-NEXT:    s_sub_i32 s9, s9, s5
 ; GFX8-NEXT:    s_cmp_lt_i32 s7, s8
-; GFX8-NEXT:    s_mov_b32 s6, 0x8000
+; GFX8-NEXT:    s_movk_i32 s6, 0x8000
 ; GFX8-NEXT:    s_cselect_b32 s7, s7, s8
 ; GFX8-NEXT:    s_sub_i32 s7, s7, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s9, s9
@@ -585,13 +576,13 @@ define amdgpu_ps i16 @s_ssubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
 ; GFX9-NEXT:    s_lshr_b32 s3, s1, 8
 ; GFX9-NEXT:    s_lshl_b32 s1, s1, s4
 ; GFX9-NEXT:    s_sext_i32_i16 s7, s0
-; GFX9-NEXT:    s_sext_i32_i16 s8, 0xffff
+; GFX9-NEXT:    s_sext_i32_i16 s8, -1
 ; GFX9-NEXT:    s_cmp_gt_i32 s7, s8
 ; GFX9-NEXT:    s_movk_i32 s5, 0x7fff
 ; GFX9-NEXT:    s_cselect_b32 s9, s7, s8
 ; GFX9-NEXT:    s_sub_i32 s9, s9, s5
 ; GFX9-NEXT:    s_cmp_lt_i32 s7, s8
-; GFX9-NEXT:    s_mov_b32 s6, 0x8000
+; GFX9-NEXT:    s_movk_i32 s6, 0x8000
 ; GFX9-NEXT:    s_cselect_b32 s7, s7, s8
 ; GFX9-NEXT:    s_sub_i32 s7, s7, s6
 ; GFX9-NEXT:    s_sext_i32_i16 s9, s9
@@ -637,14 +628,14 @@ define amdgpu_ps i16 @s_ssubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
 ; GFX10-NEXT:    s_bfe_u32 s2, 8, 0x100000
 ; GFX10-NEXT:    s_lshr_b32 s3, s0, 8
 ; GFX10-NEXT:    s_lshl_b32 s0, s0, s2
-; GFX10-NEXT:    s_sext_i32_i16 s6, 0xffff
+; GFX10-NEXT:    s_sext_i32_i16 s6, -1
 ; GFX10-NEXT:    s_sext_i32_i16 s5, s0
 ; GFX10-NEXT:    s_lshr_b32 s4, s1, 8
 ; GFX10-NEXT:    s_lshl_b32 s1, s1, s2
 ; GFX10-NEXT:    s_cmp_gt_i32 s5, s6
 ; GFX10-NEXT:    s_movk_i32 s7, 0x7fff
 ; GFX10-NEXT:    s_cselect_b32 s8, s5, s6
-; GFX10-NEXT:    s_mov_b32 s9, 0x8000
+; GFX10-NEXT:    s_movk_i32 s9, 0x8000
 ; GFX10-NEXT:    s_sub_i32 s8, s8, s7
 ; GFX10-NEXT:    s_cmp_lt_i32 s5, s6
 ; GFX10-NEXT:    s_sext_i32_i16 s8, s8
@@ -709,7 +700,7 @@ define i32 @v_ssubsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
 ; GFX6-NEXT:    v_subrev_i32_e32 v8, vcc, s4, v8
-; GFX6-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s5, 1
 ; GFX6-NEXT:    v_min_i32_e32 v10, -1, v0
 ; GFX6-NEXT:    v_subrev_i32_e32 v10, vcc, s5, v10
 ; GFX6-NEXT:    v_max_i32_e32 v1, v8, v1
@@ -736,7 +727,7 @@ define i32 @v_ssubsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v3
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 24, v4
 ; GFX6-NEXT:    v_max_i32_e32 v5, -1, v3
-; GFX6-NEXT:    v_mov_b32_e32 v11, 0x80000000
+; GFX6-NEXT:    v_bfrev_b32_e32 v11, 1
 ; GFX6-NEXT:    v_min_i32_e32 v6, -1, v3
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 24, v1
 ; GFX6-NEXT:    s_movk_i32 s4, 0xff
@@ -769,43 +760,41 @@ define i32 @v_ssubsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 24, v0
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
-; GFX8-NEXT:    s_mov_b32 s6, 0xffff
 ; GFX8-NEXT:    s_movk_i32 s4, 0x7fff
-; GFX8-NEXT:    v_max_i16_e32 v9, s6, v0
+; GFX8-NEXT:    v_max_i16_e32 v8, -1, v0
 ; GFX8-NEXT:    v_lshrrev_b32_sdwa v2, v2, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
-; GFX8-NEXT:    v_subrev_u16_e32 v9, s4, v9
-; GFX8-NEXT:    s_mov_b32 s5, 0x8000
-; GFX8-NEXT:    v_min_i16_e32 v11, s6, v0
-; GFX8-NEXT:    v_max_i16_e32 v1, v9, v1
-; GFX8-NEXT:    v_subrev_u16_e32 v11, s5, v11
-; GFX8-NEXT:    v_min_i16_e32 v1, v1, v11
+; GFX8-NEXT:    v_subrev_u16_e32 v8, s4, v8
+; GFX8-NEXT:    s_movk_i32 s5, 0x8000
+; GFX8-NEXT:    v_min_i16_e32 v10, -1, v0
+; GFX8-NEXT:    v_max_i16_e32 v1, v8, v1
+; GFX8-NEXT:    v_subrev_u16_e32 v10, s5, v10
+; GFX8-NEXT:    v_min_i16_e32 v1, v1, v10
 ; GFX8-NEXT:    v_sub_u16_e32 v0, v0, v1
-; GFX8-NEXT:    v_max_i16_e32 v1, s6, v3
+; GFX8-NEXT:    v_max_i16_e32 v1, -1, v3
 ; GFX8-NEXT:    v_subrev_u16_e32 v1, s4, v1
-; GFX8-NEXT:    v_min_i16_e32 v9, s6, v3
+; GFX8-NEXT:    v_min_i16_e32 v8, -1, v3
 ; GFX8-NEXT:    v_max_i16_e32 v1, v1, v2
-; GFX8-NEXT:    v_subrev_u16_e32 v9, s5, v9
-; GFX8-NEXT:    v_mov_b32_e32 v8, 0xffff
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v2, 8, v4
-; GFX8-NEXT:    v_min_i16_e32 v1, v1, v9
-; GFX8-NEXT:    v_mov_b32_e32 v10, 0x7fff
-; GFX8-NEXT:    v_max_i16_e32 v4, v2, v8
+; GFX8-NEXT:    v_subrev_u16_e32 v8, s5, v8
+; GFX8-NEXT:    v_min_i16_e32 v1, v1, v8
+; GFX8-NEXT:    v_mov_b32_e32 v9, 0x7fff
+; GFX8-NEXT:    v_max_i16_e32 v4, -1, v2
 ; GFX8-NEXT:    v_sub_u16_e32 v1, v3, v1
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v3, 8, v6
-; GFX8-NEXT:    v_min_i16_e32 v6, v2, v8
-; GFX8-NEXT:    v_sub_u16_e32 v4, v4, v10
+; GFX8-NEXT:    v_min_i16_e32 v6, -1, v2
+; GFX8-NEXT:    v_sub_u16_e32 v4, v4, v9
 ; GFX8-NEXT:    v_max_i16_e32 v3, v4, v3
 ; GFX8-NEXT:    v_subrev_u16_e32 v6, s5, v6
 ; GFX8-NEXT:    v_min_i16_e32 v3, v3, v6
 ; GFX8-NEXT:    v_sub_u16_e32 v2, v2, v3
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v3, 8, v5
-; GFX8-NEXT:    v_max_i16_e32 v5, v3, v8
-; GFX8-NEXT:    v_min_i16_e32 v6, v3, v8
+; GFX8-NEXT:    v_max_i16_e32 v5, -1, v3
+; GFX8-NEXT:    v_min_i16_e32 v6, -1, v3
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v4, 8, v7
-; GFX8-NEXT:    v_sub_u16_e32 v5, v5, v10
+; GFX8-NEXT:    v_sub_u16_e32 v5, v5, v9
 ; GFX8-NEXT:    v_subrev_u16_e32 v6, 0x8000, v6
 ; GFX8-NEXT:    v_max_i16_e32 v4, v5, v4
 ; GFX8-NEXT:    v_min_i16_e32 v4, v4, v6
@@ -829,43 +818,41 @@ define i32 @v_ssubsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
 ; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
 ; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
 ; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
-; GFX9-NEXT:    s_mov_b32 s6, 0xffff
 ; GFX9-NEXT:    v_lshrrev_b32_sdwa v5, s4, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX9-NEXT:    s_movk_i32 s4, 0x7fff
-; GFX9-NEXT:    v_max_i16_e32 v9, s6, v0
+; GFX9-NEXT:    v_max_i16_e32 v8, -1, v0
 ; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
 ; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
 ; GFX9-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
-; GFX9-NEXT:    v_subrev_u16_e32 v9, s4, v9
-; GFX9-NEXT:    s_mov_b32 s5, 0x8000
-; GFX9-NEXT:    v_min_i16_e32 v11, s6, v0
-; GFX9-NEXT:    v_max_i16_e32 v1, v9, v1
-; GFX9-NEXT:    v_subrev_u16_e32 v11, s5, v11
-; GFX9-NEXT:    v_min_i16_e32 v1, v1, v11
+; GFX9-NEXT:    v_subrev_u16_e32 v8, s4, v8
+; GFX9-NEXT:    s_movk_i32 s5, 0x8000
+; GFX9-NEXT:    v_min_i16_e32 v10, -1, v0
+; GFX9-NEXT:    v_max_i16_e32 v1, v8, v1
+; GFX9-NEXT:    v_subrev_u16_e32 v10, s5, v10
+; GFX9-NEXT:    v_min_i16_e32 v1, v1, v10
 ; GFX9-NEXT:    v_sub_u16_e32 v0, v0, v1
-; GFX9-NEXT:    v_max_i16_e32 v1, s6, v2
+; GFX9-NEXT:    v_max_i16_e32 v1, -1, v2
 ; GFX9-NEXT:    v_subrev_u16_e32 v1, s4, v1
-; GFX9-NEXT:    v_min_i16_e32 v9, s6, v2
-; GFX9-NEXT:    v_subrev_u16_e32 v9, s5, v9
+; GFX9-NEXT:    v_min_i16_e32 v8, -1, v2
+; GFX9-NEXT:    v_subrev_u16_e32 v8, s5, v8
 ; GFX9-NEXT:    v_max_i16_e32 v1, v1, v5
-; GFX9-NEXT:    v_min_i16_e32 v1, v1, v9
+; GFX9-NEXT:    v_min_i16_e32 v1, v1, v8
 ; GFX9-NEXT:    v_sub_u16_e32 v1, v2, v1
 ; GFX9-NEXT:    v_lshlrev_b16_e32 v2, 8, v3
-; GFX9-NEXT:    v_mov_b32_e32 v8, 0xffff
-; GFX9-NEXT:    v_mov_b32_e32 v10, 0x7fff
-; GFX9-NEXT:    v_max_i16_e32 v5, v2, v8
+; GFX9-NEXT:    v_mov_b32_e32 v9, 0x7fff
+; GFX9-NEXT:    v_max_i16_e32 v5, -1, v2
 ; GFX9-NEXT:    v_lshlrev_b16_e32 v3, 8, v6
-; GFX9-NEXT:    v_min_i16_e32 v6, v2, v8
-; GFX9-NEXT:    v_sub_u16_e32 v5, v5, v10
+; GFX9-NEXT:    v_min_i16_e32 v6, -1, v2
+; GFX9-NEXT:    v_sub_u16_e32 v5, v5, v9
 ; GFX9-NEXT:    v_subrev_u16_e32 v6, s5, v6
 ; GFX9-NEXT:    v_max_i16_e32 v3, v5, v3
 ; GFX9-NEXT:    v_min_i16_e32 v3, v3, v6
 ; GFX9-NEXT:    v_sub_u16_e32 v2, v2, v3
 ; GFX9-NEXT:    v_lshlrev_b16_e32 v3, 8, v4
-; GFX9-NEXT:    v_max_i16_e32 v5, v3, v8
-; GFX9-NEXT:    v_min_i16_e32 v6, v3, v8
+; GFX9-NEXT:    v_max_i16_e32 v5, -1, v3
+; GFX9-NEXT:    v_min_i16_e32 v6, -1, v3
 ; GFX9-NEXT:    v_lshlrev_b16_e32 v4, 8, v7
-; GFX9-NEXT:    v_sub_u16_e32 v5, v5, v10
+; GFX9-NEXT:    v_sub_u16_e32 v5, v5, v9
 ; GFX9-NEXT:    s_movk_i32 s4, 0xff
 ; GFX9-NEXT:    v_subrev_u16_e32 v6, 0x8000, v6
 ; GFX9-NEXT:    v_max_i16_e32 v4, v5, v4
@@ -884,59 +871,57 @@ define i32 @v_ssubsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_lshlrev_b16_e64 v4, 8, v0
 ; GFX10-NEXT:    s_mov_b32 s4, 8
-; GFX10-NEXT:    s_mov_b32 s5, 16
-; GFX10-NEXT:    s_mov_b32 s6, 24
+; GFX10-NEXT:    v_lshlrev_b16_e64 v7, 8, v1
 ; GFX10-NEXT:    v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX10-NEXT:    v_lshrrev_b32_sdwa v3, s5, v0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX10-NEXT:    v_lshrrev_b32_sdwa v19, s6, v0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX10-NEXT:    v_lshlrev_b16_e64 v0, 8, v0
 ; GFX10-NEXT:    v_lshrrev_b32_sdwa v5, s4, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX10-NEXT:    s_mov_b32 s4, 0xffff
+; GFX10-NEXT:    v_max_i16_e64 v8, v4, -1
+; GFX10-NEXT:    s_movk_i32 s4, 0x7fff
+; GFX10-NEXT:    v_min_i16_e64 v10, v4, -1
+; GFX10-NEXT:    v_max_i16_e64 v9, v2, -1
+; GFX10-NEXT:    s_mov_b32 s5, 16
+; GFX10-NEXT:    v_sub_nc_u16_e64 v8, v8, s4
+; GFX10-NEXT:    v_lshrrev_b32_sdwa v3, s5, v0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX10-NEXT:    s_mov_b32 s6, 24
 ; GFX10-NEXT:    v_lshrrev_b32_sdwa v6, s5, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX10-NEXT:    s_movk_i32 s5, 0x7fff
-; GFX10-NEXT:    v_max_i16_e64 v8, v0, s4
-; GFX10-NEXT:    v_lshrrev_b32_sdwa v7, s6, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX10-NEXT:    v_max_i16_e64 v9, v2, s4
-; GFX10-NEXT:    v_lshlrev_b16_e64 v1, 8, v1
-; GFX10-NEXT:    v_min_i16_e64 v10, v0, s4
-; GFX10-NEXT:    v_sub_nc_u16_e64 v8, v8, s5
-; GFX10-NEXT:    s_mov_b32 s6, 0x8000
-; GFX10-NEXT:    v_sub_nc_u16_e64 v15, v9, s5
-; GFX10-NEXT:    v_min_i16_e64 v11, v2, s4
-; GFX10-NEXT:    v_mov_b32_e32 v12, 0xffff
-; GFX10-NEXT:    v_max_i16_e64 v1, v8, v1
-; GFX10-NEXT:    v_sub_nc_u16_e64 v10, v10, s6
+; GFX10-NEXT:    s_movk_i32 s5, 0x8000
+; GFX10-NEXT:    v_sub_nc_u16_e64 v15, v9, s4
+; GFX10-NEXT:    v_lshrrev_b32_sdwa v0, s6, v0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX10-NEXT:    v_min_i16_e64 v11, v2, -1
+; GFX10-NEXT:    v_max_i16_e64 v7, v8, v7
+; GFX10-NEXT:    v_sub_nc_u16_e64 v10, v10, s5
 ; GFX10-NEXT:    v_max_i16_e64 v5, v15, v5
-; GFX10-NEXT:    v_sub_nc_u16_e64 v8, v11, s6
 ; GFX10-NEXT:    v_mov_b32_e32 v9, 0x7fff
-; GFX10-NEXT:    v_max_i16_e64 v11, v3, v12
-; GFX10-NEXT:    v_min_i16_e64 v1, v1, v10
-; GFX10-NEXT:    v_max_i16_e64 v10, v19, v12
+; GFX10-NEXT:    v_sub_nc_u16_e64 v8, v11, s5
+; GFX10-NEXT:    v_max_i16_e64 v11, v3, -1
+; GFX10-NEXT:    v_min_i16_e64 v7, v7, v10
+; GFX10-NEXT:    v_max_i16_e64 v10, v0, -1
+; GFX10-NEXT:    v_lshrrev_b32_sdwa v1, s6, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX10-NEXT:    v_min_i16_e64 v5, v5, v8
-; GFX10-NEXT:    v_min_i16_e64 v8, v3, v12
 ; GFX10-NEXT:    v_sub_nc_u16_e64 v11, v11, v9
-; GFX10-NEXT:    v_min_i16_e64 v12, v19, v12
-; GFX10-NEXT:    v_sub_nc_u16_e64 v9, v10, v9
+; GFX10-NEXT:    v_min_i16_e64 v8, v3, -1
+; GFX10-NEXT:    v_sub_nc_u16_e64 v15, v10, v9
+; GFX10-NEXT:    v_min_i16_e64 v12, v0, -1
 ; GFX10-NEXT:    v_sub_nc_u16_e64 v2, v2, v5
-; GFX10-NEXT:    v_sub_nc_u16_e64 v5, v8, s6
 ; GFX10-NEXT:    v_max_i16_e64 v6, v11, v6
+; GFX10-NEXT:    v_sub_nc_u16_e64 v5, v8, s5
+; GFX10-NEXT:    v_max_i16_e64 v1, v15, v1
 ; GFX10-NEXT:    v_sub_nc_u16_e64 v8, v12, 0x8000
-; GFX10-NEXT:    v_max_i16_e64 v7, v9, v7
 ; GFX10-NEXT:    s_movk_i32 s4, 0xff
-; GFX10-NEXT:    v_sub_nc_u16_e64 v0, v0, v1
-; GFX10-NEXT:    v_and_b32_sdwa v1, sext(v2), s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
-; GFX10-NEXT:    v_min_i16_e64 v2, v6, v5
-; GFX10-NEXT:    v_min_i16_e64 v5, v7, v8
+; GFX10-NEXT:    v_sub_nc_u16_e64 v4, v4, v7
+; GFX10-NEXT:    v_and_b32_sdwa v2, sext(v2), s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
+; GFX10-NEXT:    v_min_i16_e64 v5, v6, v5
+; GFX10-NEXT:    v_min_i16_e64 v1, v1, v8
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    v_ashrrev_i16_e64 v0, 8, v0
-; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
-; GFX10-NEXT:    v_sub_nc_u16_e64 v2, v3, v2
-; GFX10-NEXT:    v_sub_nc_u16_e64 v3, v19, v5
-; GFX10-NEXT:    v_and_or_b32 v0, v0, s4, v1
-; GFX10-NEXT:    v_and_b32_sdwa v1, sext(v2), s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
-; GFX10-NEXT:    v_and_b32_sdwa v2, sext(v3), s4 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
-; GFX10-NEXT:    v_or3_b32 v0, v0, v1, v2
+; GFX10-NEXT:    v_ashrrev_i16_e64 v4, 8, v4
+; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; GFX10-NEXT:    v_sub_nc_u16_e64 v3, v3, v5
+; GFX10-NEXT:    v_sub_nc_u16_e64 v0, v0, v1
+; GFX10-NEXT:    v_and_or_b32 v1, v4, s4, v2
+; GFX10-NEXT:    v_and_b32_sdwa v2, sext(v3), s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
+; GFX10-NEXT:    v_and_b32_sdwa v0, sext(v0), s4 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
+; GFX10-NEXT:    v_or3_b32 v0, v1, v2, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %lhs = bitcast i32 %lhs.arg to <4 x i8>
   %rhs = bitcast i32 %rhs.arg to <4 x i8>
@@ -961,7 +946,7 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX6-NEXT:    s_cselect_b32 s10, s0, -1
 ; GFX6-NEXT:    s_sub_i32 s10, s10, s8
 ; GFX6-NEXT:    s_cmp_lt_i32 s0, -1
-; GFX6-NEXT:    s_mov_b32 s9, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s9, 1
 ; GFX6-NEXT:    s_cselect_b32 s11, s0, -1
 ; GFX6-NEXT:    s_sub_i32 s11, s11, s9
 ; GFX6-NEXT:    s_cmp_gt_i32 s10, s1
@@ -1037,13 +1022,13 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_lshr_b32 s7, s1, 24
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, s8
 ; GFX8-NEXT:    s_sext_i32_i16 s11, s0
-; GFX8-NEXT:    s_sext_i32_i16 s12, 0xffff
+; GFX8-NEXT:    s_sext_i32_i16 s12, -1
 ; GFX8-NEXT:    s_cmp_gt_i32 s11, s12
 ; GFX8-NEXT:    s_movk_i32 s9, 0x7fff
 ; GFX8-NEXT:    s_cselect_b32 s13, s11, s12
 ; GFX8-NEXT:    s_sub_i32 s13, s13, s9
 ; GFX8-NEXT:    s_cmp_lt_i32 s11, s12
-; GFX8-NEXT:    s_mov_b32 s10, 0x8000
+; GFX8-NEXT:    s_movk_i32 s10, 0x8000
 ; GFX8-NEXT:    s_cselect_b32 s11, s11, s12
 ; GFX8-NEXT:    s_sub_i32 s11, s11, s10
 ; GFX8-NEXT:    s_sext_i32_i16 s13, s13
@@ -1142,13 +1127,13 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX9-NEXT:    s_lshr_b32 s7, s1, 24
 ; GFX9-NEXT:    s_lshl_b32 s1, s1, s8
 ; GFX9-NEXT:    s_sext_i32_i16 s11, s0
-; GFX9-NEXT:    s_sext_i32_i16 s12, 0xffff
+; GFX9-NEXT:    s_sext_i32_i16 s12, -1
 ; GFX9-NEXT:    s_cmp_gt_i32 s11, s12
 ; GFX9-NEXT:    s_movk_i32 s9, 0x7fff
 ; GFX9-NEXT:    s_cselect_b32 s13, s11, s12
 ; GFX9-NEXT:    s_sub_i32 s13, s13, s9
 ; GFX9-NEXT:    s_cmp_lt_i32 s11, s12
-; GFX9-NEXT:    s_mov_b32 s10, 0x8000
+; GFX9-NEXT:    s_movk_i32 s10, 0x8000
 ; GFX9-NEXT:    s_cselect_b32 s11, s11, s12
 ; GFX9-NEXT:    s_sub_i32 s11, s11, s10
 ; GFX9-NEXT:    s_sext_i32_i16 s13, s13
@@ -1242,7 +1227,7 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX10-NEXT:    s_lshr_b32 s3, s0, 16
 ; GFX10-NEXT:    s_lshr_b32 s4, s0, 24
 ; GFX10-NEXT:    s_lshl_b32 s0, s0, s6
-; GFX10-NEXT:    s_sext_i32_i16 s10, 0xffff
+; GFX10-NEXT:    s_sext_i32_i16 s10, -1
 ; GFX10-NEXT:    s_sext_i32_i16 s9, s0
 ; GFX10-NEXT:    s_lshr_b32 s5, s1, 8
 ; GFX10-NEXT:    s_lshr_b32 s7, s1, 16
@@ -1251,7 +1236,7 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX10-NEXT:    s_cmp_gt_i32 s9, s10
 ; GFX10-NEXT:    s_movk_i32 s11, 0x7fff
 ; GFX10-NEXT:    s_cselect_b32 s12, s9, s10
-; GFX10-NEXT:    s_mov_b32 s13, 0x8000
+; GFX10-NEXT:    s_movk_i32 s13, 0x8000
 ; GFX10-NEXT:    s_sub_i32 s12, s12, s11
 ; GFX10-NEXT:    s_cmp_lt_i32 s9, s10
 ; GFX10-NEXT:    s_sext_i32_i16 s12, s12
@@ -1726,7 +1711,7 @@ define <2 x i32> @v_ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; GFX6-NEXT:    s_brev_b32 s4, -2
 ; GFX6-NEXT:    v_max_i32_e32 v4, -1, v0
 ; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v4
-; GFX6-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s5, 1
 ; GFX6-NEXT:    v_min_i32_e32 v5, -1, v0
 ; GFX6-NEXT:    v_max_i32_e32 v2, v4, v2
 ; GFX6-NEXT:    v_subrev_i32_e32 v5, vcc, s5, v5
@@ -1747,7 +1732,7 @@ define <2 x i32> @v_ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; GFX8-NEXT:    s_brev_b32 s4, -2
 ; GFX8-NEXT:    v_max_i32_e32 v4, -1, v0
 ; GFX8-NEXT:    v_subrev_u32_e32 v4, vcc, s4, v4
-; GFX8-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX8-NEXT:    s_brev_b32 s5, 1
 ; GFX8-NEXT:    v_min_i32_e32 v5, -1, v0
 ; GFX8-NEXT:    v_max_i32_e32 v2, v4, v2
 ; GFX8-NEXT:    v_subrev_u32_e32 v5, vcc, s5, v5
@@ -1768,7 +1753,7 @@ define <2 x i32> @v_ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; GFX9-NEXT:    s_brev_b32 s4, -2
 ; GFX9-NEXT:    v_max_i32_e32 v4, -1, v0
 ; GFX9-NEXT:    v_subrev_u32_e32 v4, s4, v4
-; GFX9-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX9-NEXT:    s_brev_b32 s5, 1
 ; GFX9-NEXT:    v_min_i32_e32 v5, -1, v0
 ; GFX9-NEXT:    v_max_i32_e32 v2, v4, v2
 ; GFX9-NEXT:    v_subrev_u32_e32 v5, s5, v5
@@ -1794,7 +1779,7 @@ define <2 x i32> @v_ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; GFX10-NEXT:    v_min_i32_e32 v7, -1, v1
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v4, s4, v4
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v5, s4, v5
-; GFX10-NEXT:    s_mov_b32 s4, 0x80000000
+; GFX10-NEXT:    s_brev_b32 s4, 1
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v6, s4, v6
 ; GFX10-NEXT:    v_max_i32_e32 v11, v4, v2
@@ -1817,7 +1802,7 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v2i32(<2 x i32> inreg %lhs, <2 x i32> inre
 ; GFX6-NEXT:    s_cselect_b32 s6, s0, -1
 ; GFX6-NEXT:    s_sub_i32 s6, s6, s4
 ; GFX6-NEXT:    s_cmp_lt_i32 s0, -1
-; GFX6-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s5, 1
 ; GFX6-NEXT:    s_cselect_b32 s7, s0, -1
 ; GFX6-NEXT:    s_sub_i32 s7, s7, s5
 ; GFX6-NEXT:    s_cmp_gt_i32 s6, s2
@@ -1845,7 +1830,7 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v2i32(<2 x i32> inreg %lhs, <2 x i32> inre
 ; GFX8-NEXT:    s_cselect_b32 s6, s0, -1
 ; GFX8-NEXT:    s_sub_i32 s6, s6, s4
 ; GFX8-NEXT:    s_cmp_lt_i32 s0, -1
-; GFX8-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX8-NEXT:    s_brev_b32 s5, 1
 ; GFX8-NEXT:    s_cselect_b32 s7, s0, -1
 ; GFX8-NEXT:    s_sub_i32 s7, s7, s5
 ; GFX8-NEXT:    s_cmp_gt_i32 s6, s2
@@ -1873,7 +1858,7 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v2i32(<2 x i32> inreg %lhs, <2 x i32> inre
 ; GFX9-NEXT:    s_cselect_b32 s6, s0, -1
 ; GFX9-NEXT:    s_sub_i32 s6, s6, s4
 ; GFX9-NEXT:    s_cmp_lt_i32 s0, -1
-; GFX9-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX9-NEXT:    s_brev_b32 s5, 1
 ; GFX9-NEXT:    s_cselect_b32 s7, s0, -1
 ; GFX9-NEXT:    s_sub_i32 s7, s7, s5
 ; GFX9-NEXT:    s_cmp_gt_i32 s6, s2
@@ -1899,7 +1884,7 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v2i32(<2 x i32> inreg %lhs, <2 x i32> inre
 ; GFX10-NEXT:    s_cmp_gt_i32 s0, -1
 ; GFX10-NEXT:    s_brev_b32 s4, -2
 ; GFX10-NEXT:    s_cselect_b32 s5, s0, -1
-; GFX10-NEXT:    s_mov_b32 s6, 0x80000000
+; GFX10-NEXT:    s_brev_b32 s6, 1
 ; GFX10-NEXT:    s_sub_i32 s5, s5, s4
 ; GFX10-NEXT:    s_cmp_lt_i32 s0, -1
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
@@ -1933,7 +1918,7 @@ define <3 x i32> @v_ssubsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
 ; GFX6-NEXT:    s_brev_b32 s4, -2
 ; GFX6-NEXT:    v_max_i32_e32 v6, -1, v0
 ; GFX6-NEXT:    v_subrev_i32_e32 v6, vcc, s4, v6
-; GFX6-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s5, 1
 ; GFX6-NEXT:    v_min_i32_e32 v7, -1, v0
 ; GFX6-NEXT:    v_max_i32_e32 v3, v6, v3
 ; GFX6-NEXT:    v_subrev_i32_e32 v7, vcc, s5, v7
@@ -1961,7 +1946,7 @@ define <3 x i32> @v_ssubsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
 ; GFX8-NEXT:    s_brev_b32 s4, -2
 ; GFX8-NEXT:    v_max_i32_e32 v6, -1, v0
 ; GFX8-NEXT:    v_subrev_u32_e32 v6, vcc, s4, v6
-; GFX8-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX8-NEXT:    s_brev_b32 s5, 1
 ; GFX8-NEXT:    v_min_i32_e32 v7, -1, v0
 ; GFX8-NEXT:    v_max_i32_e32 v3, v6, v3
 ; GFX8-NEXT:    v_subrev_u32_e32 v7, vcc, s5, v7
@@ -1989,7 +1974,7 @@ define <3 x i32> @v_ssubsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
 ; GFX9-NEXT:    s_brev_b32 s4, -2
 ; GFX9-NEXT:    v_max_i32_e32 v6, -1, v0
 ; GFX9-NEXT:    v_subrev_u32_e32 v6, s4, v6
-; GFX9-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX9-NEXT:    s_brev_b32 s5, 1
 ; GFX9-NEXT:    v_min_i32_e32 v7, -1, v0
 ; GFX9-NEXT:    v_max_i32_e32 v3, v6, v3
 ; GFX9-NEXT:    v_subrev_u32_e32 v7, s5, v7
@@ -2025,7 +2010,7 @@ define <3 x i32> @v_ssubsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v19, s4, v9
 ; GFX10-NEXT:    v_min_i32_e32 v10, -1, v1
 ; GFX10-NEXT:    v_min_i32_e32 v11, -1, v2
-; GFX10-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX10-NEXT:    s_brev_b32 s5, 1
 ; GFX10-NEXT:    v_max_i32_e32 v14, v6, v3
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v7, s5, v7
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v6, s5, v10
@@ -2052,7 +2037,7 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v3i32(<3 x i32> inreg %lhs, <3 x i32> inre
 ; GFX6-NEXT:    s_cselect_b32 s8, s0, -1
 ; GFX6-NEXT:    s_sub_i32 s8, s8, s6
 ; GFX6-NEXT:    s_cmp_lt_i32 s0, -1
-; GFX6-NEXT:    s_mov_b32 s7, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s7, 1
 ; GFX6-NEXT:    s_cselect_b32 s9, s0, -1
 ; GFX6-NEXT:    s_sub_i32 s9, s9, s7
 ; GFX6-NEXT:    s_cmp_gt_i32 s8, s3
@@ -2091,7 +2076,7 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v3i32(<3 x i32> inreg %lhs, <3 x i32> inre
 ; GFX8-NEXT:    s_cselect_b32 s8, s0, -1
 ; GFX8-NEXT:    s_sub_i32 s8, s8, s6
 ; GFX8-NEXT:    s_cmp_lt_i32 s0, -1
-; GFX8-NEXT:    s_mov_b32 s7, 0x80000000
+; GFX8-NEXT:    s_brev_b32 s7, 1
 ; GFX8-NEXT:    s_cselect_b32 s9, s0, -1
 ; GFX8-NEXT:    s_sub_i32 s9, s9, s7
 ; GFX8-NEXT:    s_cmp_gt_i32 s8, s3
@@ -2130,7 +2115,7 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v3i32(<3 x i32> inreg %lhs, <3 x i32> inre
 ; GFX9-NEXT:    s_cselect_b32 s8, s0, -1
 ; GFX9-NEXT:    s_sub_i32 s8, s8, s6
 ; GFX9-NEXT:    s_cmp_lt_i32 s0, -1
-; GFX9-NEXT:    s_mov_b32 s7, 0x80000000
+; GFX9-NEXT:    s_brev_b32 s7, 1
 ; GFX9-NEXT:    s_cselect_b32 s9, s0, -1
 ; GFX9-NEXT:    s_sub_i32 s9, s9, s7
 ; GFX9-NEXT:    s_cmp_gt_i32 s8, s3
@@ -2167,7 +2152,7 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v3i32(<3 x i32> inreg %lhs, <3 x i32> inre
 ; GFX10-NEXT:    s_cmp_gt_i32 s0, -1
 ; GFX10-NEXT:    s_brev_b32 s6, -2
 ; GFX10-NEXT:    s_cselect_b32 s7, s0, -1
-; GFX10-NEXT:    s_mov_b32 s8, 0x80000000
+; GFX10-NEXT:    s_brev_b32 s8, 1
 ; GFX10-NEXT:    s_sub_i32 s7, s7, s6
 ; GFX10-NEXT:    s_cmp_lt_i32 s0, -1
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
@@ -2212,7 +2197,7 @@ define <4 x i32> @v_ssubsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
 ; GFX6-NEXT:    s_brev_b32 s4, -2
 ; GFX6-NEXT:    v_max_i32_e32 v8, -1, v0
 ; GFX6-NEXT:    v_subrev_i32_e32 v8, vcc, s4, v8
-; GFX6-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s5, 1
 ; GFX6-NEXT:    v_min_i32_e32 v9, -1, v0
 ; GFX6-NEXT:    v_max_i32_e32 v4, v8, v4
 ; GFX6-NEXT:    v_subrev_i32_e32 v9, vcc, s5, v9
@@ -2247,7 +2232,7 @@ define <4 x i32> @v_ssubsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
 ; GFX8-NEXT:    s_brev_b32 s4, -2
 ; GFX8-NEXT:    v_max_i32_e32 v8, -1, v0
 ; GFX8-NEXT:    v_subrev_u32_e32 v8, vcc, s4, v8
-; GFX8-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX8-NEXT:    s_brev_b32 s5, 1
 ; GFX8-NEXT:    v_min_i32_e32 v9, -1, v0
 ; GFX8-NEXT:    v_max_i32_e32 v4, v8, v4
 ; GFX8-NEXT:    v_subrev_u32_e32 v9, vcc, s5, v9
@@ -2282,7 +2267,7 @@ define <4 x i32> @v_ssubsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
 ; GFX9-NEXT:    s_brev_b32 s4, -2
 ; GFX9-NEXT:    v_max_i32_e32 v8, -1, v0
 ; GFX9-NEXT:    v_subrev_u32_e32 v8, s4, v8
-; GFX9-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX9-NEXT:    s_brev_b32 s5, 1
 ; GFX9-NEXT:    v_min_i32_e32 v9, -1, v0
 ; GFX9-NEXT:    v_max_i32_e32 v4, v8, v4
 ; GFX9-NEXT:    v_subrev_u32_e32 v9, s5, v9
@@ -2328,7 +2313,7 @@ define <4 x i32> @v_ssubsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
 ; GFX10-NEXT:    v_min_i32_e32 v13, -1, v2
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v8, s4, v8
 ; GFX10-NEXT:    v_min_i32_e32 v14, -1, v3
-; GFX10-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX10-NEXT:    s_brev_b32 s5, 1
 ; GFX10-NEXT:    v_max_i32_e32 v4, v15, v4
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v9, s5, v9
 ; GFX10-NEXT:    v_max_i32_e32 v5, v10, v5
@@ -2359,7 +2344,7 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v4i32(<4 x i32> inreg %lhs, <4 x i32> inre
 ; GFX6-NEXT:    s_cselect_b32 s10, s0, -1
 ; GFX6-NEXT:    s_sub_i32 s10, s10, s8
 ; GFX6-NEXT:    s_cmp_lt_i32 s0, -1
-; GFX6-NEXT:    s_mov_b32 s9, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s9, 1
 ; GFX6-NEXT:    s_cselect_b32 s11, s0, -1
 ; GFX6-NEXT:    s_sub_i32 s11, s11, s9
 ; GFX6-NEXT:    s_cmp_gt_i32 s10, s4
@@ -2409,7 +2394,7 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v4i32(<4 x i32> inreg %lhs, <4 x i32> inre
 ; GFX8-NEXT:    s_cselect_b32 s10, s0, -1
 ; GFX8-NEXT:    s_sub_i32 s10, s10, s8
 ; GFX8-NEXT:    s_cmp_lt_i32 s0, -1
-; GFX8-NEXT:    s_mov_b32 s9, 0x80000000
+; GFX8-NEXT:    s_brev_b32 s9, 1
 ; GFX8-NEXT:    s_cselect_b32 s11, s0, -1
 ; GFX8-NEXT:    s_sub_i32 s11, s11, s9
 ; GFX8-NEXT:    s_cmp_gt_i32 s10, s4
@@ -2459,7 +2444,7 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v4i32(<4 x i32> inreg %lhs, <4 x i32> inre
 ; GFX9-NEXT:    s_cselect_b32 s10, s0, -1
 ; GFX9-NEXT:    s_sub_i32 s10, s10, s8
 ; GFX9-NEXT:    s_cmp_lt_i32 s0, -1
-; GFX9-NEXT:    s_mov_b32 s9, 0x80000000
+; GFX9-NEXT:    s_brev_b32 s9, 1
 ; GFX9-NEXT:    s_cselect_b32 s11, s0, -1
 ; GFX9-NEXT:    s_sub_i32 s11, s11, s9
 ; GFX9-NEXT:    s_cmp_gt_i32 s10, s4
@@ -2507,7 +2492,7 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v4i32(<4 x i32> inreg %lhs, <4 x i32> inre
 ; GFX10-NEXT:    s_cmp_gt_i32 s0, -1
 ; GFX10-NEXT:    s_brev_b32 s8, -2
 ; GFX10-NEXT:    s_cselect_b32 s9, s0, -1
-; GFX10-NEXT:    s_mov_b32 s10, 0x80000000
+; GFX10-NEXT:    s_brev_b32 s10, 1
 ; GFX10-NEXT:    s_sub_i32 s9, s9, s8
 ; GFX10-NEXT:    s_cmp_lt_i32 s0, -1
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
@@ -2563,7 +2548,7 @@ define <5 x i32> @v_ssubsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) {
 ; GFX6-NEXT:    s_brev_b32 s4, -2
 ; GFX6-NEXT:    v_max_i32_e32 v10, -1, v0
 ; GFX6-NEXT:    v_subrev_i32_e32 v10, vcc, s4, v10
-; GFX6-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s5, 1
 ; GFX6-NEXT:    v_min_i32_e32 v12, -1, v0
 ; GFX6-NEXT:    v_max_i32_e32 v5, v10, v5
 ; GFX6-NEXT:    v_subrev_i32_e32 v12, vcc, s5, v12
@@ -2586,7 +2571,7 @@ define <5 x i32> @v_ssubsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) {
 ; GFX6-NEXT:    v_bfrev_b32_e32 v11, -2
 ; GFX6-NEXT:    v_max_i32_e32 v5, -1, v3
 ; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, v5, v11
-; GFX6-NEXT:    v_mov_b32_e32 v13, 0x80000000
+; GFX6-NEXT:    v_bfrev_b32_e32 v13, 1
 ; GFX6-NEXT:    v_min_i32_e32 v6, -1, v3
 ; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, v6, v13
 ; GFX6-NEXT:    v_max_i32_e32 v5, v5, v8
@@ -2607,7 +2592,7 @@ define <5 x i32> @v_ssubsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) {
 ; GFX8-NEXT:    s_brev_b32 s4, -2
 ; GFX8-NEXT:    v_max_i32_e32 v10, -1, v0
 ; GFX8-NEXT:    v_subrev_u32_e32 v10, vcc, s4, v10
-; GFX8-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX8-NEXT:    s_brev_b32 s5, 1
 ; GFX8-NEXT:    v_min_i32_e32 v12, -1, v0
 ; GFX8-NEXT:    v_max_i32_e32 v5, v10, v5
 ; GFX8-NEXT:    v_subrev_u32_e32 v12, vcc, s5, v12
@@ -2630,7 +2615,7 @@ define <5 x i32> @v_ssubsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) {
 ; GFX8-NEXT:    v_bfrev_b32_e32 v11, -2
 ; GFX8-NEXT:    v_max_i32_e32 v5, -1, v3
 ; GFX8-NEXT:    v_sub_u32_e32 v5, vcc, v5, v11
-; GFX8-NEXT:    v_mov_b32_e32 v13, 0x80000000
+; GFX8-NEXT:    v_bfrev_b32_e32 v13, 1
 ; GFX8-NEXT:    v_min_i32_e32 v6, -1, v3
 ; GFX8-NEXT:    v_sub_u32_e32 v6, vcc, v6, v13
 ; GFX8-NEXT:    v_max_i32_e32 v5, v5, v8
@@ -2651,7 +2636,7 @@ define <5 x i32> @v_ssubsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) {
 ; GFX9-NEXT:    s_brev_b32 s4, -2
 ; GFX9-NEXT:    v_max_i32_e32 v10, -1, v0
 ; GFX9-NEXT:    v_subrev_u32_e32 v10, s4, v10
-; GFX9-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX9-NEXT:    s_brev_b32 s5, 1
 ; GFX9-NEXT:    v_min_i32_e32 v12, -1, v0
 ; GFX9-NEXT:    v_max_i32_e32 v5, v10, v5
 ; GFX9-NEXT:    v_subrev_u32_e32 v12, s5, v12
@@ -2674,7 +2659,7 @@ define <5 x i32> @v_ssubsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) {
 ; GFX9-NEXT:    v_bfrev_b32_e32 v11, -2
 ; GFX9-NEXT:    v_max_i32_e32 v5, -1, v3
 ; GFX9-NEXT:    v_sub_u32_e32 v5, v5, v11
-; GFX9-NEXT:    v_mov_b32_e32 v13, 0x80000000
+; GFX9-NEXT:    v_bfrev_b32_e32 v13, 1
 ; GFX9-NEXT:    v_min_i32_e32 v6, -1, v3
 ; GFX9-NEXT:    v_sub_u32_e32 v6, v6, v13
 ; GFX9-NEXT:    v_max_i32_e32 v5, v5, v8
@@ -2701,7 +2686,7 @@ define <5 x i32> @v_ssubsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) {
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v10, s4, v10
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v13, s4, v13
 ; GFX10-NEXT:    v_min_i32_e32 v12, -1, v0
-; GFX10-NEXT:    v_mov_b32_e32 v14, 0x80000000
+; GFX10-NEXT:    v_bfrev_b32_e32 v14, 1
 ; GFX10-NEXT:    v_min_i32_e32 v15, -1, v1
 ; GFX10-NEXT:    v_max_i32_e32 v5, v10, v5
 ; GFX10-NEXT:    v_max_i32_e32 v10, -1, v2
@@ -2713,7 +2698,7 @@ define <5 x i32> @v_ssubsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) {
 ; GFX10-NEXT:    v_min_i32_e32 v19, -1, v4
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v13, v13, v11
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v11, v17, v11
-; GFX10-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX10-NEXT:    s_brev_b32 s5, 1
 ; GFX10-NEXT:    v_max_i32_e32 v7, v10, v7
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v12, s5, v12
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v15, s5, v15
@@ -2746,7 +2731,7 @@ define amdgpu_ps <5 x i32> @s_ssubsat_v5i32(<5 x i32> inreg %lhs, <5 x i32> inre
 ; GFX6-NEXT:    s_cselect_b32 s12, s0, -1
 ; GFX6-NEXT:    s_sub_i32 s12, s12, s10
 ; GFX6-NEXT:    s_cmp_lt_i32 s0, -1
-; GFX6-NEXT:    s_mov_b32 s11, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s11, 1
 ; GFX6-NEXT:    s_cselect_b32 s13, s0, -1
 ; GFX6-NEXT:    s_sub_i32 s13, s13, s11
 ; GFX6-NEXT:    s_cmp_gt_i32 s12, s5
@@ -2807,7 +2792,7 @@ define amdgpu_ps <5 x i32> @s_ssubsat_v5i32(<5 x i32> inreg %lhs, <5 x i32> inre
 ; GFX8-NEXT:    s_cselect_b32 s12, s0, -1
 ; GFX8-NEXT:    s_sub_i32 s12, s12, s10
 ; GFX8-NEXT:    s_cmp_lt_i32 s0, -1
-; GFX8-NEXT:    s_mov_b32 s11, 0x80000000
+; GFX8-NEXT:    s_brev_b32 s11, 1
 ; GFX8-NEXT:    s_cselect_b32 s13, s0, -1
 ; GFX8-NEXT:    s_sub_i32 s13, s13, s11
 ; GFX8-NEXT:    s_cmp_gt_i32 s12, s5
@@ -2868,7 +2853,7 @@ define amdgpu_ps <5 x i32> @s_ssubsat_v5i32(<5 x i32> inreg %lhs, <5 x i32> inre
 ; GFX9-NEXT:    s_cselect_b32 s12, s0, -1
 ; GFX9-NEXT:    s_sub_i32 s12, s12, s10
 ; GFX9-NEXT:    s_cmp_lt_i32 s0, -1
-; GFX9-NEXT:    s_mov_b32 s11, 0x80000000
+; GFX9-NEXT:    s_brev_b32 s11, 1
 ; GFX9-NEXT:    s_cselect_b32 s13, s0, -1
 ; GFX9-NEXT:    s_sub_i32 s13, s13, s11
 ; GFX9-NEXT:    s_cmp_gt_i32 s12, s5
@@ -2927,7 +2912,7 @@ define amdgpu_ps <5 x i32> @s_ssubsat_v5i32(<5 x i32> inreg %lhs, <5 x i32> inre
 ; GFX10-NEXT:    s_cmp_gt_i32 s0, -1
 ; GFX10-NEXT:    s_brev_b32 s10, -2
 ; GFX10-NEXT:    s_cselect_b32 s11, s0, -1
-; GFX10-NEXT:    s_mov_b32 s12, 0x80000000
+; GFX10-NEXT:    s_brev_b32 s12, 1
 ; GFX10-NEXT:    s_sub_i32 s11, s11, s10
 ; GFX10-NEXT:    s_cmp_lt_i32 s0, -1
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
@@ -2995,7 +2980,7 @@ define <16 x i32> @v_ssubsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
 ; GFX6-NEXT:    v_max_i32_e32 v32, -1, v0
 ; GFX6-NEXT:    v_subrev_i32_e32 v32, vcc, s4, v32
 ; GFX6-NEXT:    v_max_i32_e32 v16, v32, v16
-; GFX6-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s5, 1
 ; GFX6-NEXT:    v_min_i32_e32 v32, -1, v0
 ; GFX6-NEXT:    v_subrev_i32_e32 v32, vcc, s5, v32
 ; GFX6-NEXT:    v_min_i32_e32 v16, v16, v32
@@ -3018,7 +3003,7 @@ define <16 x i32> @v_ssubsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
 ; GFX6-NEXT:    v_max_i32_e32 v17, -1, v3
 ; GFX6-NEXT:    v_sub_i32_e32 v17, vcc, v17, v16
 ; GFX6-NEXT:    v_max_i32_e32 v17, v17, v19
-; GFX6-NEXT:    v_mov_b32_e32 v18, 0x80000000
+; GFX6-NEXT:    v_bfrev_b32_e32 v18, 1
 ; GFX6-NEXT:    v_min_i32_e32 v19, -1, v3
 ; GFX6-NEXT:    v_sub_i32_e32 v19, vcc, v19, v18
 ; GFX6-NEXT:    v_min_i32_e32 v17, v17, v19
@@ -3116,7 +3101,7 @@ define <16 x i32> @v_ssubsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
 ; GFX8-NEXT:    v_max_i32_e32 v32, -1, v0
 ; GFX8-NEXT:    v_subrev_u32_e32 v32, vcc, s4, v32
 ; GFX8-NEXT:    v_max_i32_e32 v16, v32, v16
-; GFX8-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX8-NEXT:    s_brev_b32 s5, 1
 ; GFX8-NEXT:    v_min_i32_e32 v32, -1, v0
 ; GFX8-NEXT:    v_subrev_u32_e32 v32, vcc, s5, v32
 ; GFX8-NEXT:    v_min_i32_e32 v16, v16, v32
@@ -3139,7 +3124,7 @@ define <16 x i32> @v_ssubsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
 ; GFX8-NEXT:    v_max_i32_e32 v17, -1, v3
 ; GFX8-NEXT:    v_sub_u32_e32 v17, vcc, v17, v16
 ; GFX8-NEXT:    v_max_i32_e32 v17, v17, v19
-; GFX8-NEXT:    v_mov_b32_e32 v18, 0x80000000
+; GFX8-NEXT:    v_bfrev_b32_e32 v18, 1
 ; GFX8-NEXT:    v_min_i32_e32 v19, -1, v3
 ; GFX8-NEXT:    v_sub_u32_e32 v19, vcc, v19, v18
 ; GFX8-NEXT:    v_min_i32_e32 v17, v17, v19
@@ -3237,7 +3222,7 @@ define <16 x i32> @v_ssubsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
 ; GFX9-NEXT:    v_max_i32_e32 v32, -1, v0
 ; GFX9-NEXT:    v_subrev_u32_e32 v32, s4, v32
 ; GFX9-NEXT:    v_max_i32_e32 v16, v32, v16
-; GFX9-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX9-NEXT:    s_brev_b32 s5, 1
 ; GFX9-NEXT:    v_min_i32_e32 v32, -1, v0
 ; GFX9-NEXT:    v_subrev_u32_e32 v32, s5, v32
 ; GFX9-NEXT:    v_min_i32_e32 v16, v16, v32
@@ -3260,7 +3245,7 @@ define <16 x i32> @v_ssubsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
 ; GFX9-NEXT:    v_max_i32_e32 v17, -1, v3
 ; GFX9-NEXT:    v_sub_u32_e32 v17, v17, v16
 ; GFX9-NEXT:    v_max_i32_e32 v17, v17, v19
-; GFX9-NEXT:    v_mov_b32_e32 v18, 0x80000000
+; GFX9-NEXT:    v_bfrev_b32_e32 v18, 1
 ; GFX9-NEXT:    v_min_i32_e32 v19, -1, v3
 ; GFX9-NEXT:    v_sub_u32_e32 v19, v19, v18
 ; GFX9-NEXT:    v_min_i32_e32 v17, v17, v19
@@ -3358,7 +3343,7 @@ define <16 x i32> @v_ssubsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
 ; GFX10-NEXT:    v_max_i32_e32 v32, -1, v0
 ; GFX10-NEXT:    s_brev_b32 s4, -2
 ; GFX10-NEXT:    v_min_i32_e32 v33, -1, v0
-; GFX10-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX10-NEXT:    s_brev_b32 s5, 1
 ; GFX10-NEXT:    v_max_i32_e32 v36, -1, v2
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v35, s4, v32
 ; GFX10-NEXT:    v_max_i32_e32 v32, -1, v1
@@ -3375,7 +3360,7 @@ define <16 x i32> @v_ssubsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
 ; GFX10-NEXT:    v_max_i32_e32 v38, v32, v17
 ; GFX10-NEXT:    v_max_i32_e32 v17, -1, v4
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v36, v39, v34
-; GFX10-NEXT:    v_mov_b32_e32 v35, 0x80000000
+; GFX10-NEXT:    v_bfrev_b32_e32 v35, 1
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v32, s5, v33
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v0, v0, v16
 ; GFX10-NEXT:    v_min_i32_e32 v33, -1, v3
@@ -3485,7 +3470,7 @@ define amdgpu_ps <16 x i32> @s_ssubsat_v16i32(<16 x i32> inreg %lhs, <16 x i32>
 ; GFX6-NEXT:    s_cselect_b32 s34, s0, -1
 ; GFX6-NEXT:    s_sub_i32 s34, s34, s32
 ; GFX6-NEXT:    s_cmp_lt_i32 s0, -1
-; GFX6-NEXT:    s_mov_b32 s33, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s33, 1
 ; GFX6-NEXT:    s_cselect_b32 s35, s0, -1
 ; GFX6-NEXT:    s_sub_i32 s35, s35, s33
 ; GFX6-NEXT:    s_cmp_gt_i32 s34, s16
@@ -3667,7 +3652,7 @@ define amdgpu_ps <16 x i32> @s_ssubsat_v16i32(<16 x i32> inreg %lhs, <16 x i32>
 ; GFX8-NEXT:    s_cselect_b32 s34, s0, -1
 ; GFX8-NEXT:    s_sub_i32 s34, s34, s32
 ; GFX8-NEXT:    s_cmp_lt_i32 s0, -1
-; GFX8-NEXT:    s_mov_b32 s33, 0x80000000
+; GFX8-NEXT:    s_brev_b32 s33, 1
 ; GFX8-NEXT:    s_cselect_b32 s35, s0, -1
 ; GFX8-NEXT:    s_sub_i32 s35, s35, s33
 ; GFX8-NEXT:    s_cmp_gt_i32 s34, s16
@@ -3849,7 +3834,7 @@ define amdgpu_ps <16 x i32> @s_ssubsat_v16i32(<16 x i32> inreg %lhs, <16 x i32>
 ; GFX9-NEXT:    s_cselect_b32 s34, s0, -1
 ; GFX9-NEXT:    s_sub_i32 s34, s34, s32
 ; GFX9-NEXT:    s_cmp_lt_i32 s0, -1
-; GFX9-NEXT:    s_mov_b32 s33, 0x80000000
+; GFX9-NEXT:    s_brev_b32 s33, 1
 ; GFX9-NEXT:    s_cselect_b32 s35, s0, -1
 ; GFX9-NEXT:    s_sub_i32 s35, s35, s33
 ; GFX9-NEXT:    s_cmp_gt_i32 s34, s16
@@ -4029,7 +4014,7 @@ define amdgpu_ps <16 x i32> @s_ssubsat_v16i32(<16 x i32> inreg %lhs, <16 x i32>
 ; GFX10-NEXT:    s_cmp_gt_i32 s0, -1
 ; GFX10-NEXT:    s_brev_b32 s46, -2
 ; GFX10-NEXT:    s_cselect_b32 s33, s0, -1
-; GFX10-NEXT:    s_mov_b32 s34, 0x80000000
+; GFX10-NEXT:    s_brev_b32 s34, 1
 ; GFX10-NEXT:    s_sub_i32 s47, s33, s46
 ; GFX10-NEXT:    s_cmp_lt_i32 s0, -1
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
@@ -4229,9 +4214,8 @@ define i16 @v_ssubsat_i16(i16 %lhs, i16 %rhs) {
 ; GFX8-LABEL: v_ssubsat_i16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_mov_b32 s4, 0xffff
-; GFX8-NEXT:    v_max_i16_e32 v2, s4, v0
-; GFX8-NEXT:    v_min_i16_e32 v3, s4, v0
+; GFX8-NEXT:    v_max_i16_e32 v2, -1, v0
+; GFX8-NEXT:    v_min_i16_e32 v3, -1, v0
 ; GFX8-NEXT:    v_subrev_u16_e32 v2, 0x7fff, v2
 ; GFX8-NEXT:    v_subrev_u16_e32 v3, 0x8000, v3
 ; GFX8-NEXT:    v_max_i16_e32 v1, v2, v1
@@ -4242,9 +4226,8 @@ define i16 @v_ssubsat_i16(i16 %lhs, i16 %rhs) {
 ; GFX9-LABEL: v_ssubsat_i16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s4, 0xffff
-; GFX9-NEXT:    v_max_i16_e32 v2, s4, v0
-; GFX9-NEXT:    v_min_i16_e32 v3, s4, v0
+; GFX9-NEXT:    v_max_i16_e32 v2, -1, v0
+; GFX9-NEXT:    v_min_i16_e32 v3, -1, v0
 ; GFX9-NEXT:    v_subrev_u16_e32 v2, 0x7fff, v2
 ; GFX9-NEXT:    v_subrev_u16_e32 v3, 0x8000, v3
 ; GFX9-NEXT:    v_max_i16_e32 v1, v2, v1
@@ -4256,10 +4239,9 @@ define i16 @v_ssubsat_i16(i16 %lhs, i16 %rhs) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    s_mov_b32 s4, 0xffff
+; GFX10-NEXT:    v_max_i16_e64 v2, v0, -1
+; GFX10-NEXT:    v_min_i16_e64 v3, v0, -1
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    v_max_i16_e64 v2, v0, s4
-; GFX10-NEXT:    v_min_i16_e64 v3, v0, s4
 ; GFX10-NEXT:    v_sub_nc_u16_e64 v2, v2, 0x7fff
 ; GFX10-NEXT:    v_sub_nc_u16_e64 v3, v3, 0x8000
 ; GFX10-NEXT:    v_max_i16_e64 v1, v2, v1
@@ -4292,13 +4274,13 @@ define amdgpu_ps i16 @s_ssubsat_i16(i16 inreg %lhs, i16 inreg %rhs) {
 ; GFX8-LABEL: s_ssubsat_i16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s0
-; GFX8-NEXT:    s_sext_i32_i16 s3, 0xffff
+; GFX8-NEXT:    s_sext_i32_i16 s3, -1
 ; GFX8-NEXT:    s_cmp_gt_i32 s2, s3
 ; GFX8-NEXT:    s_cselect_b32 s4, s2, s3
 ; GFX8-NEXT:    s_sub_i32 s4, s4, 0x7fff
 ; GFX8-NEXT:    s_cmp_lt_i32 s2, s3
 ; GFX8-NEXT:    s_cselect_b32 s2, s2, s3
-; GFX8-NEXT:    s_sub_i32 s2, s2, 0x8000
+; GFX8-NEXT:    s_sub_i32 s2, s2, 0xffff8000
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_cmp_gt_i32 s3, s1
@@ -4313,13 +4295,13 @@ define amdgpu_ps i16 @s_ssubsat_i16(i16 inreg %lhs, i16 inreg %rhs) {
 ; GFX9-LABEL: s_ssubsat_i16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_sext_i32_i16 s2, s0
-; GFX9-NEXT:    s_sext_i32_i16 s3, 0xffff
+; GFX9-NEXT:    s_sext_i32_i16 s3, -1
 ; GFX9-NEXT:    s_cmp_gt_i32 s2, s3
 ; GFX9-NEXT:    s_cselect_b32 s4, s2, s3
 ; GFX9-NEXT:    s_sub_i32 s4, s4, 0x7fff
 ; GFX9-NEXT:    s_cmp_lt_i32 s2, s3
 ; GFX9-NEXT:    s_cselect_b32 s2, s2, s3
-; GFX9-NEXT:    s_sub_i32 s2, s2, 0x8000
+; GFX9-NEXT:    s_sub_i32 s2, s2, 0xffff8000
 ; GFX9-NEXT:    s_sext_i32_i16 s3, s4
 ; GFX9-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX9-NEXT:    s_cmp_gt_i32 s3, s1
@@ -4333,7 +4315,7 @@ define amdgpu_ps i16 @s_ssubsat_i16(i16 inreg %lhs, i16 inreg %rhs) {
 ;
 ; GFX10-LABEL: s_ssubsat_i16:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_sext_i32_i16 s2, 0xffff
+; GFX10-NEXT:    s_sext_i32_i16 s2, -1
 ; GFX10-NEXT:    s_sext_i32_i16 s3, s0
 ; GFX10-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX10-NEXT:    s_cmp_gt_i32 s3, s2
@@ -4343,7 +4325,7 @@ define amdgpu_ps i16 @s_ssubsat_i16(i16 inreg %lhs, i16 inreg %rhs) {
 ; GFX10-NEXT:    s_cmp_lt_i32 s3, s2
 ; GFX10-NEXT:    s_cselect_b32 s2, s3, s2
 ; GFX10-NEXT:    s_sext_i32_i16 s3, s4
-; GFX10-NEXT:    s_sub_i32 s2, s2, 0x8000
+; GFX10-NEXT:    s_sub_i32 s2, s2, 0xffff8000
 ; GFX10-NEXT:    s_cmp_gt_i32 s3, s1
 ; GFX10-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX10-NEXT:    s_cselect_b32 s1, s3, s1
@@ -4376,13 +4358,13 @@ define amdgpu_ps half @ssubsat_i16_sv(i16 inreg %lhs, i16 %rhs) {
 ; GFX8-LABEL: ssubsat_i16_sv:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s0
-; GFX8-NEXT:    s_sext_i32_i16 s2, 0xffff
+; GFX8-NEXT:    s_sext_i32_i16 s2, -1
 ; GFX8-NEXT:    s_cmp_gt_i32 s1, s2
 ; GFX8-NEXT:    s_cselect_b32 s3, s1, s2
 ; GFX8-NEXT:    s_sub_i32 s3, s3, 0x7fff
 ; GFX8-NEXT:    s_cmp_lt_i32 s1, s2
 ; GFX8-NEXT:    s_cselect_b32 s1, s1, s2
-; GFX8-NEXT:    s_sub_i32 s1, s1, 0x8000
+; GFX8-NEXT:    s_sub_i32 s1, s1, 0xffff8000
 ; GFX8-NEXT:    v_max_i16_e32 v0, s3, v0
 ; GFX8-NEXT:    v_min_i16_e32 v0, s1, v0
 ; GFX8-NEXT:    v_sub_u16_e32 v0, s0, v0
@@ -4391,13 +4373,13 @@ define amdgpu_ps half @ssubsat_i16_sv(i16 inreg %lhs, i16 %rhs) {
 ; GFX9-LABEL: ssubsat_i16_sv:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_sext_i32_i16 s1, s0
-; GFX9-NEXT:    s_sext_i32_i16 s2, 0xffff
+; GFX9-NEXT:    s_sext_i32_i16 s2, -1
 ; GFX9-NEXT:    s_cmp_gt_i32 s1, s2
 ; GFX9-NEXT:    s_cselect_b32 s3, s1, s2
 ; GFX9-NEXT:    s_sub_i32 s3, s3, 0x7fff
 ; GFX9-NEXT:    s_cmp_lt_i32 s1, s2
 ; GFX9-NEXT:    s_cselect_b32 s1, s1, s2
-; GFX9-NEXT:    s_sub_i32 s1, s1, 0x8000
+; GFX9-NEXT:    s_sub_i32 s1, s1, 0xffff8000
 ; GFX9-NEXT:    v_max_i16_e32 v0, s3, v0
 ; GFX9-NEXT:    v_min_i16_e32 v0, s1, v0
 ; GFX9-NEXT:    v_sub_u16_e32 v0, s0, v0
@@ -4406,7 +4388,7 @@ define amdgpu_ps half @ssubsat_i16_sv(i16 inreg %lhs, i16 %rhs) {
 ; GFX10-LABEL: ssubsat_i16_sv:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_sext_i32_i16 s1, s0
-; GFX10-NEXT:    s_sext_i32_i16 s2, 0xffff
+; GFX10-NEXT:    s_sext_i32_i16 s2, -1
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
 ; GFX10-NEXT:    s_cmp_gt_i32 s1, s2
 ; GFX10-NEXT:    s_cselect_b32 s3, s1, s2
@@ -4414,7 +4396,7 @@ define amdgpu_ps half @ssubsat_i16_sv(i16 inreg %lhs, i16 %rhs) {
 ; GFX10-NEXT:    s_cmp_lt_i32 s1, s2
 ; GFX10-NEXT:    v_max_i16_e64 v0, s3, v0
 ; GFX10-NEXT:    s_cselect_b32 s1, s1, s2
-; GFX10-NEXT:    s_sub_i32 s1, s1, 0x8000
+; GFX10-NEXT:    s_sub_i32 s1, s1, 0xffff8000
 ; GFX10-NEXT:    v_min_i16_e64 v0, v0, s1
 ; GFX10-NEXT:    v_sub_nc_u16_e64 v0, s0, v0
 ; GFX10-NEXT:    ; return to shader part epilog
@@ -4440,10 +4422,9 @@ define amdgpu_ps half @ssubsat_i16_vs(i16 %lhs, i16 inreg %rhs) {
 ;
 ; GFX8-LABEL: ssubsat_i16_vs:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_mov_b32 s1, 0xffff
-; GFX8-NEXT:    v_max_i16_e32 v1, s1, v0
+; GFX8-NEXT:    v_max_i16_e32 v1, -1, v0
 ; GFX8-NEXT:    v_subrev_u16_e32 v1, 0x7fff, v1
-; GFX8-NEXT:    v_min_i16_e32 v2, s1, v0
+; GFX8-NEXT:    v_min_i16_e32 v2, -1, v0
 ; GFX8-NEXT:    v_subrev_u16_e32 v2, 0x8000, v2
 ; GFX8-NEXT:    v_max_i16_e32 v1, s0, v1
 ; GFX8-NEXT:    v_min_i16_e32 v1, v1, v2
@@ -4452,10 +4433,9 @@ define amdgpu_ps half @ssubsat_i16_vs(i16 %lhs, i16 inreg %rhs) {
 ;
 ; GFX9-LABEL: ssubsat_i16_vs:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s1, 0xffff
-; GFX9-NEXT:    v_max_i16_e32 v1, s1, v0
+; GFX9-NEXT:    v_max_i16_e32 v1, -1, v0
 ; GFX9-NEXT:    v_subrev_u16_e32 v1, 0x7fff, v1
-; GFX9-NEXT:    v_min_i16_e32 v2, s1, v0
+; GFX9-NEXT:    v_min_i16_e32 v2, -1, v0
 ; GFX9-NEXT:    v_subrev_u16_e32 v2, 0x8000, v2
 ; GFX9-NEXT:    v_max_i16_e32 v1, s0, v1
 ; GFX9-NEXT:    v_min_i16_e32 v1, v1, v2
@@ -4464,10 +4444,9 @@ define amdgpu_ps half @ssubsat_i16_vs(i16 %lhs, i16 inreg %rhs) {
 ;
 ; GFX10-LABEL: ssubsat_i16_vs:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_mov_b32 s1, 0xffff
+; GFX10-NEXT:    v_max_i16_e64 v1, v0, -1
+; GFX10-NEXT:    v_min_i16_e64 v2, v0, -1
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    v_max_i16_e64 v1, v0, s1
-; GFX10-NEXT:    v_min_i16_e64 v2, v0, s1
 ; GFX10-NEXT:    v_sub_nc_u16_e64 v1, v1, 0x7fff
 ; GFX10-NEXT:    v_sub_nc_u16_e64 v2, v2, 0x8000
 ; GFX10-NEXT:    v_max_i16_e64 v1, v1, s0
@@ -4488,7 +4467,7 @@ define <2 x i16> @v_ssubsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
 ; GFX6-NEXT:    v_max_i32_e32 v4, -1, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
 ; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v4
-; GFX6-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s5, 1
 ; GFX6-NEXT:    v_min_i32_e32 v5, -1, v0
 ; GFX6-NEXT:    v_subrev_i32_e32 v5, vcc, s5, v5
 ; GFX6-NEXT:    v_max_i32_e32 v2, v4, v2
@@ -4510,18 +4489,17 @@ define <2 x i16> @v_ssubsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
 ; GFX8-LABEL: v_ssubsat_v2i16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_mov_b32 s6, 0xffff
 ; GFX8-NEXT:    s_movk_i32 s4, 0x7fff
-; GFX8-NEXT:    v_max_i16_e32 v3, s6, v0
+; GFX8-NEXT:    v_max_i16_e32 v3, -1, v0
 ; GFX8-NEXT:    v_subrev_u16_e32 v3, s4, v3
-; GFX8-NEXT:    s_mov_b32 s5, 0x8000
-; GFX8-NEXT:    v_min_i16_e32 v4, s6, v0
+; GFX8-NEXT:    s_movk_i32 s5, 0x8000
+; GFX8-NEXT:    v_min_i16_e32 v4, -1, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
 ; GFX8-NEXT:    v_subrev_u16_e32 v4, s5, v4
 ; GFX8-NEXT:    v_max_i16_e32 v3, v3, v1
 ; GFX8-NEXT:    v_min_i16_e32 v3, v3, v4
-; GFX8-NEXT:    v_max_i16_e32 v4, s6, v2
-; GFX8-NEXT:    v_min_i16_e32 v5, s6, v2
+; GFX8-NEXT:    v_max_i16_e32 v4, -1, v2
+; GFX8-NEXT:    v_min_i16_e32 v5, -1, v2
 ; GFX8-NEXT:    v_subrev_u16_e32 v4, s4, v4
 ; GFX8-NEXT:    v_subrev_u16_e32 v5, s5, v5
 ; GFX8-NEXT:    v_max_i16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
@@ -4536,7 +4514,7 @@ define <2 x i16> @v_ssubsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    s_movk_i32 s4, 0x7fff
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s6, -1, -1
-; GFX9-NEXT:    s_mov_b32 s5, 0xffff8000
+; GFX9-NEXT:    s_movk_i32 s5, 0x8000
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s4, s4, s4
 ; GFX9-NEXT:    v_pk_max_i16 v2, v0, s6
 ; GFX9-NEXT:    v_pk_sub_i16 v2, v2, s4
@@ -4557,7 +4535,7 @@ define <2 x i16> @v_ssubsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
 ; GFX10-NEXT:    v_pk_max_i16 v2, v0, s4
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s5, s5, s5
 ; GFX10-NEXT:    v_pk_min_i16 v3, v0, s4
-; GFX10-NEXT:    s_mov_b32 s6, 0xffff8000
+; GFX10-NEXT:    s_movk_i32 s6, 0x8000
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
 ; GFX10-NEXT:    v_pk_sub_i16 v2, v2, s5
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s4, s6, s6
@@ -4580,7 +4558,7 @@ define amdgpu_ps i32 @s_ssubsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
 ; GFX6-NEXT:    s_cselect_b32 s6, s0, -1
 ; GFX6-NEXT:    s_sub_i32 s6, s6, s4
 ; GFX6-NEXT:    s_cmp_lt_i32 s0, -1
-; GFX6-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s5, 1
 ; GFX6-NEXT:    s_cselect_b32 s7, s0, -1
 ; GFX6-NEXT:    s_sub_i32 s7, s7, s5
 ; GFX6-NEXT:    s_cmp_gt_i32 s6, s2
@@ -4615,13 +4593,13 @@ define amdgpu_ps i32 @s_ssubsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
 ; GFX8-NEXT:    s_lshr_b32 s3, s1, 16
 ; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s0
-; GFX8-NEXT:    s_sext_i32_i16 s7, 0xffff
+; GFX8-NEXT:    s_sext_i32_i16 s7, -1
 ; GFX8-NEXT:    s_cmp_gt_i32 s6, s7
 ; GFX8-NEXT:    s_movk_i32 s4, 0x7fff
 ; GFX8-NEXT:    s_cselect_b32 s8, s6, s7
 ; GFX8-NEXT:    s_sub_i32 s8, s8, s4
 ; GFX8-NEXT:    s_cmp_lt_i32 s6, s7
-; GFX8-NEXT:    s_mov_b32 s5, 0x8000
+; GFX8-NEXT:    s_movk_i32 s5, 0x8000
 ; GFX8-NEXT:    s_cselect_b32 s6, s6, s7
 ; GFX8-NEXT:    s_sub_i32 s6, s6, s5
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s8
@@ -4676,7 +4654,7 @@ define amdgpu_ps i32 @s_ssubsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
 ; GFX9-NEXT:    s_cmp_lt_i32 s5, s7
 ; GFX9-NEXT:    s_cselect_b32 s5, s5, s7
 ; GFX9-NEXT:    s_cmp_lt_i32 s6, s4
-; GFX9-NEXT:    s_mov_b32 s3, 0xffff8000
+; GFX9-NEXT:    s_movk_i32 s3, 0x8000
 ; GFX9-NEXT:    s_cselect_b32 s4, s6, s4
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s4, s5, s4
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s3, s3, s3
@@ -4733,7 +4711,7 @@ define amdgpu_ps i32 @s_ssubsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
 ; GFX10-NEXT:    s_cmp_lt_i32 s3, s5
 ; GFX10-NEXT:    s_cselect_b32 s3, s3, s5
 ; GFX10-NEXT:    s_cmp_lt_i32 s4, s2
-; GFX10-NEXT:    s_mov_b32 s5, 0xffff8000
+; GFX10-NEXT:    s_movk_i32 s5, 0x8000
 ; GFX10-NEXT:    s_cselect_b32 s2, s4, s2
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s4, s5, s5
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s2, s3, s2
@@ -4782,7 +4760,7 @@ define amdgpu_ps float @ssubsat_v2i16_sv(<2 x i16> inreg %lhs, <2 x i16> %rhs) {
 ; GFX6-NEXT:    s_sub_i32 s4, s4, s2
 ; GFX6-NEXT:    s_cmp_lt_i32 s0, -1
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX6-NEXT:    s_mov_b32 s3, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s3, 1
 ; GFX6-NEXT:    s_cselect_b32 s5, s0, -1
 ; GFX6-NEXT:    s_sub_i32 s5, s5, s3
 ; GFX6-NEXT:    v_max_i32_e32 v0, s4, v0
@@ -4812,13 +4790,13 @@ define amdgpu_ps float @ssubsat_v2i16_sv(<2 x i16> inreg %lhs, <2 x i16> %rhs) {
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s0
-; GFX8-NEXT:    s_sext_i32_i16 s5, 0xffff
+; GFX8-NEXT:    s_sext_i32_i16 s5, -1
 ; GFX8-NEXT:    s_cmp_gt_i32 s4, s5
 ; GFX8-NEXT:    s_movk_i32 s2, 0x7fff
 ; GFX8-NEXT:    s_cselect_b32 s6, s4, s5
 ; GFX8-NEXT:    s_sub_i32 s6, s6, s2
 ; GFX8-NEXT:    s_cmp_lt_i32 s4, s5
-; GFX8-NEXT:    s_mov_b32 s3, 0x8000
+; GFX8-NEXT:    s_movk_i32 s3, 0x8000
 ; GFX8-NEXT:    s_cselect_b32 s4, s4, s5
 ; GFX8-NEXT:    s_sub_i32 s4, s4, s3
 ; GFX8-NEXT:    v_max_i16_e32 v1, s6, v0
@@ -4860,7 +4838,7 @@ define amdgpu_ps float @ssubsat_v2i16_sv(<2 x i16> inreg %lhs, <2 x i16> %rhs) {
 ; GFX9-NEXT:    s_cmp_lt_i32 s4, s6
 ; GFX9-NEXT:    s_cselect_b32 s4, s4, s6
 ; GFX9-NEXT:    s_cmp_lt_i32 s5, s3
-; GFX9-NEXT:    s_mov_b32 s2, 0xffff8000
+; GFX9-NEXT:    s_movk_i32 s2, 0x8000
 ; GFX9-NEXT:    s_cselect_b32 s3, s5, s3
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s3, s4, s3
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s2, s2, s2
@@ -4897,7 +4875,7 @@ define amdgpu_ps float @ssubsat_v2i16_sv(<2 x i16> inreg %lhs, <2 x i16> %rhs) {
 ; GFX10-NEXT:    s_cmp_lt_i32 s2, s4
 ; GFX10-NEXT:    s_cselect_b32 s2, s2, s4
 ; GFX10-NEXT:    s_cmp_lt_i32 s3, s1
-; GFX10-NEXT:    s_mov_b32 s4, 0xffff8000
+; GFX10-NEXT:    s_movk_i32 s4, 0x8000
 ; GFX10-NEXT:    s_cselect_b32 s1, s3, s1
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s3, s4, s4
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s1, s2, s1
@@ -4924,7 +4902,7 @@ define amdgpu_ps float @ssubsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) {
 ; GFX6-NEXT:    v_max_i32_e32 v2, -1, v0
 ; GFX6-NEXT:    s_lshl_b32 s0, s0, 16
 ; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, s2, v2
-; GFX6-NEXT:    s_mov_b32 s3, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s3, 1
 ; GFX6-NEXT:    v_min_i32_e32 v3, -1, v0
 ; GFX6-NEXT:    v_max_i32_e32 v2, s0, v2
 ; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, s3, v3
@@ -4950,18 +4928,17 @@ define amdgpu_ps float @ssubsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) {
 ;
 ; GFX8-LABEL: ssubsat_v2i16_vs:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_mov_b32 s4, 0xffff
 ; GFX8-NEXT:    s_movk_i32 s2, 0x7fff
-; GFX8-NEXT:    v_max_i16_e32 v2, s4, v0
+; GFX8-NEXT:    v_max_i16_e32 v2, -1, v0
 ; GFX8-NEXT:    v_subrev_u16_e32 v2, s2, v2
-; GFX8-NEXT:    s_mov_b32 s3, 0x8000
-; GFX8-NEXT:    v_min_i16_e32 v3, s4, v0
+; GFX8-NEXT:    s_movk_i32 s3, 0x8000
+; GFX8-NEXT:    v_min_i16_e32 v3, -1, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
 ; GFX8-NEXT:    v_subrev_u16_e32 v3, s3, v3
 ; GFX8-NEXT:    v_max_i16_e32 v2, s0, v2
 ; GFX8-NEXT:    v_min_i16_e32 v2, v2, v3
-; GFX8-NEXT:    v_max_i16_e32 v3, s4, v1
-; GFX8-NEXT:    v_min_i16_e32 v4, s4, v1
+; GFX8-NEXT:    v_max_i16_e32 v3, -1, v1
+; GFX8-NEXT:    v_min_i16_e32 v4, -1, v1
 ; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
 ; GFX8-NEXT:    v_subrev_u16_e32 v3, s2, v3
 ; GFX8-NEXT:    v_subrev_u16_e32 v4, s3, v4
@@ -4976,7 +4953,7 @@ define amdgpu_ps float @ssubsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) {
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_movk_i32 s1, 0x7fff
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s3, -1, -1
-; GFX9-NEXT:    s_mov_b32 s2, 0xffff8000
+; GFX9-NEXT:    s_movk_i32 s2, 0x8000
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s1, s1, s1
 ; GFX9-NEXT:    v_pk_max_i16 v1, v0, s3
 ; GFX9-NEXT:    v_pk_sub_i16 v1, v1, s1
@@ -4995,7 +4972,7 @@ define amdgpu_ps float @ssubsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) {
 ; GFX10-NEXT:    v_pk_max_i16 v1, v0, s1
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s2, s2, s2
 ; GFX10-NEXT:    v_pk_min_i16 v2, v0, s1
-; GFX10-NEXT:    s_mov_b32 s3, 0xffff8000
+; GFX10-NEXT:    s_movk_i32 s3, 0x8000
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
 ; GFX10-NEXT:    v_pk_sub_i16 v1, v1, s2
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s1, s3, s3
@@ -5029,7 +5006,7 @@ define <2 x float> @v_ssubsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
 ; GFX6-NEXT:    v_max_i32_e32 v8, -1, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
 ; GFX6-NEXT:    v_subrev_i32_e32 v8, vcc, s4, v8
-; GFX6-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s5, 1
 ; GFX6-NEXT:    v_min_i32_e32 v10, -1, v0
 ; GFX6-NEXT:    v_subrev_i32_e32 v10, vcc, s5, v10
 ; GFX6-NEXT:    v_max_i32_e32 v4, v8, v4
@@ -5056,7 +5033,7 @@ define <2 x float> @v_ssubsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
 ; GFX6-NEXT:    v_min_i32_e32 v4, v4, v6
 ; GFX6-NEXT:    v_max_i32_e32 v5, -1, v3
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v4
-; GFX6-NEXT:    v_mov_b32_e32 v11, 0x80000000
+; GFX6-NEXT:    v_bfrev_b32_e32 v11, 1
 ; GFX6-NEXT:    v_min_i32_e32 v6, -1, v3
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 16, v7
 ; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, v5, v9
@@ -5082,30 +5059,29 @@ define <2 x float> @v_ssubsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
 ; GFX8-LABEL: v_ssubsat_v4i16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_mov_b32 s6, 0xffff
 ; GFX8-NEXT:    s_movk_i32 s4, 0x7fff
-; GFX8-NEXT:    v_max_i16_e32 v6, s6, v0
+; GFX8-NEXT:    v_max_i16_e32 v6, -1, v0
 ; GFX8-NEXT:    v_subrev_u16_e32 v6, s4, v6
-; GFX8-NEXT:    s_mov_b32 s5, 0x8000
-; GFX8-NEXT:    v_min_i16_e32 v7, s6, v0
+; GFX8-NEXT:    s_movk_i32 s5, 0x8000
+; GFX8-NEXT:    v_min_i16_e32 v7, -1, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
 ; GFX8-NEXT:    v_subrev_u16_e32 v7, s5, v7
 ; GFX8-NEXT:    v_max_i16_e32 v6, v6, v2
 ; GFX8-NEXT:    v_min_i16_e32 v6, v6, v7
-; GFX8-NEXT:    v_max_i16_e32 v7, s6, v4
-; GFX8-NEXT:    v_min_i16_e32 v8, s6, v4
+; GFX8-NEXT:    v_max_i16_e32 v7, -1, v4
+; GFX8-NEXT:    v_min_i16_e32 v8, -1, v4
 ; GFX8-NEXT:    v_subrev_u16_e32 v7, s4, v7
 ; GFX8-NEXT:    v_max_i16_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT:    v_max_i16_e32 v7, s6, v1
+; GFX8-NEXT:    v_max_i16_e32 v7, -1, v1
 ; GFX8-NEXT:    v_subrev_u16_e32 v8, s5, v8
 ; GFX8-NEXT:    v_min_i16_e32 v2, v2, v8
 ; GFX8-NEXT:    v_subrev_u16_e32 v7, s4, v7
-; GFX8-NEXT:    v_min_i16_e32 v8, s6, v1
+; GFX8-NEXT:    v_min_i16_e32 v8, -1, v1
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
 ; GFX8-NEXT:    v_subrev_u16_e32 v8, s5, v8
 ; GFX8-NEXT:    v_max_i16_e32 v7, v7, v3
 ; GFX8-NEXT:    v_min_i16_e32 v7, v7, v8
-; GFX8-NEXT:    v_max_i16_e32 v8, s6, v5
+; GFX8-NEXT:    v_max_i16_e32 v8, -1, v5
 ; GFX8-NEXT:    v_min_i16_e32 v9, -1, v5
 ; GFX8-NEXT:    v_subrev_u16_e32 v8, s4, v8
 ; GFX8-NEXT:    v_subrev_u16_e32 v9, s5, v9
@@ -5124,7 +5100,7 @@ define <2 x float> @v_ssubsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    s_movk_i32 s4, 0x7fff
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s6, -1, -1
-; GFX9-NEXT:    s_mov_b32 s5, 0xffff8000
+; GFX9-NEXT:    s_movk_i32 s5, 0x8000
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s4, s4, s4
 ; GFX9-NEXT:    v_pk_max_i16 v4, v0, s6
 ; GFX9-NEXT:    v_pk_sub_i16 v4, v4, s4
@@ -5156,7 +5132,7 @@ define <2 x float> @v_ssubsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
 ; GFX10-NEXT:    v_pk_min_i16 v7, v1, s5
 ; GFX10-NEXT:    v_pk_sub_i16 v4, v4, s4
 ; GFX10-NEXT:    v_pk_sub_i16 v5, v5, s4
-; GFX10-NEXT:    s_mov_b32 s6, 0xffff8000
+; GFX10-NEXT:    s_movk_i32 s6, 0x8000
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s6, s6, s6
 ; GFX10-NEXT:    v_pk_max_i16 v11, v4, v2
@@ -5183,7 +5159,7 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
 ; GFX6-NEXT:    s_cselect_b32 s10, s0, -1
 ; GFX6-NEXT:    s_sub_i32 s10, s10, s8
 ; GFX6-NEXT:    s_cmp_lt_i32 s0, -1
-; GFX6-NEXT:    s_mov_b32 s9, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s9, 1
 ; GFX6-NEXT:    s_cselect_b32 s11, s0, -1
 ; GFX6-NEXT:    s_sub_i32 s11, s11, s9
 ; GFX6-NEXT:    s_cmp_gt_i32 s10, s4
@@ -5252,13 +5228,13 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
 ; GFX8-NEXT:    s_lshr_b32 s4, s0, 16
 ; GFX8-NEXT:    s_lshr_b32 s5, s1, 16
 ; GFX8-NEXT:    s_sext_i32_i16 s10, s0
-; GFX8-NEXT:    s_sext_i32_i16 s11, 0xffff
+; GFX8-NEXT:    s_sext_i32_i16 s11, -1
 ; GFX8-NEXT:    s_cmp_gt_i32 s10, s11
 ; GFX8-NEXT:    s_movk_i32 s8, 0x7fff
 ; GFX8-NEXT:    s_cselect_b32 s12, s10, s11
 ; GFX8-NEXT:    s_sub_i32 s12, s12, s8
 ; GFX8-NEXT:    s_cmp_lt_i32 s10, s11
-; GFX8-NEXT:    s_mov_b32 s9, 0x8000
+; GFX8-NEXT:    s_movk_i32 s9, 0x8000
 ; GFX8-NEXT:    s_cselect_b32 s10, s10, s11
 ; GFX8-NEXT:    s_sub_i32 s10, s10, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s12, s12
@@ -5349,7 +5325,7 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
 ; GFX9-NEXT:    s_cmp_lt_i32 s7, s9
 ; GFX9-NEXT:    s_cselect_b32 s7, s7, s9
 ; GFX9-NEXT:    s_cmp_lt_i32 s8, s6
-; GFX9-NEXT:    s_mov_b32 s5, 0xffff8000
+; GFX9-NEXT:    s_movk_i32 s5, 0x8000
 ; GFX9-NEXT:    s_cselect_b32 s8, s8, s6
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s7, s7, s8
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s5, s5, s5
@@ -5438,7 +5414,7 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
 ; GFX10-NEXT:    s_movk_i32 s10, 0x7fff
 ; GFX10-NEXT:    s_cselect_b32 s8, s5, s7
 ; GFX10-NEXT:    s_cmp_gt_i32 s6, s4
-; GFX10-NEXT:    s_mov_b32 s12, 0xffff8000
+; GFX10-NEXT:    s_movk_i32 s12, 0x8000
 ; GFX10-NEXT:    s_cselect_b32 s9, s6, s4
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s12, s12, s12
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s8, s8, s9
@@ -5551,7 +5527,7 @@ define <3 x float> @v_ssubsat_v6i16(<6 x i16> %lhs, <6 x i16> %rhs) {
 ; GFX6-NEXT:    v_max_i32_e32 v12, -1, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v6, 16, v6
 ; GFX6-NEXT:    v_subrev_i32_e32 v12, vcc, s4, v12
-; GFX6-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s5, 1
 ; GFX6-NEXT:    v_min_i32_e32 v14, -1, v0
 ; GFX6-NEXT:    v_subrev_i32_e32 v14, vcc, s5, v14
 ; GFX6-NEXT:    v_max_i32_e32 v6, v12, v6
@@ -5578,7 +5554,7 @@ define <3 x float> @v_ssubsat_v6i16(<6 x i16> %lhs, <6 x i16> %rhs) {
 ; GFX6-NEXT:    v_min_i32_e32 v6, v6, v8
 ; GFX6-NEXT:    v_max_i32_e32 v7, -1, v3
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v6
-; GFX6-NEXT:    v_mov_b32_e32 v15, 0x80000000
+; GFX6-NEXT:    v_bfrev_b32_e32 v15, 1
 ; GFX6-NEXT:    v_min_i32_e32 v8, -1, v3
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v6, 16, v9
 ; GFX6-NEXT:    v_sub_i32_e32 v7, vcc, v7, v13
@@ -5628,58 +5604,56 @@ define <3 x float> @v_ssubsat_v6i16(<6 x i16> %lhs, <6 x i16> %rhs) {
 ; GFX8-LABEL: v_ssubsat_v6i16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_mov_b32 s6, 0xffff
 ; GFX8-NEXT:    s_movk_i32 s4, 0x7fff
-; GFX8-NEXT:    v_max_i16_e32 v10, s6, v0
-; GFX8-NEXT:    v_subrev_u16_e32 v10, s4, v10
-; GFX8-NEXT:    s_mov_b32 s5, 0x8000
-; GFX8-NEXT:    v_min_i16_e32 v12, s6, v0
+; GFX8-NEXT:    v_max_i16_e32 v9, -1, v0
+; GFX8-NEXT:    v_subrev_u16_e32 v9, s4, v9
+; GFX8-NEXT:    s_movk_i32 s5, 0x8000
+; GFX8-NEXT:    v_min_i16_e32 v11, -1, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
-; GFX8-NEXT:    v_subrev_u16_e32 v12, s5, v12
-; GFX8-NEXT:    v_max_i16_e32 v10, v10, v3
-; GFX8-NEXT:    v_min_i16_e32 v10, v10, v12
-; GFX8-NEXT:    v_max_i16_e32 v12, s6, v6
-; GFX8-NEXT:    v_min_i16_e32 v14, s6, v6
-; GFX8-NEXT:    v_subrev_u16_e32 v12, s4, v12
-; GFX8-NEXT:    v_max_i16_sdwa v3, v12, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT:    v_max_i16_e32 v12, s6, v1
-; GFX8-NEXT:    v_subrev_u16_e32 v14, s5, v14
-; GFX8-NEXT:    v_min_i16_e32 v3, v3, v14
-; GFX8-NEXT:    v_subrev_u16_e32 v12, s4, v12
-; GFX8-NEXT:    v_min_i16_e32 v14, s6, v1
+; GFX8-NEXT:    v_subrev_u16_e32 v11, s5, v11
+; GFX8-NEXT:    v_max_i16_e32 v9, v9, v3
+; GFX8-NEXT:    v_min_i16_e32 v9, v9, v11
+; GFX8-NEXT:    v_max_i16_e32 v11, -1, v6
+; GFX8-NEXT:    v_min_i16_e32 v13, -1, v6
+; GFX8-NEXT:    v_subrev_u16_e32 v11, s4, v11
+; GFX8-NEXT:    v_max_i16_sdwa v3, v11, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT:    v_max_i16_e32 v11, -1, v1
+; GFX8-NEXT:    v_subrev_u16_e32 v13, s5, v13
+; GFX8-NEXT:    v_min_i16_e32 v3, v3, v13
+; GFX8-NEXT:    v_subrev_u16_e32 v11, s4, v11
+; GFX8-NEXT:    v_min_i16_e32 v13, -1, v1
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
+; GFX8-NEXT:    v_subrev_u16_e32 v13, s5, v13
+; GFX8-NEXT:    v_max_i16_e32 v11, v11, v4
+; GFX8-NEXT:    v_min_i16_e32 v11, v11, v13
+; GFX8-NEXT:    v_max_i16_e32 v13, -1, v7
+; GFX8-NEXT:    v_min_i16_e32 v14, -1, v7
+; GFX8-NEXT:    v_subrev_u16_e32 v13, s4, v13
+; GFX8-NEXT:    v_max_i16_sdwa v4, v13, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX8-NEXT:    v_subrev_u16_e32 v14, s5, v14
-; GFX8-NEXT:    v_max_i16_e32 v12, v12, v4
-; GFX8-NEXT:    v_min_i16_e32 v12, v12, v14
-; GFX8-NEXT:    v_max_i16_e32 v14, s6, v7
-; GFX8-NEXT:    v_mov_b32_e32 v9, 0xffff
-; GFX8-NEXT:    v_min_i16_e32 v15, v7, v9
-; GFX8-NEXT:    v_subrev_u16_e32 v14, s4, v14
-; GFX8-NEXT:    v_max_i16_sdwa v4, v14, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT:    v_subrev_u16_e32 v15, s5, v15
-; GFX8-NEXT:    v_mov_b32_e32 v11, 0x7fff
-; GFX8-NEXT:    v_max_i16_e32 v14, v2, v9
-; GFX8-NEXT:    v_sub_u16_e32 v14, v14, v11
-; GFX8-NEXT:    v_min_i16_e32 v4, v4, v15
-; GFX8-NEXT:    v_mov_b32_e32 v13, 0x8000
-; GFX8-NEXT:    v_min_i16_e32 v15, v2, v9
+; GFX8-NEXT:    v_mov_b32_e32 v10, 0x7fff
+; GFX8-NEXT:    v_max_i16_e32 v13, -1, v2
+; GFX8-NEXT:    v_sub_u16_e32 v13, v13, v10
+; GFX8-NEXT:    v_min_i16_e32 v4, v4, v14
+; GFX8-NEXT:    v_mov_b32_e32 v12, 0xffff8000
+; GFX8-NEXT:    v_min_i16_e32 v14, -1, v2
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
-; GFX8-NEXT:    v_sub_u16_e32 v15, v15, v13
-; GFX8-NEXT:    v_max_i16_e32 v14, v14, v5
-; GFX8-NEXT:    v_min_i16_e32 v14, v14, v15
-; GFX8-NEXT:    v_max_i16_e32 v15, v8, v9
-; GFX8-NEXT:    v_min_i16_e32 v9, v8, v9
-; GFX8-NEXT:    v_sub_u16_e32 v11, v15, v11
-; GFX8-NEXT:    v_sub_u16_e32 v9, v9, v13
-; GFX8-NEXT:    v_max_i16_sdwa v5, v11, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT:    v_sub_u16_e32 v0, v0, v10
+; GFX8-NEXT:    v_sub_u16_e32 v14, v14, v12
+; GFX8-NEXT:    v_max_i16_e32 v13, v13, v5
+; GFX8-NEXT:    v_min_i16_e32 v13, v13, v14
+; GFX8-NEXT:    v_max_i16_e32 v14, -1, v8
+; GFX8-NEXT:    v_sub_u16_e32 v10, v14, v10
+; GFX8-NEXT:    v_min_i16_e32 v14, -1, v8
+; GFX8-NEXT:    v_sub_u16_e32 v12, v14, v12
+; GFX8-NEXT:    v_max_i16_sdwa v5, v10, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT:    v_sub_u16_e32 v0, v0, v9
 ; GFX8-NEXT:    v_sub_u16_sdwa v3, v6, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-NEXT:    v_or_b32_e32 v0, v0, v3
-; GFX8-NEXT:    v_min_i16_e32 v5, v5, v9
-; GFX8-NEXT:    v_sub_u16_e32 v1, v1, v12
+; GFX8-NEXT:    v_min_i16_e32 v5, v5, v12
+; GFX8-NEXT:    v_sub_u16_e32 v1, v1, v11
 ; GFX8-NEXT:    v_sub_u16_sdwa v3, v7, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-NEXT:    v_or_b32_e32 v1, v1, v3
-; GFX8-NEXT:    v_sub_u16_e32 v2, v2, v14
+; GFX8-NEXT:    v_sub_u16_e32 v2, v2, v13
 ; GFX8-NEXT:    v_sub_u16_sdwa v3, v8, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-NEXT:    v_or_b32_e32 v2, v2, v3
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
@@ -5689,7 +5663,7 @@ define <3 x float> @v_ssubsat_v6i16(<6 x i16> %lhs, <6 x i16> %rhs) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    s_movk_i32 s4, 0x7fff
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s6, -1, -1
-; GFX9-NEXT:    s_mov_b32 s5, 0xffff8000
+; GFX9-NEXT:    s_movk_i32 s5, 0x8000
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s4, s4, s4
 ; GFX9-NEXT:    v_pk_max_i16 v6, v0, s6
 ; GFX9-NEXT:    v_pk_sub_i16 v6, v6, s4
@@ -5731,7 +5705,7 @@ define <3 x float> @v_ssubsat_v6i16(<6 x i16> %lhs, <6 x i16> %rhs) {
 ; GFX10-NEXT:    v_pk_sub_i16 v19, v9, s4
 ; GFX10-NEXT:    v_pk_min_i16 v10, v1, s5
 ; GFX10-NEXT:    v_pk_min_i16 v11, v2, s5
-; GFX10-NEXT:    s_mov_b32 s6, 0xffff8000
+; GFX10-NEXT:    s_movk_i32 s6, 0x8000
 ; GFX10-NEXT:    v_pk_max_i16 v14, v6, v3
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s6, s6, s6
 ; GFX10-NEXT:    v_pk_max_i16 v4, v15, v4
@@ -5762,7 +5736,7 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 ; GFX6-NEXT:    s_cselect_b32 s14, s0, -1
 ; GFX6-NEXT:    s_sub_i32 s14, s14, s12
 ; GFX6-NEXT:    s_cmp_lt_i32 s0, -1
-; GFX6-NEXT:    s_mov_b32 s13, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s13, 1
 ; GFX6-NEXT:    s_cselect_b32 s15, s0, -1
 ; GFX6-NEXT:    s_sub_i32 s15, s15, s13
 ; GFX6-NEXT:    s_cmp_gt_i32 s14, s6
@@ -5865,13 +5839,13 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 ; GFX8-NEXT:    s_lshr_b32 s7, s1, 16
 ; GFX8-NEXT:    s_lshr_b32 s8, s2, 16
 ; GFX8-NEXT:    s_sext_i32_i16 s14, s0
-; GFX8-NEXT:    s_sext_i32_i16 s15, 0xffff
+; GFX8-NEXT:    s_sext_i32_i16 s15, -1
 ; GFX8-NEXT:    s_cmp_gt_i32 s14, s15
 ; GFX8-NEXT:    s_movk_i32 s12, 0x7fff
 ; GFX8-NEXT:    s_cselect_b32 s16, s14, s15
 ; GFX8-NEXT:    s_sub_i32 s16, s16, s12
 ; GFX8-NEXT:    s_cmp_lt_i32 s14, s15
-; GFX8-NEXT:    s_mov_b32 s13, 0x8000
+; GFX8-NEXT:    s_movk_i32 s13, 0x8000
 ; GFX8-NEXT:    s_cselect_b32 s14, s14, s15
 ; GFX8-NEXT:    s_sub_i32 s14, s14, s13
 ; GFX8-NEXT:    s_sext_i32_i16 s16, s16
@@ -5998,7 +5972,7 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 ; GFX9-NEXT:    s_cmp_lt_i32 s9, s11
 ; GFX9-NEXT:    s_cselect_b32 s9, s9, s11
 ; GFX9-NEXT:    s_cmp_lt_i32 s10, s8
-; GFX9-NEXT:    s_mov_b32 s7, 0xffff8000
+; GFX9-NEXT:    s_movk_i32 s7, 0x8000
 ; GFX9-NEXT:    s_cselect_b32 s10, s10, s8
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s9, s9, s10
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s7, s7, s7
@@ -6130,7 +6104,7 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 ; GFX10-NEXT:    s_movk_i32 s12, 0x7fff
 ; GFX10-NEXT:    s_cselect_b32 s10, s7, s9
 ; GFX10-NEXT:    s_cmp_gt_i32 s8, s6
-; GFX10-NEXT:    s_mov_b32 s14, 0xffff8000
+; GFX10-NEXT:    s_movk_i32 s14, 0x8000
 ; GFX10-NEXT:    s_cselect_b32 s11, s8, s6
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s14, s14, s14
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s10, s10, s11
@@ -6275,7 +6249,7 @@ define <4 x float> @v_ssubsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
 ; GFX6-NEXT:    v_max_i32_e32 v16, -1, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v8, 16, v8
 ; GFX6-NEXT:    v_subrev_i32_e32 v16, vcc, s4, v16
-; GFX6-NEXT:    s_mov_b32 s5, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s5, 1
 ; GFX6-NEXT:    v_min_i32_e32 v18, -1, v0
 ; GFX6-NEXT:    v_subrev_i32_e32 v18, vcc, s5, v18
 ; GFX6-NEXT:    v_max_i32_e32 v8, v16, v8
@@ -6302,7 +6276,7 @@ define <4 x float> @v_ssubsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
 ; GFX6-NEXT:    v_min_i32_e32 v8, v8, v10
 ; GFX6-NEXT:    v_max_i32_e32 v9, -1, v3
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v8
-; GFX6-NEXT:    v_mov_b32_e32 v19, 0x80000000
+; GFX6-NEXT:    v_bfrev_b32_e32 v19, 1
 ; GFX6-NEXT:    v_min_i32_e32 v10, -1, v3
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v8, 16, v11
 ; GFX6-NEXT:    v_sub_i32_e32 v9, vcc, v9, v17
@@ -6376,74 +6350,72 @@ define <4 x float> @v_ssubsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
 ; GFX8-LABEL: v_ssubsat_v8i16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_mov_b32 s6, 0xffff
 ; GFX8-NEXT:    s_movk_i32 s4, 0x7fff
-; GFX8-NEXT:    v_max_i16_e32 v13, s6, v0
-; GFX8-NEXT:    v_subrev_u16_e32 v13, s4, v13
-; GFX8-NEXT:    s_mov_b32 s5, 0x8000
-; GFX8-NEXT:    v_min_i16_e32 v15, s6, v0
+; GFX8-NEXT:    v_max_i16_e32 v12, -1, v0
+; GFX8-NEXT:    v_subrev_u16_e32 v12, s4, v12
+; GFX8-NEXT:    s_movk_i32 s5, 0x8000
+; GFX8-NEXT:    v_min_i16_e32 v14, -1, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v8, 16, v0
-; GFX8-NEXT:    v_subrev_u16_e32 v15, s5, v15
-; GFX8-NEXT:    v_max_i16_e32 v13, v13, v4
-; GFX8-NEXT:    v_min_i16_e32 v13, v13, v15
-; GFX8-NEXT:    v_max_i16_e32 v15, s6, v8
-; GFX8-NEXT:    v_min_i16_e32 v17, s6, v8
-; GFX8-NEXT:    v_subrev_u16_e32 v15, s4, v15
-; GFX8-NEXT:    v_max_i16_sdwa v4, v15, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT:    v_max_i16_e32 v15, s6, v1
-; GFX8-NEXT:    v_subrev_u16_e32 v17, s5, v17
-; GFX8-NEXT:    v_min_i16_e32 v4, v4, v17
-; GFX8-NEXT:    v_subrev_u16_e32 v15, s4, v15
-; GFX8-NEXT:    v_min_i16_e32 v17, s6, v1
+; GFX8-NEXT:    v_subrev_u16_e32 v14, s5, v14
+; GFX8-NEXT:    v_max_i16_e32 v12, v12, v4
+; GFX8-NEXT:    v_min_i16_e32 v12, v12, v14
+; GFX8-NEXT:    v_max_i16_e32 v14, -1, v8
+; GFX8-NEXT:    v_min_i16_e32 v16, -1, v8
+; GFX8-NEXT:    v_subrev_u16_e32 v14, s4, v14
+; GFX8-NEXT:    v_max_i16_sdwa v4, v14, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT:    v_max_i16_e32 v14, -1, v1
+; GFX8-NEXT:    v_subrev_u16_e32 v16, s5, v16
+; GFX8-NEXT:    v_min_i16_e32 v4, v4, v16
+; GFX8-NEXT:    v_subrev_u16_e32 v14, s4, v14
+; GFX8-NEXT:    v_min_i16_e32 v16, -1, v1
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v9, 16, v1
+; GFX8-NEXT:    v_subrev_u16_e32 v16, s5, v16
+; GFX8-NEXT:    v_max_i16_e32 v14, v14, v5
+; GFX8-NEXT:    v_min_i16_e32 v14, v14, v16
+; GFX8-NEXT:    v_max_i16_e32 v16, -1, v9
+; GFX8-NEXT:    v_min_i16_e32 v17, -1, v9
+; GFX8-NEXT:    v_subrev_u16_e32 v16, s4, v16
+; GFX8-NEXT:    v_max_i16_sdwa v5, v16, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX8-NEXT:    v_subrev_u16_e32 v17, s5, v17
-; GFX8-NEXT:    v_max_i16_e32 v15, v15, v5
-; GFX8-NEXT:    v_min_i16_e32 v15, v15, v17
-; GFX8-NEXT:    v_max_i16_e32 v17, s6, v9
-; GFX8-NEXT:    v_mov_b32_e32 v12, 0xffff
-; GFX8-NEXT:    v_min_i16_e32 v18, v9, v12
-; GFX8-NEXT:    v_subrev_u16_e32 v17, s4, v17
-; GFX8-NEXT:    v_max_i16_sdwa v5, v17, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT:    v_subrev_u16_e32 v18, s5, v18
-; GFX8-NEXT:    v_mov_b32_e32 v14, 0x7fff
-; GFX8-NEXT:    v_max_i16_e32 v17, v2, v12
-; GFX8-NEXT:    v_sub_u16_e32 v17, v17, v14
-; GFX8-NEXT:    v_min_i16_e32 v5, v5, v18
-; GFX8-NEXT:    v_mov_b32_e32 v16, 0x8000
-; GFX8-NEXT:    v_min_i16_e32 v18, v2, v12
+; GFX8-NEXT:    v_mov_b32_e32 v13, 0x7fff
+; GFX8-NEXT:    v_max_i16_e32 v16, -1, v2
+; GFX8-NEXT:    v_sub_u16_e32 v16, v16, v13
+; GFX8-NEXT:    v_min_i16_e32 v5, v5, v17
+; GFX8-NEXT:    v_mov_b32_e32 v15, 0xffff8000
+; GFX8-NEXT:    v_min_i16_e32 v17, -1, v2
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
-; GFX8-NEXT:    v_sub_u16_e32 v18, v18, v16
-; GFX8-NEXT:    v_max_i16_e32 v17, v17, v6
-; GFX8-NEXT:    v_min_i16_e32 v17, v17, v18
-; GFX8-NEXT:    v_max_i16_e32 v18, v10, v12
-; GFX8-NEXT:    v_min_i16_e32 v19, v10, v12
-; GFX8-NEXT:    v_sub_u16_e32 v18, v18, v14
-; GFX8-NEXT:    v_max_i16_sdwa v6, v18, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT:    v_max_i16_e32 v18, v3, v12
-; GFX8-NEXT:    v_sub_u16_e32 v19, v19, v16
-; GFX8-NEXT:    v_sub_u16_e32 v18, v18, v14
-; GFX8-NEXT:    v_min_i16_e32 v6, v6, v19
-; GFX8-NEXT:    v_min_i16_e32 v19, v3, v12
+; GFX8-NEXT:    v_sub_u16_e32 v17, v17, v15
+; GFX8-NEXT:    v_max_i16_e32 v16, v16, v6
+; GFX8-NEXT:    v_min_i16_e32 v16, v16, v17
+; GFX8-NEXT:    v_max_i16_e32 v17, -1, v10
+; GFX8-NEXT:    v_min_i16_e32 v18, -1, v10
+; GFX8-NEXT:    v_sub_u16_e32 v17, v17, v13
+; GFX8-NEXT:    v_max_i16_sdwa v6, v17, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT:    v_max_i16_e32 v17, -1, v3
+; GFX8-NEXT:    v_sub_u16_e32 v18, v18, v15
+; GFX8-NEXT:    v_sub_u16_e32 v17, v17, v13
+; GFX8-NEXT:    v_min_i16_e32 v6, v6, v18
+; GFX8-NEXT:    v_min_i16_e32 v18, -1, v3
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v11, 16, v3
-; GFX8-NEXT:    v_sub_u16_e32 v19, v19, v16
-; GFX8-NEXT:    v_max_i16_e32 v18, v18, v7
-; GFX8-NEXT:    v_min_i16_e32 v18, v18, v19
-; GFX8-NEXT:    v_max_i16_e32 v19, v11, v12
-; GFX8-NEXT:    v_min_i16_e32 v12, v11, v12
-; GFX8-NEXT:    v_sub_u16_e32 v14, v19, v14
-; GFX8-NEXT:    v_sub_u16_e32 v0, v0, v13
+; GFX8-NEXT:    v_sub_u16_e32 v18, v18, v15
+; GFX8-NEXT:    v_max_i16_e32 v17, v17, v7
+; GFX8-NEXT:    v_min_i16_e32 v17, v17, v18
+; GFX8-NEXT:    v_max_i16_e32 v18, -1, v11
+; GFX8-NEXT:    v_sub_u16_e32 v13, v18, v13
+; GFX8-NEXT:    v_min_i16_e32 v18, -1, v11
+; GFX8-NEXT:    v_sub_u16_e32 v0, v0, v12
 ; GFX8-NEXT:    v_sub_u16_sdwa v4, v8, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
-; GFX8-NEXT:    v_sub_u16_e32 v12, v12, v16
-; GFX8-NEXT:    v_max_i16_sdwa v7, v14, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT:    v_sub_u16_e32 v1, v1, v15
+; GFX8-NEXT:    v_sub_u16_e32 v15, v18, v15
+; GFX8-NEXT:    v_max_i16_sdwa v7, v13, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT:    v_sub_u16_e32 v1, v1, v14
 ; GFX8-NEXT:    v_sub_u16_sdwa v4, v9, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-NEXT:    v_or_b32_e32 v1, v1, v4
-; GFX8-NEXT:    v_min_i16_e32 v7, v7, v12
-; GFX8-NEXT:    v_sub_u16_e32 v2, v2, v17
+; GFX8-NEXT:    v_min_i16_e32 v7, v7, v15
+; GFX8-NEXT:    v_sub_u16_e32 v2, v2, v16
 ; GFX8-NEXT:    v_sub_u16_sdwa v4, v10, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-NEXT:    v_or_b32_e32 v2, v2, v4
-; GFX8-NEXT:    v_sub_u16_e32 v3, v3, v18
+; GFX8-NEXT:    v_sub_u16_e32 v3, v3, v17
 ; GFX8-NEXT:    v_sub_u16_sdwa v4, v11, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-NEXT:    v_or_b32_e32 v3, v3, v4
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
@@ -6453,7 +6425,7 @@ define <4 x float> @v_ssubsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    s_movk_i32 s4, 0x7fff
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s6, -1, -1
-; GFX9-NEXT:    s_mov_b32 s5, 0xffff8000
+; GFX9-NEXT:    s_movk_i32 s5, 0x8000
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s4, s4, s4
 ; GFX9-NEXT:    v_pk_max_i16 v8, v0, s6
 ; GFX9-NEXT:    v_pk_sub_i16 v8, v8, s4
@@ -6505,7 +6477,7 @@ define <4 x float> @v_ssubsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
 ; GFX10-NEXT:    v_pk_min_i16 v13, v2, s4
 ; GFX10-NEXT:    v_pk_sub_i16 v8, v8, s5
 ; GFX10-NEXT:    v_pk_min_i16 v14, v3, s4
-; GFX10-NEXT:    s_mov_b32 s6, 0xffff8000
+; GFX10-NEXT:    s_movk_i32 s6, 0x8000
 ; GFX10-NEXT:    v_pk_max_i16 v4, v15, v4
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s6, s6, s6
 ; GFX10-NEXT:    v_pk_max_i16 v5, v10, v5
@@ -6540,7 +6512,7 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX6-NEXT:    s_cselect_b32 s18, s0, -1
 ; GFX6-NEXT:    s_sub_i32 s18, s18, s16
 ; GFX6-NEXT:    s_cmp_lt_i32 s0, -1
-; GFX6-NEXT:    s_mov_b32 s17, 0x80000000
+; GFX6-NEXT:    s_brev_b32 s17, 1
 ; GFX6-NEXT:    s_cselect_b32 s19, s0, -1
 ; GFX6-NEXT:    s_sub_i32 s19, s19, s17
 ; GFX6-NEXT:    s_cmp_gt_i32 s18, s8
@@ -6677,13 +6649,13 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX8-NEXT:    s_lshr_b32 s10, s2, 16
 ; GFX8-NEXT:    s_lshr_b32 s11, s3, 16
 ; GFX8-NEXT:    s_sext_i32_i16 s18, s0
-; GFX8-NEXT:    s_sext_i32_i16 s19, 0xffff
+; GFX8-NEXT:    s_sext_i32_i16 s19, -1
 ; GFX8-NEXT:    s_cmp_gt_i32 s18, s19
 ; GFX8-NEXT:    s_movk_i32 s16, 0x7fff
 ; GFX8-NEXT:    s_cselect_b32 s20, s18, s19
 ; GFX8-NEXT:    s_sub_i32 s20, s20, s16
 ; GFX8-NEXT:    s_cmp_lt_i32 s18, s19
-; GFX8-NEXT:    s_mov_b32 s17, 0x8000
+; GFX8-NEXT:    s_movk_i32 s17, 0x8000
 ; GFX8-NEXT:    s_cselect_b32 s18, s18, s19
 ; GFX8-NEXT:    s_sub_i32 s18, s18, s17
 ; GFX8-NEXT:    s_sext_i32_i16 s20, s20
@@ -6846,7 +6818,7 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX9-NEXT:    s_cmp_lt_i32 s11, s13
 ; GFX9-NEXT:    s_cselect_b32 s11, s11, s13
 ; GFX9-NEXT:    s_cmp_lt_i32 s12, s10
-; GFX9-NEXT:    s_mov_b32 s9, 0xffff8000
+; GFX9-NEXT:    s_movk_i32 s9, 0x8000
 ; GFX9-NEXT:    s_cselect_b32 s12, s12, s10
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s11, s11, s12
 ; GFX9-NEXT:    s_pack_ll_b32_b16 s9, s9, s9
@@ -7021,7 +6993,7 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX10-NEXT:    s_movk_i32 s14, 0x7fff
 ; GFX10-NEXT:    s_cselect_b32 s12, s9, s11
 ; GFX10-NEXT:    s_cmp_gt_i32 s10, s8
-; GFX10-NEXT:    s_mov_b32 s16, 0xffff8000
+; GFX10-NEXT:    s_movk_i32 s16, 0x8000
 ; GFX10-NEXT:    s_cselect_b32 s13, s10, s8
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s16, s16, s16
 ; GFX10-NEXT:    s_pack_ll_b32_b16 s12, s12, s13

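(The ssubsat check updates above are all the same immediate re-materialization: zero-extended 16-bit literals such as 0xffff and 0x8000 previously needed a full 32-bit literal in an s_mov_b32 / v_mov_b32_e32, while the sign-extended values fold into cheaper encodings -- -1 as an inline operand, 0xffff8000 via s_movk_i32 s5, 0x8000 since s_movk_i32 sign-extends its 16-bit immediate, and 0x80000000 via s_brev_b32 s5, 1, the bit-reverse of 1. As a standalone sketch of why the widening convention changes which bit patterns the selector can fold -- illustrative C++ only, not LLVM code, and the helper names are hypothetical:

    #include <cstdint>
    #include <cstdio>

    // Zero-extending a 16-bit immediate keeps the high bits clear:
    // -1 becomes 0x0000ffff and 0x8000 stays 0x00008000, neither of
    // which matches an inline constant or a signed 16-bit movk.
    uint64_t widenZExt(uint16_t Imm) { return Imm; }

    // Sign-extending instead gives all-ones for -1 and 0xffff8000 in
    // the low 32 bits for 0x8000 -- exactly the patterns the updated
    // checks expect to see folded.
    int64_t widenSExt(uint16_t Imm) { return (int16_t)Imm; }

    int main() {
      std::printf("zext(0xffff) = 0x%016llx\n",
                  (unsigned long long)widenZExt(0xffff)); // 0x000000000000ffff
      std::printf("sext(0xffff) = 0x%016llx\n",
                  (unsigned long long)widenSExt(0xffff)); // 0xffffffffffffffff
      std::printf("sext(0x8000) low 32 bits = 0x%08x\n",
                  (unsigned)(uint32_t)widenSExt(0x8000)); // 0xffff8000
      return 0;
    }

The uaddsat changes below follow the same pattern for the 0xffff XOR masks.)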
diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
index 413bc7103847..5570309a5be7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
@@ -21,7 +21,7 @@ define i7 @v_uaddsat_i7(i7 %lhs, i7 %rhs) {
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 9, v0
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 9, v1
-; GFX8-NEXT:    v_xor_b32_e32 v2, 0xffff, v0
+; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v0
 ; GFX8-NEXT:    v_min_u16_e32 v1, v2, v1
 ; GFX8-NEXT:    v_add_u16_e32 v0, v0, v1
 ; GFX8-NEXT:    v_lshrrev_b16_e32 v0, 9, v0
@@ -32,7 +32,7 @@ define i7 @v_uaddsat_i7(i7 %lhs, i7 %rhs) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 9, v0
 ; GFX9-NEXT:    v_lshlrev_b16_e32 v1, 9, v1
-; GFX9-NEXT:    v_xor_b32_e32 v2, 0xffff, v0
+; GFX9-NEXT:    v_xor_b32_e32 v2, -1, v0
 ; GFX9-NEXT:    v_min_u16_e32 v1, v2, v1
 ; GFX9-NEXT:    v_add_u16_e32 v0, v0, v1
 ; GFX9-NEXT:    v_lshrrev_b16_e32 v0, 9, v0
@@ -45,7 +45,7 @@ define i7 @v_uaddsat_i7(i7 %lhs, i7 %rhs) {
 ; GFX10-NEXT:    v_lshlrev_b16_e64 v0, 9, v0
 ; GFX10-NEXT:    v_lshlrev_b16_e64 v1, 9, v1
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    v_xor_b32_e32 v2, 0xffff, v0
+; GFX10-NEXT:    v_xor_b32_e32 v2, -1, v0
 ; GFX10-NEXT:    v_min_u16_e64 v1, v2, v1
 ; GFX10-NEXT:    v_add_nc_u16_e64 v0, v0, v1
 ; GFX10-NEXT:    v_lshrrev_b16_e64 v0, 9, v0
@@ -71,7 +71,7 @@ define amdgpu_ps i7 @s_uaddsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
 ; GFX8-NEXT:    s_bfe_u32 s2, 9, 0x100000
 ; GFX8-NEXT:    s_lshl_b32 s0, s0, s2
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, s2
-; GFX8-NEXT:    s_xor_b32 s3, s0, 0xffff
+; GFX8-NEXT:    s_xor_b32 s3, s0, -1
 ; GFX8-NEXT:    s_bfe_u32 s3, s3, 0x100000
 ; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
 ; GFX8-NEXT:    s_cmp_lt_u32 s3, s1
@@ -86,7 +86,7 @@ define amdgpu_ps i7 @s_uaddsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
 ; GFX9-NEXT:    s_bfe_u32 s2, 9, 0x100000
 ; GFX9-NEXT:    s_lshl_b32 s0, s0, s2
 ; GFX9-NEXT:    s_lshl_b32 s1, s1, s2
-; GFX9-NEXT:    s_xor_b32 s3, s0, 0xffff
+; GFX9-NEXT:    s_xor_b32 s3, s0, -1
 ; GFX9-NEXT:    s_bfe_u32 s3, s3, 0x100000
 ; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
 ; GFX9-NEXT:    s_cmp_lt_u32 s3, s1
@@ -102,7 +102,7 @@ define amdgpu_ps i7 @s_uaddsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
 ; GFX10-NEXT:    s_lshl_b32 s0, s0, s2
 ; GFX10-NEXT:    s_lshl_b32 s1, s1, s2
-; GFX10-NEXT:    s_xor_b32 s3, s0, 0xffff
+; GFX10-NEXT:    s_xor_b32 s3, s0, -1
 ; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
 ; GFX10-NEXT:    s_bfe_u32 s3, s3, 0x100000
 ; GFX10-NEXT:    s_cmp_lt_u32 s3, s1
@@ -132,7 +132,7 @@ define i8 @v_uaddsat_i8(i8 %lhs, i8 %rhs) {
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
-; GFX8-NEXT:    v_xor_b32_e32 v2, 0xffff, v0
+; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v0
 ; GFX8-NEXT:    v_min_u16_e32 v1, v2, v1
 ; GFX8-NEXT:    v_add_u16_e32 v0, v0, v1
 ; GFX8-NEXT:    v_lshrrev_b16_e32 v0, 8, v0
@@ -143,7 +143,7 @@ define i8 @v_uaddsat_i8(i8 %lhs, i8 %rhs) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
 ; GFX9-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
-; GFX9-NEXT:    v_xor_b32_e32 v2, 0xffff, v0
+; GFX9-NEXT:    v_xor_b32_e32 v2, -1, v0
 ; GFX9-NEXT:    v_min_u16_e32 v1, v2, v1
 ; GFX9-NEXT:    v_add_u16_e32 v0, v0, v1
 ; GFX9-NEXT:    v_lshrrev_b16_e32 v0, 8, v0
@@ -156,7 +156,7 @@ define i8 @v_uaddsat_i8(i8 %lhs, i8 %rhs) {
 ; GFX10-NEXT:    v_lshlrev_b16_e64 v0, 8, v0
 ; GFX10-NEXT:    v_lshlrev_b16_e64 v1, 8, v1
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    v_xor_b32_e32 v2, 0xffff, v0
+; GFX10-NEXT:    v_xor_b32_e32 v2, -1, v0
 ; GFX10-NEXT:    v_min_u16_e64 v1, v2, v1
 ; GFX10-NEXT:    v_add_nc_u16_e64 v0, v0, v1
 ; GFX10-NEXT:    v_lshrrev_b16_e64 v0, 8, v0
@@ -182,7 +182,7 @@ define amdgpu_ps i8 @s_uaddsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
 ; GFX8-NEXT:    s_bfe_u32 s2, 8, 0x100000
 ; GFX8-NEXT:    s_lshl_b32 s0, s0, s2
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, s2
-; GFX8-NEXT:    s_xor_b32 s3, s0, 0xffff
+; GFX8-NEXT:    s_xor_b32 s3, s0, -1
 ; GFX8-NEXT:    s_bfe_u32 s3, s3, 0x100000
 ; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
 ; GFX8-NEXT:    s_cmp_lt_u32 s3, s1
@@ -197,7 +197,7 @@ define amdgpu_ps i8 @s_uaddsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
 ; GFX9-NEXT:    s_bfe_u32 s2, 8, 0x100000
 ; GFX9-NEXT:    s_lshl_b32 s0, s0, s2
 ; GFX9-NEXT:    s_lshl_b32 s1, s1, s2
-; GFX9-NEXT:    s_xor_b32 s3, s0, 0xffff
+; GFX9-NEXT:    s_xor_b32 s3, s0, -1
 ; GFX9-NEXT:    s_bfe_u32 s3, s3, 0x100000
 ; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
 ; GFX9-NEXT:    s_cmp_lt_u32 s3, s1
@@ -213,7 +213,7 @@ define amdgpu_ps i8 @s_uaddsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
 ; GFX10-NEXT:    s_lshl_b32 s0, s0, s2
 ; GFX10-NEXT:    s_lshl_b32 s1, s1, s2
-; GFX10-NEXT:    s_xor_b32 s3, s0, 0xffff
+; GFX10-NEXT:    s_xor_b32 s3, s0, -1
 ; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
 ; GFX10-NEXT:    s_bfe_u32 s3, s3, 0x100000
 ; GFX10-NEXT:    s_cmp_lt_u32 s3, s1
@@ -257,13 +257,12 @@ define i16 @v_uaddsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
 ; GFX8-NEXT:    v_mov_b32_e32 v2, 8
 ; GFX8-NEXT:    v_lshrrev_b32_sdwa v3, v2, v0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
-; GFX8-NEXT:    s_mov_b32 s4, 0xffff
 ; GFX8-NEXT:    v_lshrrev_b32_sdwa v2, v2, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
-; GFX8-NEXT:    v_xor_b32_e32 v4, s4, v0
+; GFX8-NEXT:    v_xor_b32_e32 v4, -1, v0
 ; GFX8-NEXT:    v_min_u16_e32 v1, v4, v1
 ; GFX8-NEXT:    v_add_u16_e32 v0, v0, v1
-; GFX8-NEXT:    v_xor_b32_e32 v1, s4, v3
+; GFX8-NEXT:    v_xor_b32_e32 v1, -1, v3
 ; GFX8-NEXT:    v_min_u16_e32 v1, v1, v2
 ; GFX8-NEXT:    v_mov_b32_e32 v2, 0xff
 ; GFX8-NEXT:    v_add_u16_e32 v1, v3, v1
@@ -277,14 +276,13 @@ define i16 @v_uaddsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    s_mov_b32 s4, 8
 ; GFX9-NEXT:    v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX9-NEXT:    v_lshrrev_b32_sdwa v3, s4, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
-; GFX9-NEXT:    s_mov_b32 s4, 0xffff
+; GFX9-NEXT:    v_lshrrev_b32_sdwa v3, s4, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX9-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
-; GFX9-NEXT:    v_xor_b32_e32 v4, s4, v0
+; GFX9-NEXT:    v_xor_b32_e32 v4, -1, v0
 ; GFX9-NEXT:    v_min_u16_e32 v1, v4, v1
 ; GFX9-NEXT:    v_add_u16_e32 v0, v0, v1
-; GFX9-NEXT:    v_xor_b32_e32 v1, s4, v2
+; GFX9-NEXT:    v_xor_b32_e32 v1, -1, v2
 ; GFX9-NEXT:    v_min_u16_e32 v1, v1, v3
 ; GFX9-NEXT:    s_movk_i32 s4, 0xff
 ; GFX9-NEXT:    v_add_u16_e32 v1, v2, v1
@@ -300,12 +298,11 @@ define i16 @v_uaddsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
 ; GFX10-NEXT:    s_mov_b32 s4, 8
 ; GFX10-NEXT:    v_lshlrev_b16_e64 v2, 8, v0
 ; GFX10-NEXT:    v_lshrrev_b32_sdwa v0, s4, v0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX10-NEXT:    s_mov_b32 s5, 0xffff
 ; GFX10-NEXT:    v_lshrrev_b32_sdwa v3, s4, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX10-NEXT:    v_lshlrev_b16_e64 v1, 8, v1
-; GFX10-NEXT:    v_xor_b32_e32 v4, s5, v2
-; GFX10-NEXT:    v_xor_b32_e32 v5, s5, v0
 ; GFX10-NEXT:    s_movk_i32 s4, 0xff
+; GFX10-NEXT:    v_xor_b32_e32 v4, -1, v2
+; GFX10-NEXT:    v_xor_b32_e32 v5, -1, v0
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
 ; GFX10-NEXT:    v_min_u16_e64 v1, v4, v1
 ; GFX10-NEXT:    v_min_u16_e64 v3, v5, v3
@@ -353,19 +350,18 @@ define amdgpu_ps i16 @s_uaddsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_bfe_u32 s4, 8, 0x100000
 ; GFX8-NEXT:    s_lshr_b32 s2, s0, 8
 ; GFX8-NEXT:    s_lshl_b32 s0, s0, s4
-; GFX8-NEXT:    s_mov_b32 s5, 0xffff
-; GFX8-NEXT:    s_xor_b32 s6, s0, s5
+; GFX8-NEXT:    s_xor_b32 s5, s0, -1
 ; GFX8-NEXT:    s_lshr_b32 s3, s1, 8
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, s4
-; GFX8-NEXT:    s_bfe_u32 s6, s6, 0x100000
+; GFX8-NEXT:    s_bfe_u32 s5, s5, 0x100000
 ; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
-; GFX8-NEXT:    s_cmp_lt_u32 s6, s1
-; GFX8-NEXT:    s_cselect_b32 s1, s6, s1
+; GFX8-NEXT:    s_cmp_lt_u32 s5, s1
+; GFX8-NEXT:    s_cselect_b32 s1, s5, s1
 ; GFX8-NEXT:    s_add_i32 s0, s0, s1
 ; GFX8-NEXT:    s_lshl_b32 s1, s2, s4
 ; GFX8-NEXT:    s_lshl_b32 s2, s3, s4
 ; GFX8-NEXT:    s_bfe_u32 s0, s0, 0x100000
-; GFX8-NEXT:    s_xor_b32 s3, s1, s5
+; GFX8-NEXT:    s_xor_b32 s3, s1, -1
 ; GFX8-NEXT:    s_lshr_b32 s0, s0, s4
 ; GFX8-NEXT:    s_bfe_u32 s3, s3, 0x100000
 ; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x100000
@@ -386,19 +382,18 @@ define amdgpu_ps i16 @s_uaddsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
 ; GFX9-NEXT:    s_bfe_u32 s4, 8, 0x100000
 ; GFX9-NEXT:    s_lshr_b32 s2, s0, 8
 ; GFX9-NEXT:    s_lshl_b32 s0, s0, s4
-; GFX9-NEXT:    s_mov_b32 s5, 0xffff
-; GFX9-NEXT:    s_xor_b32 s6, s0, s5
+; GFX9-NEXT:    s_xor_b32 s5, s0, -1
 ; GFX9-NEXT:    s_lshr_b32 s3, s1, 8
 ; GFX9-NEXT:    s_lshl_b32 s1, s1, s4
-; GFX9-NEXT:    s_bfe_u32 s6, s6, 0x100000
+; GFX9-NEXT:    s_bfe_u32 s5, s5, 0x100000
 ; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
-; GFX9-NEXT:    s_cmp_lt_u32 s6, s1
-; GFX9-NEXT:    s_cselect_b32 s1, s6, s1
+; GFX9-NEXT:    s_cmp_lt_u32 s5, s1
+; GFX9-NEXT:    s_cselect_b32 s1, s5, s1
 ; GFX9-NEXT:    s_add_i32 s0, s0, s1
 ; GFX9-NEXT:    s_lshl_b32 s1, s2, s4
 ; GFX9-NEXT:    s_lshl_b32 s2, s3, s4
 ; GFX9-NEXT:    s_bfe_u32 s0, s0, 0x100000
-; GFX9-NEXT:    s_xor_b32 s3, s1, s5
+; GFX9-NEXT:    s_xor_b32 s3, s1, -1
 ; GFX9-NEXT:    s_lshr_b32 s0, s0, s4
 ; GFX9-NEXT:    s_bfe_u32 s3, s3, 0x100000
 ; GFX9-NEXT:    s_bfe_u32 s2, s2, 0x100000
@@ -417,20 +412,19 @@ define amdgpu_ps i16 @s_uaddsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
 ; GFX10-LABEL: s_uaddsat_v2i8:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_bfe_u32 s2, 8, 0x100000
-; GFX10-NEXT:    s_mov_b32 s4, 0xffff
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
 ; GFX10-NEXT:    s_lshl_b32 s3, s0, s2
-; GFX10-NEXT:    s_lshl_b32 s6, s1, s2
-; GFX10-NEXT:    s_xor_b32 s5, s3, s4
-; GFX10-NEXT:    s_bfe_u32 s6, s6, 0x100000
+; GFX10-NEXT:    s_lshl_b32 s5, s1, s2
+; GFX10-NEXT:    s_xor_b32 s4, s3, -1
 ; GFX10-NEXT:    s_bfe_u32 s5, s5, 0x100000
+; GFX10-NEXT:    s_bfe_u32 s4, s4, 0x100000
 ; GFX10-NEXT:    s_lshr_b32 s0, s0, 8
 ; GFX10-NEXT:    s_lshr_b32 s1, s1, 8
-; GFX10-NEXT:    s_cmp_lt_u32 s5, s6
-; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_cselect_b32 s5, s5, s6
+; GFX10-NEXT:    s_cmp_lt_u32 s4, s5
+; GFX10-NEXT:    s_cselect_b32 s4, s4, s5
 ; GFX10-NEXT:    s_lshl_b32 s0, s0, s2
-; GFX10-NEXT:    s_add_i32 s3, s3, s5
-; GFX10-NEXT:    s_xor_b32 s4, s0, s4
+; GFX10-NEXT:    s_add_i32 s3, s3, s4
+; GFX10-NEXT:    s_xor_b32 s4, s0, -1
 ; GFX10-NEXT:    s_lshl_b32 s1, s1, s2
 ; GFX10-NEXT:    s_bfe_u32 s3, s3, 0x100000
 ; GFX10-NEXT:    s_bfe_u32 s4, s4, 0x100000
@@ -509,25 +503,24 @@ define i32 @v_uaddsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 24, v0
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
-; GFX8-NEXT:    s_mov_b32 s4, 0xffff
 ; GFX8-NEXT:    v_lshrrev_b32_sdwa v2, v2, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
-; GFX8-NEXT:    v_xor_b32_e32 v8, s4, v0
+; GFX8-NEXT:    v_xor_b32_e32 v8, -1, v0
 ; GFX8-NEXT:    v_min_u16_e32 v1, v8, v1
 ; GFX8-NEXT:    v_add_u16_e32 v0, v0, v1
-; GFX8-NEXT:    v_xor_b32_e32 v1, s4, v3
+; GFX8-NEXT:    v_xor_b32_e32 v1, -1, v3
 ; GFX8-NEXT:    v_min_u16_e32 v1, v1, v2
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v2, 8, v4
 ; GFX8-NEXT:    v_add_u16_e32 v1, v3, v1
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v3, 8, v6
-; GFX8-NEXT:    v_xor_b32_e32 v4, s4, v2
+; GFX8-NEXT:    v_xor_b32_e32 v4, -1, v2
 ; GFX8-NEXT:    v_min_u16_e32 v3, v4, v3
 ; GFX8-NEXT:    v_add_u16_e32 v2, v2, v3
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v3, 8, v5
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v4, 8, v7
-; GFX8-NEXT:    v_xor_b32_e32 v5, s4, v3
+; GFX8-NEXT:    v_xor_b32_e32 v5, -1, v3
 ; GFX8-NEXT:    v_min_u16_e32 v4, v5, v4
 ; GFX8-NEXT:    v_add_u16_e32 v3, v3, v4
 ; GFX8-NEXT:    v_mov_b32_e32 v4, 0xff
@@ -546,29 +539,28 @@ define i32 @v_uaddsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    s_mov_b32 s4, 8
 ; GFX9-NEXT:    v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX9-NEXT:    v_lshrrev_b32_sdwa v5, s4, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
 ; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
 ; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
-; GFX9-NEXT:    s_mov_b32 s4, 0xffff
+; GFX9-NEXT:    v_lshrrev_b32_sdwa v5, s4, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
 ; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
 ; GFX9-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
-; GFX9-NEXT:    v_xor_b32_e32 v8, s4, v0
+; GFX9-NEXT:    v_xor_b32_e32 v8, -1, v0
 ; GFX9-NEXT:    v_min_u16_e32 v1, v8, v1
 ; GFX9-NEXT:    v_add_u16_e32 v0, v0, v1
-; GFX9-NEXT:    v_xor_b32_e32 v1, s4, v2
+; GFX9-NEXT:    v_xor_b32_e32 v1, -1, v2
 ; GFX9-NEXT:    v_min_u16_e32 v1, v1, v5
 ; GFX9-NEXT:    v_add_u16_e32 v1, v2, v1
 ; GFX9-NEXT:    v_lshlrev_b16_e32 v2, 8, v3
 ; GFX9-NEXT:    v_lshlrev_b16_e32 v3, 8, v6
-; GFX9-NEXT:    v_xor_b32_e32 v5, s4, v2
+; GFX9-NEXT:    v_xor_b32_e32 v5, -1, v2
 ; GFX9-NEXT:    v_min_u16_e32 v3, v5, v3
 ; GFX9-NEXT:    v_add_u16_e32 v2, v2, v3
 ; GFX9-NEXT:    v_lshlrev_b16_e32 v3, 8, v4
-; GFX9-NEXT:    v_xor_b32_e32 v5, s4, v3
-; GFX9-NEXT:    v_lshlrev_b16_e32 v4, 8, v7
 ; GFX9-NEXT:    s_movk_i32 s4, 0xff
+; GFX9-NEXT:    v_lshlrev_b16_e32 v4, 8, v7
+; GFX9-NEXT:    v_xor_b32_e32 v5, -1, v3
 ; GFX9-NEXT:    v_and_b32_sdwa v1, v1, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
 ; GFX9-NEXT:    v_min_u16_e32 v4, v5, v4
 ; GFX9-NEXT:    v_lshrrev_b16_e32 v0, 8, v0
@@ -585,30 +577,29 @@ define i32 @v_uaddsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    s_mov_b32 s4, 8
-; GFX10-NEXT:    s_mov_b32 s7, 0xffff
+; GFX10-NEXT:    v_lshlrev_b16_e64 v4, 8, v0
 ; GFX10-NEXT:    v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX10-NEXT:    v_lshrrev_b32_sdwa v3, s4, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX10-NEXT:    v_lshlrev_b16_e64 v4, 8, v0
 ; GFX10-NEXT:    s_mov_b32 s5, 16
 ; GFX10-NEXT:    s_mov_b32 s6, 24
-; GFX10-NEXT:    v_xor_b32_e32 v5, s7, v2
 ; GFX10-NEXT:    v_lshrrev_b32_sdwa v6, s5, v0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX10-NEXT:    v_xor_b32_e32 v5, -1, v2
 ; GFX10-NEXT:    v_lshrrev_b32_sdwa v0, s6, v0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX10-NEXT:    v_lshlrev_b16_e64 v7, 8, v1
-; GFX10-NEXT:    v_xor_b32_e32 v8, s7, v4
+; GFX10-NEXT:    v_xor_b32_e32 v8, -1, v4
+; GFX10-NEXT:    v_xor_b32_e32 v11, -1, v6
 ; GFX10-NEXT:    v_min_u16_e64 v3, v5, v3
 ; GFX10-NEXT:    v_lshrrev_b32_sdwa v5, s5, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX10-NEXT:    v_xor_b32_e32 v11, s7, v6
 ; GFX10-NEXT:    v_lshrrev_b32_sdwa v1, s6, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX10-NEXT:    v_min_u16_e64 v7, v8, v7
-; GFX10-NEXT:    v_add_nc_u16_e64 v2, v2, v3
-; GFX10-NEXT:    v_xor_b32_e32 v3, s7, v0
 ; GFX10-NEXT:    s_movk_i32 s4, 0xff
+; GFX10-NEXT:    v_add_nc_u16_e64 v2, v2, v3
+; GFX10-NEXT:    v_xor_b32_e32 v3, -1, v0
 ; GFX10-NEXT:    v_min_u16_e64 v5, v11, v5
 ; GFX10-NEXT:    v_add_nc_u16_e64 v4, v4, v7
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
 ; GFX10-NEXT:    v_and_b32_sdwa v2, v2, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
 ; GFX10-NEXT:    v_min_u16_e64 v1, v3, v1
-; GFX10-NEXT:    ; implicit-def: $vcc_hi
 ; GFX10-NEXT:    v_add_nc_u16_e64 v3, v6, v5
 ; GFX10-NEXT:    v_lshrrev_b16_e64 v4, 8, v4
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
@@ -682,21 +673,20 @@ define amdgpu_ps i32 @s_uaddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_lshr_b32 s3, s0, 16
 ; GFX8-NEXT:    s_lshr_b32 s4, s0, 24
 ; GFX8-NEXT:    s_lshl_b32 s0, s0, s8
-; GFX8-NEXT:    s_mov_b32 s9, 0xffff
-; GFX8-NEXT:    s_xor_b32 s10, s0, s9
+; GFX8-NEXT:    s_xor_b32 s9, s0, -1
 ; GFX8-NEXT:    s_lshr_b32 s5, s1, 8
 ; GFX8-NEXT:    s_lshr_b32 s6, s1, 16
 ; GFX8-NEXT:    s_lshr_b32 s7, s1, 24
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, s8
-; GFX8-NEXT:    s_bfe_u32 s10, s10, 0x100000
+; GFX8-NEXT:    s_bfe_u32 s9, s9, 0x100000
 ; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
-; GFX8-NEXT:    s_cmp_lt_u32 s10, s1
-; GFX8-NEXT:    s_cselect_b32 s1, s10, s1
+; GFX8-NEXT:    s_cmp_lt_u32 s9, s1
+; GFX8-NEXT:    s_cselect_b32 s1, s9, s1
 ; GFX8-NEXT:    s_add_i32 s0, s0, s1
 ; GFX8-NEXT:    s_lshl_b32 s1, s2, s8
 ; GFX8-NEXT:    s_lshl_b32 s2, s5, s8
 ; GFX8-NEXT:    s_bfe_u32 s0, s0, 0x100000
-; GFX8-NEXT:    s_xor_b32 s5, s1, s9
+; GFX8-NEXT:    s_xor_b32 s5, s1, -1
 ; GFX8-NEXT:    s_lshr_b32 s0, s0, s8
 ; GFX8-NEXT:    s_bfe_u32 s5, s5, 0x100000
 ; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x100000
@@ -706,7 +696,7 @@ define amdgpu_ps i32 @s_uaddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_lshl_b32 s2, s3, s8
 ; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
 ; GFX8-NEXT:    s_lshl_b32 s3, s6, s8
-; GFX8-NEXT:    s_xor_b32 s5, s2, s9
+; GFX8-NEXT:    s_xor_b32 s5, s2, -1
 ; GFX8-NEXT:    s_lshr_b32 s1, s1, s8
 ; GFX8-NEXT:    s_bfe_u32 s5, s5, 0x100000
 ; GFX8-NEXT:    s_bfe_u32 s3, s3, 0x100000
@@ -716,7 +706,7 @@ define amdgpu_ps i32 @s_uaddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_lshl_b32 s3, s4, s8
 ; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x100000
 ; GFX8-NEXT:    s_lshl_b32 s4, s7, s8
-; GFX8-NEXT:    s_xor_b32 s5, s3, s9
+; GFX8-NEXT:    s_xor_b32 s5, s3, -1
 ; GFX8-NEXT:    s_lshr_b32 s2, s2, s8
 ; GFX8-NEXT:    s_bfe_u32 s5, s5, 0x100000
 ; GFX8-NEXT:    s_bfe_u32 s4, s4, 0x100000
@@ -745,21 +735,20 @@ define amdgpu_ps i32 @s_uaddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX9-NEXT:    s_lshr_b32 s3, s0, 16
 ; GFX9-NEXT:    s_lshr_b32 s4, s0, 24
 ; GFX9-NEXT:    s_lshl_b32 s0, s0, s8
-; GFX9-NEXT:    s_mov_b32 s9, 0xffff
-; GFX9-NEXT:    s_xor_b32 s10, s0, s9
+; GFX9-NEXT:    s_xor_b32 s9, s0, -1
 ; GFX9-NEXT:    s_lshr_b32 s5, s1, 8
 ; GFX9-NEXT:    s_lshr_b32 s6, s1, 16
 ; GFX9-NEXT:    s_lshr_b32 s7, s1, 24
 ; GFX9-NEXT:    s_lshl_b32 s1, s1, s8
-; GFX9-NEXT:    s_bfe_u32 s10, s10, 0x100000
+; GFX9-NEXT:    s_bfe_u32 s9, s9, 0x100000
 ; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
-; GFX9-NEXT:    s_cmp_lt_u32 s10, s1
-; GFX9-NEXT:    s_cselect_b32 s1, s10, s1
+; GFX9-NEXT:    s_cmp_lt_u32 s9, s1
+; GFX9-NEXT:    s_cselect_b32 s1, s9, s1
 ; GFX9-NEXT:    s_add_i32 s0, s0, s1
 ; GFX9-NEXT:    s_lshl_b32 s1, s2, s8
 ; GFX9-NEXT:    s_lshl_b32 s2, s5, s8
 ; GFX9-NEXT:    s_bfe_u32 s0, s0, 0x100000
-; GFX9-NEXT:    s_xor_b32 s5, s1, s9
+; GFX9-NEXT:    s_xor_b32 s5, s1, -1
 ; GFX9-NEXT:    s_lshr_b32 s0, s0, s8
 ; GFX9-NEXT:    s_bfe_u32 s5, s5, 0x100000
 ; GFX9-NEXT:    s_bfe_u32 s2, s2, 0x100000
@@ -769,7 +758,7 @@ define amdgpu_ps i32 @s_uaddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX9-NEXT:    s_lshl_b32 s2, s3, s8
 ; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
 ; GFX9-NEXT:    s_lshl_b32 s3, s6, s8
-; GFX9-NEXT:    s_xor_b32 s5, s2, s9
+; GFX9-NEXT:    s_xor_b32 s5, s2, -1
 ; GFX9-NEXT:    s_lshr_b32 s1, s1, s8
 ; GFX9-NEXT:    s_bfe_u32 s5, s5, 0x100000
 ; GFX9-NEXT:    s_bfe_u32 s3, s3, 0x100000
@@ -779,7 +768,7 @@ define amdgpu_ps i32 @s_uaddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX9-NEXT:    s_lshl_b32 s3, s4, s8
 ; GFX9-NEXT:    s_bfe_u32 s2, s2, 0x100000
 ; GFX9-NEXT:    s_lshl_b32 s4, s7, s8
-; GFX9-NEXT:    s_xor_b32 s5, s3, s9
+; GFX9-NEXT:    s_xor_b32 s5, s3, -1
 ; GFX9-NEXT:    s_lshr_b32 s2, s2, s8
 ; GFX9-NEXT:    s_bfe_u32 s5, s5, 0x100000
 ; GFX9-NEXT:    s_bfe_u32 s4, s4, 0x100000
@@ -808,40 +797,39 @@ define amdgpu_ps i32 @s_uaddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX10-NEXT:    s_lshr_b32 s3, s0, 16
 ; GFX10-NEXT:    s_lshr_b32 s4, s0, 24
 ; GFX10-NEXT:    s_lshl_b32 s0, s0, s5
-; GFX10-NEXT:    s_mov_b32 s7, 0xffff
-; GFX10-NEXT:    s_lshl_b32 s10, s1, s5
-; GFX10-NEXT:    s_xor_b32 s9, s0, s7
-; GFX10-NEXT:    s_bfe_u32 s10, s10, 0x100000
+; GFX10-NEXT:    s_lshl_b32 s9, s1, s5
+; GFX10-NEXT:    s_xor_b32 s8, s0, -1
 ; GFX10-NEXT:    s_bfe_u32 s9, s9, 0x100000
+; GFX10-NEXT:    s_bfe_u32 s8, s8, 0x100000
 ; GFX10-NEXT:    s_lshr_b32 s6, s1, 8
-; GFX10-NEXT:    s_lshr_b32 s8, s1, 16
+; GFX10-NEXT:    s_lshr_b32 s7, s1, 16
 ; GFX10-NEXT:    s_lshr_b32 s1, s1, 24
-; GFX10-NEXT:    s_cmp_lt_u32 s9, s10
+; GFX10-NEXT:    s_cmp_lt_u32 s8, s9
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_cselect_b32 s9, s9, s10
+; GFX10-NEXT:    s_cselect_b32 s8, s8, s9
 ; GFX10-NEXT:    s_lshl_b32 s2, s2, s5
-; GFX10-NEXT:    s_add_i32 s0, s0, s9
-; GFX10-NEXT:    s_xor_b32 s9, s2, s7
+; GFX10-NEXT:    s_add_i32 s0, s0, s8
+; GFX10-NEXT:    s_xor_b32 s8, s2, -1
 ; GFX10-NEXT:    s_lshl_b32 s6, s6, s5
 ; GFX10-NEXT:    s_bfe_u32 s0, s0, 0x100000
-; GFX10-NEXT:    s_bfe_u32 s9, s9, 0x100000
+; GFX10-NEXT:    s_bfe_u32 s8, s8, 0x100000
 ; GFX10-NEXT:    s_bfe_u32 s6, s6, 0x100000
 ; GFX10-NEXT:    s_lshr_b32 s0, s0, s5
-; GFX10-NEXT:    s_cmp_lt_u32 s9, s6
-; GFX10-NEXT:    s_cselect_b32 s6, s9, s6
+; GFX10-NEXT:    s_cmp_lt_u32 s8, s6
+; GFX10-NEXT:    s_cselect_b32 s6, s8, s6
 ; GFX10-NEXT:    s_lshl_b32 s3, s3, s5
 ; GFX10-NEXT:    s_add_i32 s2, s2, s6
-; GFX10-NEXT:    s_xor_b32 s6, s3, s7
-; GFX10-NEXT:    s_lshl_b32 s8, s8, s5
+; GFX10-NEXT:    s_xor_b32 s6, s3, -1
+; GFX10-NEXT:    s_lshl_b32 s7, s7, s5
 ; GFX10-NEXT:    s_bfe_u32 s2, s2, 0x100000
 ; GFX10-NEXT:    s_bfe_u32 s6, s6, 0x100000
-; GFX10-NEXT:    s_bfe_u32 s8, s8, 0x100000
+; GFX10-NEXT:    s_bfe_u32 s7, s7, 0x100000
 ; GFX10-NEXT:    s_lshr_b32 s2, s2, s5
-; GFX10-NEXT:    s_cmp_lt_u32 s6, s8
-; GFX10-NEXT:    s_cselect_b32 s6, s6, s8
+; GFX10-NEXT:    s_cmp_lt_u32 s6, s7
+; GFX10-NEXT:    s_cselect_b32 s6, s6, s7
 ; GFX10-NEXT:    s_lshl_b32 s4, s4, s5
 ; GFX10-NEXT:    s_add_i32 s3, s3, s6
-; GFX10-NEXT:    s_xor_b32 s6, s4, s7
+; GFX10-NEXT:    s_xor_b32 s6, s4, -1
 ; GFX10-NEXT:    s_lshl_b32 s1, s1, s5
 ; GFX10-NEXT:    s_bfe_u32 s3, s3, 0x100000
 ; GFX10-NEXT:    s_bfe_u32 s6, s6, 0x100000
@@ -2203,7 +2191,7 @@ define i16 @v_uaddsat_i16(i16 %lhs, i16 %rhs) {
 ; GFX8-LABEL: v_uaddsat_i16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_xor_b32_e32 v2, 0xffff, v0
+; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v0
 ; GFX8-NEXT:    v_min_u16_e32 v1, v2, v1
 ; GFX8-NEXT:    v_add_u16_e32 v0, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
@@ -2211,7 +2199,7 @@ define i16 @v_uaddsat_i16(i16 %lhs, i16 %rhs) {
 ; GFX9-LABEL: v_uaddsat_i16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_xor_b32_e32 v2, 0xffff, v0
+; GFX9-NEXT:    v_xor_b32_e32 v2, -1, v0
 ; GFX9-NEXT:    v_min_u16_e32 v1, v2, v1
 ; GFX9-NEXT:    v_add_u16_e32 v0, v0, v1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
@@ -2220,7 +2208,7 @@ define i16 @v_uaddsat_i16(i16 %lhs, i16 %rhs) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    v_xor_b32_e32 v2, 0xffff, v0
+; GFX10-NEXT:    v_xor_b32_e32 v2, -1, v0
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
 ; GFX10-NEXT:    v_min_u16_e64 v1, v2, v1
 ; GFX10-NEXT:    v_add_nc_u16_e64 v0, v0, v1
@@ -2243,7 +2231,7 @@ define amdgpu_ps i16 @s_uaddsat_i16(i16 inreg %lhs, i16 inreg %rhs) {
 ;
 ; GFX8-LABEL: s_uaddsat_i16:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_xor_b32 s2, s0, 0xffff
+; GFX8-NEXT:    s_xor_b32 s2, s0, -1
 ; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x100000
 ; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
 ; GFX8-NEXT:    s_cmp_lt_u32 s2, s1
@@ -2253,7 +2241,7 @@ define amdgpu_ps i16 @s_uaddsat_i16(i16 inreg %lhs, i16 inreg %rhs) {
 ;
 ; GFX9-LABEL: s_uaddsat_i16:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_xor_b32 s2, s0, 0xffff
+; GFX9-NEXT:    s_xor_b32 s2, s0, -1
 ; GFX9-NEXT:    s_bfe_u32 s2, s2, 0x100000
 ; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
 ; GFX9-NEXT:    s_cmp_lt_u32 s2, s1
@@ -2263,7 +2251,7 @@ define amdgpu_ps i16 @s_uaddsat_i16(i16 inreg %lhs, i16 inreg %rhs) {
 ;
 ; GFX10-LABEL: s_uaddsat_i16:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_xor_b32 s2, s0, 0xffff
+; GFX10-NEXT:    s_xor_b32 s2, s0, -1
 ; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
 ; GFX10-NEXT:    s_bfe_u32 s2, s2, 0x100000
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
@@ -2288,21 +2276,21 @@ define amdgpu_ps half @uaddsat_i16_sv(i16 inreg %lhs, i16 %rhs) {
 ;
 ; GFX8-LABEL: uaddsat_i16_sv:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_xor_b32 s1, s0, 0xffff
+; GFX8-NEXT:    s_xor_b32 s1, s0, -1
 ; GFX8-NEXT:    v_min_u16_e32 v0, s1, v0
 ; GFX8-NEXT:    v_add_u16_e32 v0, s0, v0
 ; GFX8-NEXT:    ; return to shader part epilog
 ;
 ; GFX9-LABEL: uaddsat_i16_sv:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_xor_b32 s1, s0, 0xffff
+; GFX9-NEXT:    s_xor_b32 s1, s0, -1
 ; GFX9-NEXT:    v_min_u16_e32 v0, s1, v0
 ; GFX9-NEXT:    v_add_u16_e32 v0, s0, v0
 ; GFX9-NEXT:    ; return to shader part epilog
 ;
 ; GFX10-LABEL: uaddsat_i16_sv:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_xor_b32 s1, s0, 0xffff
+; GFX10-NEXT:    s_xor_b32 s1, s0, -1
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
 ; GFX10-NEXT:    v_min_u16_e64 v0, s1, v0
 ; GFX10-NEXT:    v_add_nc_u16_e64 v0, s0, v0
@@ -2325,21 +2313,21 @@ define amdgpu_ps half @uaddsat_i16_vs(i16 %lhs, i16 inreg %rhs) {
 ;
 ; GFX8-LABEL: uaddsat_i16_vs:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    v_xor_b32_e32 v1, 0xffff, v0
+; GFX8-NEXT:    v_xor_b32_e32 v1, -1, v0
 ; GFX8-NEXT:    v_min_u16_e32 v1, s0, v1
 ; GFX8-NEXT:    v_add_u16_e32 v0, v0, v1
 ; GFX8-NEXT:    ; return to shader part epilog
 ;
 ; GFX9-LABEL: uaddsat_i16_vs:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_xor_b32_e32 v1, 0xffff, v0
+; GFX9-NEXT:    v_xor_b32_e32 v1, -1, v0
 ; GFX9-NEXT:    v_min_u16_e32 v1, s0, v1
 ; GFX9-NEXT:    v_add_u16_e32 v0, v0, v1
 ; GFX9-NEXT:    ; return to shader part epilog
 ;
 ; GFX10-LABEL: uaddsat_i16_vs:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_xor_b32_e32 v1, 0xffff, v0
+; GFX10-NEXT:    v_xor_b32_e32 v1, -1, v0
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
 ; GFX10-NEXT:    v_min_u16_e64 v1, v1, s0
 ; GFX10-NEXT:    v_add_nc_u16_e64 v0, v0, v1
@@ -2370,10 +2358,9 @@ define <2 x i16> @v_uaddsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
 ; GFX8-LABEL: v_uaddsat_v2i16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_mov_b32 s4, 0xffff
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
-; GFX8-NEXT:    v_xor_b32_e32 v3, s4, v0
-; GFX8-NEXT:    v_xor_b32_e32 v4, s4, v2
+; GFX8-NEXT:    v_xor_b32_e32 v3, -1, v0
+; GFX8-NEXT:    v_xor_b32_e32 v4, -1, v2
 ; GFX8-NEXT:    v_min_u16_e32 v3, v3, v1
 ; GFX8-NEXT:    v_min_u16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX8-NEXT:    v_add_u16_e32 v0, v0, v3
@@ -2430,16 +2417,15 @@ define amdgpu_ps i32 @s_uaddsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
 ;
 ; GFX8-LABEL: s_uaddsat_v2i16:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_mov_b32 s4, 0xffff
-; GFX8-NEXT:    s_xor_b32 s5, s0, s4
+; GFX8-NEXT:    s_xor_b32 s4, s0, -1
 ; GFX8-NEXT:    s_lshr_b32 s3, s1, 16
 ; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
-; GFX8-NEXT:    s_bfe_u32 s5, s5, 0x100000
+; GFX8-NEXT:    s_bfe_u32 s4, s4, 0x100000
 ; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
-; GFX8-NEXT:    s_cmp_lt_u32 s5, s1
-; GFX8-NEXT:    s_cselect_b32 s1, s5, s1
+; GFX8-NEXT:    s_cmp_lt_u32 s4, s1
+; GFX8-NEXT:    s_cselect_b32 s1, s4, s1
 ; GFX8-NEXT:    s_add_i32 s0, s0, s1
-; GFX8-NEXT:    s_xor_b32 s1, s2, s4
+; GFX8-NEXT:    s_xor_b32 s1, s2, -1
 ; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
 ; GFX8-NEXT:    s_bfe_u32 s3, s3, 0x100000
 ; GFX8-NEXT:    s_cmp_lt_u32 s1, s3
@@ -2522,12 +2508,11 @@ define amdgpu_ps float @uaddsat_v2i16_sv(<2 x i16> inreg %lhs, <2 x i16> %rhs) {
 ;
 ; GFX8-LABEL: uaddsat_v2i16_sv:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_mov_b32 s2, 0xffff
+; GFX8-NEXT:    s_xor_b32 s2, s0, -1
 ; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
-; GFX8-NEXT:    s_xor_b32 s3, s0, s2
-; GFX8-NEXT:    s_xor_b32 s2, s1, s2
+; GFX8-NEXT:    v_min_u16_e32 v1, s2, v0
+; GFX8-NEXT:    s_xor_b32 s2, s1, -1
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s2
-; GFX8-NEXT:    v_min_u16_e32 v1, s3, v0
 ; GFX8-NEXT:    v_min_u16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s1
 ; GFX8-NEXT:    v_add_u16_e32 v1, s0, v1
@@ -2580,11 +2565,10 @@ define amdgpu_ps float @uaddsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) {
 ;
 ; GFX8-LABEL: uaddsat_v2i16_vs:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_mov_b32 s2, 0xffff
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; GFX8-NEXT:    v_xor_b32_e32 v2, s2, v0
+; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v0
 ; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
-; GFX8-NEXT:    v_xor_b32_e32 v3, s2, v1
+; GFX8-NEXT:    v_xor_b32_e32 v3, -1, v1
 ; GFX8-NEXT:    v_min_u16_e32 v2, s0, v2
 ; GFX8-NEXT:    v_min_u16_e32 v3, s1, v3
 ; GFX8-NEXT:    v_add_u16_e32 v0, v0, v2
@@ -2666,15 +2650,14 @@ define <2 x float> @v_uaddsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
 ; GFX8-LABEL: v_uaddsat_v4i16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_mov_b32 s4, 0xffff
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
-; GFX8-NEXT:    v_xor_b32_e32 v6, s4, v0
+; GFX8-NEXT:    v_xor_b32_e32 v6, -1, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
-; GFX8-NEXT:    v_xor_b32_e32 v7, s4, v4
+; GFX8-NEXT:    v_xor_b32_e32 v7, -1, v4
 ; GFX8-NEXT:    v_min_u16_e32 v6, v6, v2
 ; GFX8-NEXT:    v_min_u16_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT:    v_xor_b32_e32 v7, s4, v1
-; GFX8-NEXT:    v_xor_b32_e32 v8, s4, v5
+; GFX8-NEXT:    v_xor_b32_e32 v7, -1, v1
+; GFX8-NEXT:    v_xor_b32_e32 v8, -1, v5
 ; GFX8-NEXT:    v_min_u16_e32 v7, v7, v3
 ; GFX8-NEXT:    v_min_u16_sdwa v3, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX8-NEXT:    v_add_u16_e32 v0, v0, v6
@@ -2759,30 +2742,29 @@ define amdgpu_ps <2 x i32> @s_uaddsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
 ;
 ; GFX8-LABEL: s_uaddsat_v4i16:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_mov_b32 s8, 0xffff
-; GFX8-NEXT:    s_xor_b32 s9, s0, s8
+; GFX8-NEXT:    s_xor_b32 s8, s0, -1
 ; GFX8-NEXT:    s_lshr_b32 s6, s2, 16
 ; GFX8-NEXT:    s_lshr_b32 s4, s0, 16
 ; GFX8-NEXT:    s_lshr_b32 s5, s1, 16
 ; GFX8-NEXT:    s_lshr_b32 s7, s3, 16
-; GFX8-NEXT:    s_bfe_u32 s9, s9, 0x100000
+; GFX8-NEXT:    s_bfe_u32 s8, s8, 0x100000
 ; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x100000
-; GFX8-NEXT:    s_cmp_lt_u32 s9, s2
-; GFX8-NEXT:    s_cselect_b32 s2, s9, s2
+; GFX8-NEXT:    s_cmp_lt_u32 s8, s2
+; GFX8-NEXT:    s_cselect_b32 s2, s8, s2
 ; GFX8-NEXT:    s_add_i32 s0, s0, s2
-; GFX8-NEXT:    s_xor_b32 s2, s4, s8
+; GFX8-NEXT:    s_xor_b32 s2, s4, -1
 ; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x100000
 ; GFX8-NEXT:    s_bfe_u32 s6, s6, 0x100000
 ; GFX8-NEXT:    s_cmp_lt_u32 s2, s6
 ; GFX8-NEXT:    s_cselect_b32 s2, s2, s6
 ; GFX8-NEXT:    s_add_i32 s4, s4, s2
-; GFX8-NEXT:    s_xor_b32 s2, s1, s8
+; GFX8-NEXT:    s_xor_b32 s2, s1, -1
 ; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x100000
 ; GFX8-NEXT:    s_bfe_u32 s3, s3, 0x100000
 ; GFX8-NEXT:    s_cmp_lt_u32 s2, s3
 ; GFX8-NEXT:    s_cselect_b32 s2, s2, s3
 ; GFX8-NEXT:    s_add_i32 s1, s1, s2
-; GFX8-NEXT:    s_xor_b32 s2, s5, s8
+; GFX8-NEXT:    s_xor_b32 s2, s5, -1
 ; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x100000
 ; GFX8-NEXT:    s_bfe_u32 s3, s7, 0x100000
 ; GFX8-NEXT:    s_cmp_lt_u32 s2, s3
@@ -2944,20 +2926,19 @@ define <3 x float> @v_uaddsat_v6i16(<6 x i16> %lhs, <6 x i16> %rhs) {
 ; GFX8-LABEL: v_uaddsat_v6i16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_mov_b32 s4, 0xffff
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
-; GFX8-NEXT:    v_xor_b32_e32 v9, s4, v0
+; GFX8-NEXT:    v_xor_b32_e32 v9, -1, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
-; GFX8-NEXT:    v_xor_b32_e32 v10, s4, v6
+; GFX8-NEXT:    v_xor_b32_e32 v10, -1, v6
 ; GFX8-NEXT:    v_min_u16_e32 v9, v9, v3
 ; GFX8-NEXT:    v_min_u16_sdwa v3, v10, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT:    v_xor_b32_e32 v10, s4, v1
+; GFX8-NEXT:    v_xor_b32_e32 v10, -1, v1
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
-; GFX8-NEXT:    v_xor_b32_e32 v11, s4, v7
+; GFX8-NEXT:    v_xor_b32_e32 v11, -1, v7
 ; GFX8-NEXT:    v_min_u16_e32 v10, v10, v4
 ; GFX8-NEXT:    v_min_u16_sdwa v4, v11, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT:    v_xor_b32_e32 v11, s4, v2
-; GFX8-NEXT:    v_xor_b32_e32 v12, s4, v8
+; GFX8-NEXT:    v_xor_b32_e32 v11, -1, v2
+; GFX8-NEXT:    v_xor_b32_e32 v12, -1, v8
 ; GFX8-NEXT:    v_add_u16_e32 v0, v0, v9
 ; GFX8-NEXT:    v_add_u16_sdwa v3, v6, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-NEXT:    v_min_u16_e32 v11, v11, v5
@@ -3069,44 +3050,43 @@ define amdgpu_ps <3 x i32> @s_uaddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 ;
 ; GFX8-LABEL: s_uaddsat_v6i16:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_mov_b32 s12, 0xffff
-; GFX8-NEXT:    s_xor_b32 s13, s0, s12
+; GFX8-NEXT:    s_xor_b32 s12, s0, -1
 ; GFX8-NEXT:    s_lshr_b32 s9, s3, 16
 ; GFX8-NEXT:    s_lshr_b32 s6, s0, 16
 ; GFX8-NEXT:    s_lshr_b32 s7, s1, 16
 ; GFX8-NEXT:    s_lshr_b32 s8, s2, 16
 ; GFX8-NEXT:    s_lshr_b32 s10, s4, 16
 ; GFX8-NEXT:    s_lshr_b32 s11, s5, 16
-; GFX8-NEXT:    s_bfe_u32 s13, s13, 0x100000
+; GFX8-NEXT:    s_bfe_u32 s12, s12, 0x100000
 ; GFX8-NEXT:    s_bfe_u32 s3, s3, 0x100000
-; GFX8-NEXT:    s_cmp_lt_u32 s13, s3
-; GFX8-NEXT:    s_cselect_b32 s3, s13, s3
+; GFX8-NEXT:    s_cmp_lt_u32 s12, s3
+; GFX8-NEXT:    s_cselect_b32 s3, s12, s3
 ; GFX8-NEXT:    s_add_i32 s0, s0, s3
-; GFX8-NEXT:    s_xor_b32 s3, s6, s12
+; GFX8-NEXT:    s_xor_b32 s3, s6, -1
 ; GFX8-NEXT:    s_bfe_u32 s3, s3, 0x100000
 ; GFX8-NEXT:    s_bfe_u32 s9, s9, 0x100000
 ; GFX8-NEXT:    s_cmp_lt_u32 s3, s9
 ; GFX8-NEXT:    s_cselect_b32 s3, s3, s9
 ; GFX8-NEXT:    s_add_i32 s6, s6, s3
-; GFX8-NEXT:    s_xor_b32 s3, s1, s12
+; GFX8-NEXT:    s_xor_b32 s3, s1, -1
 ; GFX8-NEXT:    s_bfe_u32 s3, s3, 0x100000
 ; GFX8-NEXT:    s_bfe_u32 s4, s4, 0x100000
 ; GFX8-NEXT:    s_cmp_lt_u32 s3, s4
 ; GFX8-NEXT:    s_cselect_b32 s3, s3, s4
 ; GFX8-NEXT:    s_add_i32 s1, s1, s3
-; GFX8-NEXT:    s_xor_b32 s3, s7, s12
+; GFX8-NEXT:    s_xor_b32 s3, s7, -1
 ; GFX8-NEXT:    s_bfe_u32 s3, s3, 0x100000
 ; GFX8-NEXT:    s_bfe_u32 s4, s10, 0x100000
 ; GFX8-NEXT:    s_cmp_lt_u32 s3, s4
 ; GFX8-NEXT:    s_cselect_b32 s3, s3, s4
 ; GFX8-NEXT:    s_add_i32 s7, s7, s3
-; GFX8-NEXT:    s_xor_b32 s3, s2, s12
+; GFX8-NEXT:    s_xor_b32 s3, s2, -1
 ; GFX8-NEXT:    s_bfe_u32 s3, s3, 0x100000
 ; GFX8-NEXT:    s_bfe_u32 s4, s5, 0x100000
 ; GFX8-NEXT:    s_cmp_lt_u32 s3, s4
 ; GFX8-NEXT:    s_cselect_b32 s3, s3, s4
 ; GFX8-NEXT:    s_add_i32 s2, s2, s3
-; GFX8-NEXT:    s_xor_b32 s3, s8, s12
+; GFX8-NEXT:    s_xor_b32 s3, s8, -1
 ; GFX8-NEXT:    s_bfe_u32 s3, s3, 0x100000
 ; GFX8-NEXT:    s_bfe_u32 s4, s11, 0x100000
 ; GFX8-NEXT:    s_cmp_lt_u32 s3, s4
@@ -3307,28 +3287,27 @@ define <4 x float> @v_uaddsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
 ; GFX8-LABEL: v_uaddsat_v8i16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_mov_b32 s4, 0xffff
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v8, 16, v0
-; GFX8-NEXT:    v_xor_b32_e32 v12, s4, v0
+; GFX8-NEXT:    v_xor_b32_e32 v12, -1, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v9, 16, v1
-; GFX8-NEXT:    v_xor_b32_e32 v13, s4, v8
+; GFX8-NEXT:    v_xor_b32_e32 v13, -1, v8
 ; GFX8-NEXT:    v_min_u16_e32 v12, v12, v4
 ; GFX8-NEXT:    v_min_u16_sdwa v4, v13, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT:    v_xor_b32_e32 v13, s4, v1
+; GFX8-NEXT:    v_xor_b32_e32 v13, -1, v1
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
-; GFX8-NEXT:    v_xor_b32_e32 v14, s4, v9
+; GFX8-NEXT:    v_xor_b32_e32 v14, -1, v9
 ; GFX8-NEXT:    v_min_u16_e32 v13, v13, v5
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v11, 16, v3
 ; GFX8-NEXT:    v_min_u16_sdwa v5, v14, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT:    v_xor_b32_e32 v14, s4, v2
-; GFX8-NEXT:    v_xor_b32_e32 v15, s4, v10
+; GFX8-NEXT:    v_xor_b32_e32 v14, -1, v2
+; GFX8-NEXT:    v_xor_b32_e32 v15, -1, v10
 ; GFX8-NEXT:    v_add_u16_e32 v0, v0, v12
 ; GFX8-NEXT:    v_add_u16_sdwa v4, v8, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-NEXT:    v_min_u16_e32 v14, v14, v6
 ; GFX8-NEXT:    v_min_u16_sdwa v6, v15, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT:    v_xor_b32_e32 v15, s4, v3
+; GFX8-NEXT:    v_xor_b32_e32 v15, -1, v3
 ; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
-; GFX8-NEXT:    v_xor_b32_e32 v16, s4, v11
+; GFX8-NEXT:    v_xor_b32_e32 v16, -1, v11
 ; GFX8-NEXT:    v_add_u16_e32 v1, v1, v13
 ; GFX8-NEXT:    v_add_u16_sdwa v4, v9, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-NEXT:    v_min_u16_e32 v15, v15, v7
@@ -3464,8 +3443,7 @@ define amdgpu_ps <4 x i32> @s_uaddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ;
 ; GFX8-LABEL: s_uaddsat_v8i16:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_mov_b32 s16, 0xffff
-; GFX8-NEXT:    s_xor_b32 s17, s0, s16
+; GFX8-NEXT:    s_xor_b32 s16, s0, -1
 ; GFX8-NEXT:    s_lshr_b32 s12, s4, 16
 ; GFX8-NEXT:    s_lshr_b32 s8, s0, 16
 ; GFX8-NEXT:    s_lshr_b32 s9, s1, 16
@@ -3474,48 +3452,48 @@ define amdgpu_ps <4 x i32> @s_uaddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX8-NEXT:    s_lshr_b32 s13, s5, 16
 ; GFX8-NEXT:    s_lshr_b32 s14, s6, 16
 ; GFX8-NEXT:    s_lshr_b32 s15, s7, 16
-; GFX8-NEXT:    s_bfe_u32 s17, s17, 0x100000
+; GFX8-NEXT:    s_bfe_u32 s16, s16, 0x100000
 ; GFX8-NEXT:    s_bfe_u32 s4, s4, 0x100000
-; GFX8-NEXT:    s_cmp_lt_u32 s17, s4
-; GFX8-NEXT:    s_cselect_b32 s4, s17, s4
+; GFX8-NEXT:    s_cmp_lt_u32 s16, s4
+; GFX8-NEXT:    s_cselect_b32 s4, s16, s4
 ; GFX8-NEXT:    s_add_i32 s0, s0, s4
-; GFX8-NEXT:    s_xor_b32 s4, s8, s16
+; GFX8-NEXT:    s_xor_b32 s4, s8, -1
 ; GFX8-NEXT:    s_bfe_u32 s4, s4, 0x100000
 ; GFX8-NEXT:    s_bfe_u32 s12, s12, 0x100000
 ; GFX8-NEXT:    s_cmp_lt_u32 s4, s12
 ; GFX8-NEXT:    s_cselect_b32 s4, s4, s12
 ; GFX8-NEXT:    s_add_i32 s8, s8, s4
-; GFX8-NEXT:    s_xor_b32 s4, s1, s16
+; GFX8-NEXT:    s_xor_b32 s4, s1, -1
 ; GFX8-NEXT:    s_bfe_u32 s4, s4, 0x100000
 ; GFX8-NEXT:    s_bfe_u32 s5, s5, 0x100000
 ; GFX8-NEXT:    s_cmp_lt_u32 s4, s5
 ; GFX8-NEXT:    s_cselect_b32 s4, s4, s5
 ; GFX8-NEXT:    s_add_i32 s1, s1, s4
-; GFX8-NEXT:    s_xor_b32 s4, s9, s16
+; GFX8-NEXT:    s_xor_b32 s4, s9, -1
 ; GFX8-NEXT:    s_bfe_u32 s4, s4, 0x100000
 ; GFX8-NEXT:    s_bfe_u32 s5, s13, 0x100000
 ; GFX8-NEXT:    s_cmp_lt_u32 s4, s5
 ; GFX8-NEXT:    s_cselect_b32 s4, s4, s5
 ; GFX8-NEXT:    s_add_i32 s9, s9, s4
-; GFX8-NEXT:    s_xor_b32 s4, s2, s16
+; GFX8-NEXT:    s_xor_b32 s4, s2, -1
 ; GFX8-NEXT:    s_bfe_u32 s4, s4, 0x100000
 ; GFX8-NEXT:    s_bfe_u32 s5, s6, 0x100000
 ; GFX8-NEXT:    s_cmp_lt_u32 s4, s5
 ; GFX8-NEXT:    s_cselect_b32 s4, s4, s5
 ; GFX8-NEXT:    s_add_i32 s2, s2, s4
-; GFX8-NEXT:    s_xor_b32 s4, s10, s16
+; GFX8-NEXT:    s_xor_b32 s4, s10, -1
 ; GFX8-NEXT:    s_bfe_u32 s4, s4, 0x100000
 ; GFX8-NEXT:    s_bfe_u32 s5, s14, 0x100000
 ; GFX8-NEXT:    s_cmp_lt_u32 s4, s5
 ; GFX8-NEXT:    s_cselect_b32 s4, s4, s5
 ; GFX8-NEXT:    s_add_i32 s10, s10, s4
-; GFX8-NEXT:    s_xor_b32 s4, s3, s16
+; GFX8-NEXT:    s_xor_b32 s4, s3, -1
 ; GFX8-NEXT:    s_bfe_u32 s4, s4, 0x100000
 ; GFX8-NEXT:    s_bfe_u32 s5, s7, 0x100000
 ; GFX8-NEXT:    s_cmp_lt_u32 s4, s5
 ; GFX8-NEXT:    s_cselect_b32 s4, s4, s5
 ; GFX8-NEXT:    s_add_i32 s3, s3, s4
-; GFX8-NEXT:    s_xor_b32 s4, s11, s16
+; GFX8-NEXT:    s_xor_b32 s4, s11, -1
 ; GFX8-NEXT:    s_bfe_u32 s4, s4, 0x100000
 ; GFX8-NEXT:    s_bfe_u32 s5, s15, 0x100000
 ; GFX8-NEXT:    s_cmp_lt_u32 s4, s5
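
A note on the pattern above: throughout these uaddsat test updates, a materialized mask (s_mov_b32 sN, 0xffff feeding v_xor_b32_e32 or s_xor_b32) collapses to an inline -1 operand. Once a 16-bit all-ones constant is treated as sign extended, its value is -1, which fits AMDGPU's integer inline-immediate range (-16..64), so no 32-bit literal or scratch SGPR copy is needed. A minimal standalone sketch of the value difference (an illustration, not code from this patch):

    // Illustration only (not from the patch): the two 64-bit values a
    // 16-bit all-ones constant can take, depending on the extension used.
    #include <cstdint>
    #include <iostream>

    int main() {
      uint16_t mask = 0xffffu;      // the 16-bit NOT mask from the tests
      uint64_t zext = mask;         // 65535: needs a literal / SGPR copy
      int64_t sext = (int16_t)mask; // -1: fits the inline-immediate range
      std::cout << zext << " " << sext << "\n";
      return 0;
    }

The same reasoning explains the saved instruction in each hunk: -1 encodes directly in the instruction, while 0xffff had to be staged through s_mov_b32 first.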

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll
index 25eafb45f930..54eebc920579 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll
@@ -311,7 +311,7 @@ define <2 x i32> @v_udiv_v2i32_pow2k_denom(<2 x i32> %num) {
 ; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CGP-NEXT:    s_movk_i32 s4, 0x1000
 ; CGP-NEXT:    v_mov_b32_e32 v2, 0x1000
-; CGP-NEXT:    s_mov_b32 s5, 0xfffff000
+; CGP-NEXT:    s_movk_i32 s5, 0xf000
 ; CGP-NEXT:    v_mov_b32_e32 v3, 0xfffff000
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v4, s4
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v2
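
The scalar-constant change here shows the SGPR side of the same mechanism: s_movk_i32 sign extends its 16-bit immediate when writing the destination, so the sign-extended constant 0xfffff000 now fits the shorter s_movk_i32 encoding as 0xf000. A standalone arithmetic check (an illustration, not code from the patch):

    // Illustration only: s_movk_i32 writes SignExtend(simm16), so the
    // 16-bit encoding 0xf000 reproduces the full 32-bit 0xfffff000.
    #include <cstdint>
    #include <cstdio>

    int main() {
      int16_t simm16 = (int16_t)0xf000;          // immediate in the new encoding
      uint32_t sgpr = (uint32_t)(int32_t)simm16; // hardware-style sign extension
      std::printf("0x%08x\n", sgpr);             // prints 0xfffff000
      return 0;
    }
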

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
index e956af93bc6f..59b4318012e4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
@@ -185,14 +185,13 @@ declare i32 @llvm.amdgcn.readfirstlane(i32)
 define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-LABEL: s_udiv_i64:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_or_b64 s[4:5], s[0:1], s[2:3]
-; CHECK-NEXT:    s_mov_b32 s6, 0
-; CHECK-NEXT:    s_mov_b32 s7, -1
-; CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
-; CHECK-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[4:5], 0
-; CHECK-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, 1
-; CHECK-NEXT:    s_xor_b64 vcc, s[4:5], s[6:7]
-; CHECK-NEXT:    s_mov_b32 s4, 1
+; CHECK-NEXT:    s_or_b64 s[6:7], s[0:1], s[2:3]
+; CHECK-NEXT:    s_mov_b32 s4, 0
+; CHECK-NEXT:    s_mov_b32 s5, -1
+; CHECK-NEXT:    s_and_b64 s[6:7], s[6:7], s[4:5]
+; CHECK-NEXT:    v_cmp_eq_u64_e64 s[6:7], s[6:7], 0
+; CHECK-NEXT:    v_cmp_ne_u32_e64 s[8:9], 0, 1
+; CHECK-NEXT:    s_xor_b64 vcc, s[6:7], s[8:9]
 ; CHECK-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; CHECK-NEXT:    s_cbranch_vccz BB1_2
 ; CHECK-NEXT:  ; %bb.1:
@@ -324,9 +323,9 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:    v_cndmask_b32_e32 v1, v8, v4, vcc
 ; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; CHECK-NEXT:    s_mov_b32 s4, 0
+; CHECK-NEXT:    s_mov_b32 s5, 0
 ; CHECK-NEXT:  BB1_2: ; %Flow
-; CHECK-NEXT:    s_and_b32 s1, s4, 1
+; CHECK-NEXT:    s_and_b32 s1, s5, 1
 ; CHECK-NEXT:    s_cmp_lg_u32 s1, 0
 ; CHECK-NEXT:    s_cbranch_scc0 BB1_4
 ; CHECK-NEXT:  ; %bb.3:

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
index 68a83a91c62f..f331deea89e5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
@@ -286,7 +286,7 @@ define <2 x i32> @v_urem_v2i32_pow2k_denom(<2 x i32> %num) {
 ; CGP-NEXT:    s_movk_i32 s4, 0x1000
 ; CGP-NEXT:    v_mov_b32_e32 v2, 0x1000
 ; CGP-NEXT:    s_mov_b32 s5, 0x4f7ffffe
-; CGP-NEXT:    s_mov_b32 s6, 0xfffff000
+; CGP-NEXT:    s_movk_i32 s6, 0xf000
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v3, s4
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
 ; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, v3

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
index 6b9357043b3c..a01ba29cc327 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -182,14 +182,13 @@ declare i32 @llvm.amdgcn.readfirstlane(i32)
 define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-LABEL: s_urem_i64:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_or_b64 s[4:5], s[0:1], s[2:3]
-; CHECK-NEXT:    s_mov_b32 s6, 0
-; CHECK-NEXT:    s_mov_b32 s7, -1
-; CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
-; CHECK-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[4:5], 0
-; CHECK-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, 1
-; CHECK-NEXT:    s_xor_b64 vcc, s[4:5], s[6:7]
-; CHECK-NEXT:    s_mov_b32 s4, 1
+; CHECK-NEXT:    s_or_b64 s[6:7], s[0:1], s[2:3]
+; CHECK-NEXT:    s_mov_b32 s4, 0
+; CHECK-NEXT:    s_mov_b32 s5, -1
+; CHECK-NEXT:    s_and_b64 s[6:7], s[6:7], s[4:5]
+; CHECK-NEXT:    v_cmp_eq_u64_e64 s[6:7], s[6:7], 0
+; CHECK-NEXT:    v_cmp_ne_u32_e64 s[8:9], 0, 1
+; CHECK-NEXT:    s_xor_b64 vcc, s[6:7], s[8:9]
 ; CHECK-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; CHECK-NEXT:    s_cbranch_vccz BB1_2
 ; CHECK-NEXT:  ; %bb.1:
@@ -320,9 +319,9 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v3, v5, vcc
 ; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
 ; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; CHECK-NEXT:    s_mov_b32 s4, 0
+; CHECK-NEXT:    s_mov_b32 s5, 0
 ; CHECK-NEXT:  BB1_2: ; %Flow
-; CHECK-NEXT:    s_and_b32 s1, s4, 1
+; CHECK-NEXT:    s_and_b32 s1, s5, 1
 ; CHECK-NEXT:    s_cmp_lg_u32 s1, 0
 ; CHECK-NEXT:    s_cbranch_scc0 BB1_4
 ; CHECK-NEXT:  ; %bb.3:
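
In the s_udiv_i64 and s_urem_i64 hunks, the standalone s_mov_b32 s4, 1 flag disappears and the Flow block tests s5 instead. A plausible reading, inferred from the surrounding lines rather than stated in the commit: s5 already holds the -1 written for the mask at the top of the block, and since s_and_b32 s1, s5, 1 only inspects bit 0, that -1 stands in for the old constant 1 on the path that skips %bb.1, while %bb.1 itself overwrites s5 with 0. The bit arithmetic:

    // Illustration only: why reusing s5 (holding -1) preserves the Flow
    // check that previously read a dedicated s4 = 1.
    #include <cassert>
    #include <cstdint>

    int main() {
      int32_t s5 = -1;       // s_mov_b32 s5, -1 (mask constant, reused)
      assert((s5 & 1) == 1); // s_and_b32 s1, s5, 1: same result as s4 = 1
      s5 = 0;                // s_mov_b32 s5, 0 on the %bb.1 path
      assert((s5 & 1) == 0);
      return 0;
    }
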


        

