[llvm] b3bb5c3 - [AMDGPU][GlobalISel] Use scalar min/max instructions
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 4 09:04:41 PST 2021
Author: Jay Foad
Date: 2021-02-04T17:04:32Z
New Revision: b3bb5c3efc971f595a08446f3e58c0fd4162c26d
URL: https://github.com/llvm/llvm-project/commit/b3bb5c3efc971f595a08446f3e58c0fd4162c26d
DIFF: https://github.com/llvm/llvm-project/commit/b3bb5c3efc971f595a08446f3e58c0fd4162c26d.diff
LOG: [AMDGPU][GlobalISel] Use scalar min/max instructions
SALU min/max s32 instructions exist, so use them. This means that
regbankselect can handle min/max much like add/sub/mul/shifts.
Differential Revision: https://reviews.llvm.org/D96047
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 502356d4f9a4..408c8d96439e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -591,21 +591,6 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
return AltMappings;
}
- case TargetOpcode::G_SMIN:
- case TargetOpcode::G_SMAX:
- case TargetOpcode::G_UMIN:
- case TargetOpcode::G_UMAX: {
- static const OpRegBankEntry<3> Table[2] = {
- { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
-
- // Scalar requires cmp+select, and extends if 16-bit.
- // FIXME: Should there be separate costs for 32 and 16-bit
- { { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 3 }
- };
-
- const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 1, 2 } };
- return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
- }
case TargetOpcode::G_UADDE:
case TargetOpcode::G_USUBE:
case TargetOpcode::G_SADDE:
@@ -1576,23 +1561,8 @@ bool AMDGPURegisterBankInfo::applyMappingBFEIntrinsic(
return true;
}
-// FIXME: Duplicated from LegalizerHelper
-static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
- switch (Opc) {
- case TargetOpcode::G_SMIN:
- return CmpInst::ICMP_SLT;
- case TargetOpcode::G_SMAX:
- return CmpInst::ICMP_SGT;
- case TargetOpcode::G_UMIN:
- return CmpInst::ICMP_ULT;
- case TargetOpcode::G_UMAX:
- return CmpInst::ICMP_UGT;
- default:
- llvm_unreachable("not in integer min/max");
- }
-}
-
-static unsigned minMaxToExtend(unsigned Opc) {
+// Return a suitable opcode for extending the operands of Opc when widening.
+static unsigned getExtendOp(unsigned Opc) {
switch (Opc) {
case TargetOpcode::G_SMIN:
case TargetOpcode::G_SMAX:
@@ -1601,7 +1571,7 @@ static unsigned minMaxToExtend(unsigned Opc) {
case TargetOpcode::G_UMAX:
return TargetOpcode::G_ZEXT;
default:
- llvm_unreachable("not in integer min/max");
+ return TargetOpcode::G_ANYEXT;
}
}
@@ -1628,30 +1598,6 @@ unpackV2S16ToS32(MachineIRBuilder &B, Register Src, unsigned ExtOpcode) {
return std::make_pair(Bitcast.getReg(0), ShiftHi.getReg(0));
}
-static MachineInstr *buildExpandedScalarMinMax(MachineIRBuilder &B,
- CmpInst::Predicate Pred,
- Register Dst, Register Src0,
- Register Src1) {
- const LLT CmpType = LLT::scalar(32);
- auto Cmp = B.buildICmp(Pred, CmpType, Src0, Src1);
- return B.buildSelect(Dst, Cmp, Src0, Src1);
-}
-
-// FIXME: Duplicated from LegalizerHelper, except changing the boolean type.
-void AMDGPURegisterBankInfo::lowerScalarMinMax(MachineIRBuilder &B,
- MachineInstr &MI) const {
- Register Dst = MI.getOperand(0).getReg();
- Register Src0 = MI.getOperand(1).getReg();
- Register Src1 = MI.getOperand(2).getReg();
-
- const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
- MachineInstr *Sel = buildExpandedScalarMinMax(B, Pred, Dst, Src0, Src1);
-
- Register CmpReg = Sel->getOperand(1).getReg();
- B.getMRI()->setRegBank(CmpReg, AMDGPU::SGPRRegBank);
- MI.eraseFromParent();
-}
-
// For cases where only a single copy is inserted for matching register banks.
// Replace the register in the instruction operand
static bool substituteSimpleCopyRegs(
@@ -2341,7 +2287,11 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
case AMDGPU::G_MUL:
case AMDGPU::G_SHL:
case AMDGPU::G_LSHR:
- case AMDGPU::G_ASHR: {
+ case AMDGPU::G_ASHR:
+ case AMDGPU::G_SMIN:
+ case AMDGPU::G_SMAX:
+ case AMDGPU::G_UMIN:
+ case AMDGPU::G_UMAX: {
Register DstReg = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(DstReg);
@@ -2365,10 +2315,11 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
Register WideSrc0Lo, WideSrc0Hi;
Register WideSrc1Lo, WideSrc1Hi;
+ unsigned ExtendOp = getExtendOp(MI.getOpcode());
std::tie(WideSrc0Lo, WideSrc0Hi)
- = unpackV2S16ToS32(B, MI.getOperand(1).getReg(), AMDGPU::G_ANYEXT);
+ = unpackV2S16ToS32(B, MI.getOperand(1).getReg(), ExtendOp);
std::tie(WideSrc1Lo, WideSrc1Hi)
- = unpackV2S16ToS32(B, MI.getOperand(2).getReg(), AMDGPU::G_ANYEXT);
+ = unpackV2S16ToS32(B, MI.getOperand(2).getReg(), ExtendOp);
auto Lo = B.buildInstr(MI.getOpcode(), {S32}, {WideSrc0Lo, WideSrc1Lo});
auto Hi = B.buildInstr(MI.getOpcode(), {S32}, {WideSrc0Hi, WideSrc1Hi});
B.buildBuildVectorTrunc(DstReg, {Lo.getReg(0), Hi.getReg(0)});
@@ -2390,73 +2341,6 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
return;
}
- case AMDGPU::G_SMIN:
- case AMDGPU::G_SMAX:
- case AMDGPU::G_UMIN:
- case AMDGPU::G_UMAX: {
- Register DstReg = MI.getOperand(0).getReg();
- const RegisterBank *DstBank =
- OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
- if (DstBank == &AMDGPU::VGPRRegBank)
- break;
-
- MachineFunction *MF = MI.getParent()->getParent();
- MachineIRBuilder B(MI);
-
- // Turn scalar min/max into a compare and select.
- LLT Ty = MRI.getType(DstReg);
- const LLT S32 = LLT::scalar(32);
- const LLT S16 = LLT::scalar(16);
- const LLT V2S16 = LLT::vector(2, 16);
-
- if (Ty == V2S16) {
- ApplyRegBankMapping ApplySALU(*this, MRI, &AMDGPU::SGPRRegBank);
- B.setChangeObserver(ApplySALU);
-
- // Need to widen to s32, and expand as cmp + select, and avoid producing
- // illegal vector extends or unmerges that would need further
- // legalization.
- //
- // TODO: Should we just readfirstlane? That should probably be handled
- // with a UniformVGPR register bank that wouldn't need special
- // consideration here.
-
- Register Dst = MI.getOperand(0).getReg();
- Register Src0 = MI.getOperand(1).getReg();
- Register Src1 = MI.getOperand(2).getReg();
-
- Register WideSrc0Lo, WideSrc0Hi;
- Register WideSrc1Lo, WideSrc1Hi;
-
- unsigned ExtendOp = minMaxToExtend(MI.getOpcode());
-
- std::tie(WideSrc0Lo, WideSrc0Hi) = unpackV2S16ToS32(B, Src0, ExtendOp);
- std::tie(WideSrc1Lo, WideSrc1Hi) = unpackV2S16ToS32(B, Src1, ExtendOp);
-
- Register Lo = MRI.createGenericVirtualRegister(S32);
- Register Hi = MRI.createGenericVirtualRegister(S32);
- const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
- buildExpandedScalarMinMax(B, Pred, Lo, WideSrc0Lo, WideSrc1Lo);
- buildExpandedScalarMinMax(B, Pred, Hi, WideSrc0Hi, WideSrc1Hi);
-
- B.buildBuildVectorTrunc(Dst, {Lo, Hi});
- MI.eraseFromParent();
- } else if (Ty == S16) {
- ApplyRegBankMapping ApplySALU(*this, MRI, &AMDGPU::SGPRRegBank);
- B.setChangeObserver(ApplySALU);
- LegalizerHelper Helper(*MF, ApplySALU, B);
-
- // Need to widen to s32, and expand as cmp + select.
- if (Helper.widenScalar(MI, 0, S32) != LegalizerHelper::Legalized)
- llvm_unreachable("widenScalar should have succeeded");
-
- // FIXME: This is relying on widenScalar leaving MI in place.
- lowerScalarMinMax(B, MI);
- } else
- lowerScalarMinMax(B, MI);
-
- return;
- }
case AMDGPU::G_SEXT_INREG: {
SmallVector<Register, 2> SrcRegs(OpdMapper.getVRegs(1));
if (SrcRegs.empty())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
index 1c1441729e30..c481aadeb226 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
@@ -84,8 +84,6 @@ class AMDGPURegisterBankInfo final : public AMDGPUGenRegisterBankInfo {
bool applyMappingBFEIntrinsic(const OperandsMapper &OpdMapper,
bool Signed) const;
- void lowerScalarMinMax(MachineIRBuilder &B, MachineInstr &MI) const;
-
Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI,
Register Reg) const;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir
index c12d209d9bda..d932e66ca0f9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir
@@ -13,8 +13,7 @@ body: |
; CHECK-LABEL: name: smax_s32_ss
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
- ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(sgt), [[COPY]](s32), [[COPY1]]
- ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]]
+ ; CHECK: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[COPY]], [[COPY1]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_SMAX %0, %1
@@ -90,9 +89,8 @@ body: |
; CHECK-LABEL: name: smax_s32_ss_vgpr_use
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
- ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(sgt), [[COPY]](s32), [[COPY1]]
- ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]]
- ; CHECK: $vgpr0 = COPY [[SELECT]](s32)
+ ; CHECK: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[COPY]], [[COPY1]]
+ ; CHECK: $vgpr0 = COPY [[SMAX]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_SMAX %0, %1
@@ -114,9 +112,8 @@ body: |
; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
; CHECK: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s16)
; CHECK: [[SEXT1:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC1]](s16)
- ; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC %8(s32)
- ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(sgt), [[SEXT]](s32), [[SEXT1]]
- ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[SEXT]], [[SEXT1]]
+ ; CHECK: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[SEXT]], [[SEXT1]]
+ ; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SMAX]](s32)
; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16)
; CHECK: $sgpr0 = COPY [[ANYEXT]](s32)
%0:_(s32) = COPY $sgpr0
@@ -144,9 +141,8 @@ body: |
; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
; CHECK: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s16)
; CHECK: [[SEXT1:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC1]](s16)
- ; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC %8(s32)
- ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(sgt), [[SEXT]](s32), [[SEXT1]]
- ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[SEXT]], [[SEXT1]]
+ ; CHECK: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[SEXT]], [[SEXT1]]
+ ; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SMAX]](s32)
; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16)
; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
%0:_(s32) = COPY $sgpr0
@@ -178,11 +174,9 @@ body: |
; CHECK: [[SEXT_INREG1:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[BITCAST1]], 16
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
; CHECK: [[ASHR1:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST1]], [[C1]](s32)
- ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(sgt), [[SEXT_INREG]](s32), [[SEXT_INREG1]]
- ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[SEXT_INREG]], [[SEXT_INREG1]]
- ; CHECK: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(sgt), [[ASHR]](s32), [[ASHR1]]
- ; CHECK: [[SELECT1:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP1]](s32), [[ASHR]], [[ASHR1]]
- ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT]](s32), [[SELECT1]](s32)
+ ; CHECK: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]]
+ ; CHECK: [[SMAX1:%[0-9]+]]:sgpr(s32) = G_SMAX [[ASHR]], [[ASHR1]]
+ ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SMAX]](s32), [[SMAX1]](s32)
; CHECK: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
%0:_(<2 x s16>) = COPY $sgpr0
%1:_(<2 x s16>) = COPY $sgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir
index da19ddcb86df..1efac8980b5e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir
@@ -1,6 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s | FileCheck %s
-# XUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s | FileCheck %s
---
name: smin_s32_ss
@@ -13,9 +13,8 @@ body: |
; CHECK-LABEL: name: smin_s32_ss
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
- ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[COPY]](s32), [[COPY1]]
- ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]]
- ; CHECK: $sgpr0 = COPY [[SELECT]](s32)
+ ; CHECK: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[COPY]], [[COPY1]]
+ ; CHECK: $sgpr0 = COPY [[SMIN]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_SMIN %0, %1
@@ -93,9 +92,8 @@ body: |
; CHECK-LABEL: name: smin_s32_ss_vgpr_use
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
- ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[COPY]](s32), [[COPY1]]
- ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]]
- ; CHECK: $vgpr0 = COPY [[SELECT]](s32)
+ ; CHECK: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[COPY]], [[COPY1]]
+ ; CHECK: $vgpr0 = COPY [[SMIN]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_SMIN %0, %1
@@ -117,9 +115,8 @@ body: |
; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
; CHECK: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s16)
; CHECK: [[SEXT1:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC1]](s16)
- ; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC %8(s32)
- ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SEXT]](s32), [[SEXT1]]
- ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[SEXT]], [[SEXT1]]
+ ; CHECK: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[SEXT]], [[SEXT1]]
+ ; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SMIN]](s32)
; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16)
; CHECK: $sgpr0 = COPY [[ANYEXT]](s32)
%0:_(s32) = COPY $sgpr0
@@ -147,9 +144,8 @@ body: |
; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
; CHECK: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s16)
; CHECK: [[SEXT1:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC1]](s16)
- ; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC %8(s32)
- ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SEXT]](s32), [[SEXT1]]
- ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[SEXT]], [[SEXT1]]
+ ; CHECK: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[SEXT]], [[SEXT1]]
+ ; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SMIN]](s32)
; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16)
; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
%0:_(s32) = COPY $sgpr0
@@ -181,11 +177,9 @@ body: |
; CHECK: [[SEXT_INREG1:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[BITCAST1]], 16
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
; CHECK: [[ASHR1:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST1]], [[C1]](s32)
- ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]]
- ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[SEXT_INREG]], [[SEXT_INREG1]]
- ; CHECK: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[ASHR]](s32), [[ASHR1]]
- ; CHECK: [[SELECT1:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP1]](s32), [[ASHR]], [[ASHR1]]
- ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT]](s32), [[SELECT1]](s32)
+ ; CHECK: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]]
+ ; CHECK: [[SMIN1:%[0-9]+]]:sgpr(s32) = G_SMIN [[ASHR]], [[ASHR1]]
+ ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SMIN]](s32), [[SMIN1]](s32)
; CHECK: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
%0:_(<2 x s16>) = COPY $sgpr0
%1:_(<2 x s16>) = COPY $sgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir
index 3f464f890d04..5b4cc72990c6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir
@@ -13,9 +13,8 @@ body: |
; CHECK-LABEL: name: umax_s32_ss
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
- ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY1]]
- ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]]
- ; CHECK: $sgpr0 = COPY [[SELECT]](s32)
+ ; CHECK: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[COPY]], [[COPY1]]
+ ; CHECK: $sgpr0 = COPY [[UMAX]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_UMAX %0, %1
@@ -93,9 +92,8 @@ body: |
; CHECK-LABEL: name: umax_s32_ss_vgpr_use
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
- ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY1]]
- ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]]
- ; CHECK: $vgpr0 = COPY [[SELECT]](s32)
+ ; CHECK: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[COPY]], [[COPY1]]
+ ; CHECK: $vgpr0 = COPY [[UMAX]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_UMAX %0, %1
@@ -117,9 +115,8 @@ body: |
; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
; CHECK: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s16)
; CHECK: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s16)
- ; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC %8(s32)
- ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ugt), [[ZEXT]](s32), [[ZEXT1]]
- ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[ZEXT]], [[ZEXT1]]
+ ; CHECK: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[ZEXT]], [[ZEXT1]]
+ ; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[UMAX]](s32)
; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16)
; CHECK: $sgpr0 = COPY [[ANYEXT]](s32)
%0:_(s32) = COPY $sgpr0
@@ -147,9 +144,8 @@ body: |
; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
; CHECK: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s16)
; CHECK: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s16)
- ; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC %8(s32)
- ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ugt), [[ZEXT]](s32), [[ZEXT1]]
- ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[ZEXT]], [[ZEXT1]]
+ ; CHECK: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[ZEXT]], [[ZEXT1]]
+ ; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[UMAX]](s32)
; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16)
; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
%0:_(s32) = COPY $sgpr0
@@ -183,11 +179,9 @@ body: |
; CHECK: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
; CHECK: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
; CHECK: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST1]], [[C3]]
- ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ugt), [[AND]](s32), [[AND1]]
- ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[AND]], [[AND1]]
- ; CHECK: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ugt), [[LSHR]](s32), [[LSHR1]]
- ; CHECK: [[SELECT1:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP1]](s32), [[LSHR]], [[LSHR1]]
- ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT]](s32), [[SELECT1]](s32)
+ ; CHECK: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[AND]], [[AND1]]
+ ; CHECK: [[UMAX1:%[0-9]+]]:sgpr(s32) = G_UMAX [[LSHR]], [[LSHR1]]
+ ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UMAX]](s32), [[UMAX1]](s32)
; CHECK: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
%0:_(<2 x s16>) = COPY $sgpr0
%1:_(<2 x s16>) = COPY $sgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir
index 35072751a069..7e14b1fb6615 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir
@@ -13,9 +13,8 @@ body: |
; CHECK-LABEL: name: umin_s32_ss
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
- ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY1]]
- ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]]
- ; CHECK: $sgpr0 = COPY [[SELECT]](s32)
+ ; CHECK: [[UMIN:%[0-9]+]]:sgpr(s32) = G_UMIN [[COPY]], [[COPY1]]
+ ; CHECK: $sgpr0 = COPY [[UMIN]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_UMIN %0, %1
@@ -97,9 +96,8 @@ body: |
; CHECK-LABEL: name: umin_s32_ss_vgpr_use
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
- ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY1]]
- ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]]
- ; CHECK: $vgpr0 = COPY [[SELECT]](s32)
+ ; CHECK: [[UMIN:%[0-9]+]]:sgpr(s32) = G_UMIN [[COPY]], [[COPY1]]
+ ; CHECK: $vgpr0 = COPY [[UMIN]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_UMIN %0, %1
@@ -121,9 +119,8 @@ body: |
; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
; CHECK: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s16)
; CHECK: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s16)
- ; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC %8(s32)
- ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ult), [[ZEXT]](s32), [[ZEXT1]]
- ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[ZEXT]], [[ZEXT1]]
+ ; CHECK: [[UMIN:%[0-9]+]]:sgpr(s32) = G_UMIN [[ZEXT]], [[ZEXT1]]
+ ; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[UMIN]](s32)
; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16)
; CHECK: $sgpr0 = COPY [[ANYEXT]](s32)
%0:_(s32) = COPY $sgpr0
@@ -151,9 +148,8 @@ body: |
; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
; CHECK: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s16)
; CHECK: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s16)
- ; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC %8(s32)
- ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ult), [[ZEXT]](s32), [[ZEXT1]]
- ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[ZEXT]], [[ZEXT1]]
+ ; CHECK: [[UMIN:%[0-9]+]]:sgpr(s32) = G_UMIN [[ZEXT]], [[ZEXT1]]
+ ; CHECK: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[UMIN]](s32)
; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s16)
; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
%0:_(s32) = COPY $sgpr0
@@ -187,11 +183,9 @@ body: |
; CHECK: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
; CHECK: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
; CHECK: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST1]], [[C3]]
- ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ult), [[AND]](s32), [[AND1]]
- ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[AND]], [[AND1]]
- ; CHECK: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ult), [[LSHR]](s32), [[LSHR1]]
- ; CHECK: [[SELECT1:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP1]](s32), [[LSHR]], [[LSHR1]]
- ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT]](s32), [[SELECT1]](s32)
+ ; CHECK: [[UMIN:%[0-9]+]]:sgpr(s32) = G_UMIN [[AND]], [[AND1]]
+ ; CHECK: [[UMIN1:%[0-9]+]]:sgpr(s32) = G_UMIN [[LSHR]], [[LSHR1]]
+ ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UMIN]](s32), [[UMIN1]](s32)
; CHECK: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
%0:_(<2 x s16>) = COPY $sgpr0
%1:_(<2 x s16>) = COPY $sgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
index 8bb6c12c0c7d..a4e7e30b42b8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
@@ -61,17 +61,13 @@ define amdgpu_ps i7 @s_saddsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
; GFX6-LABEL: s_saddsat_i7:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 25
+; GFX6-NEXT: s_min_i32 s3, s0, 0
+; GFX6-NEXT: s_max_i32 s2, s0, 0
; GFX6-NEXT: s_lshl_b32 s1, s1, 25
-; GFX6-NEXT: s_cmp_gt_i32 s0, 0
-; GFX6-NEXT: s_cselect_b32 s2, s0, 0
-; GFX6-NEXT: s_sub_i32 s2, 0x7fffffff, s2
-; GFX6-NEXT: s_cmp_lt_i32 s0, 0
-; GFX6-NEXT: s_cselect_b32 s3, s0, 0
; GFX6-NEXT: s_sub_i32 s3, 0x80000000, s3
-; GFX6-NEXT: s_cmp_gt_i32 s3, s1
-; GFX6-NEXT: s_cselect_b32 s1, s3, s1
-; GFX6-NEXT: s_cmp_lt_i32 s1, s2
-; GFX6-NEXT: s_cselect_b32 s1, s1, s2
+; GFX6-NEXT: s_sub_i32 s2, 0x7fffffff, s2
+; GFX6-NEXT: s_max_i32 s1, s3, s1
+; GFX6-NEXT: s_min_i32 s1, s1, s2
; GFX6-NEXT: s_add_i32 s0, s0, s1
; GFX6-NEXT: s_ashr_i32 s0, s0, 25
; GFX6-NEXT: ; return to shader part epilog
@@ -80,23 +76,19 @@ define amdgpu_ps i7 @s_saddsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_bfe_u32 s2, 9, 0x100000
; GFX8-NEXT: s_lshl_b32 s0, s0, s2
-; GFX8-NEXT: s_lshl_b32 s1, s1, s2
; GFX8-NEXT: s_sext_i32_i16 s3, s0
; GFX8-NEXT: s_sext_i32_i16 s4, 0
-; GFX8-NEXT: s_cmp_gt_i32 s3, s4
-; GFX8-NEXT: s_cselect_b32 s5, s3, s4
-; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
-; GFX8-NEXT: s_cmp_lt_i32 s3, s4
-; GFX8-NEXT: s_cselect_b32 s3, s3, s4
+; GFX8-NEXT: s_max_i32 s5, s3, s4
+; GFX8-NEXT: s_min_i32 s3, s3, s4
+; GFX8-NEXT: s_lshl_b32 s1, s1, s2
; GFX8-NEXT: s_sub_i32 s3, 0xffff8000, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_cmp_gt_i32 s3, s1
-; GFX8-NEXT: s_cselect_b32 s1, s3, s1
+; GFX8-NEXT: s_max_i32 s1, s3, s1
+; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s3, s5
-; GFX8-NEXT: s_cmp_lt_i32 s1, s3
-; GFX8-NEXT: s_cselect_b32 s1, s1, s3
+; GFX8-NEXT: s_min_i32 s1, s1, s3
; GFX8-NEXT: s_add_i32 s0, s0, s1
; GFX8-NEXT: s_sext_i32_i16 s0, s0
; GFX8-NEXT: s_ashr_i32 s0, s0, s2
@@ -183,17 +175,13 @@ define amdgpu_ps i8 @s_saddsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
; GFX6-LABEL: s_saddsat_i8:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 24
+; GFX6-NEXT: s_min_i32 s3, s0, 0
+; GFX6-NEXT: s_max_i32 s2, s0, 0
; GFX6-NEXT: s_lshl_b32 s1, s1, 24
-; GFX6-NEXT: s_cmp_gt_i32 s0, 0
-; GFX6-NEXT: s_cselect_b32 s2, s0, 0
-; GFX6-NEXT: s_sub_i32 s2, 0x7fffffff, s2
-; GFX6-NEXT: s_cmp_lt_i32 s0, 0
-; GFX6-NEXT: s_cselect_b32 s3, s0, 0
; GFX6-NEXT: s_sub_i32 s3, 0x80000000, s3
-; GFX6-NEXT: s_cmp_gt_i32 s3, s1
-; GFX6-NEXT: s_cselect_b32 s1, s3, s1
-; GFX6-NEXT: s_cmp_lt_i32 s1, s2
-; GFX6-NEXT: s_cselect_b32 s1, s1, s2
+; GFX6-NEXT: s_sub_i32 s2, 0x7fffffff, s2
+; GFX6-NEXT: s_max_i32 s1, s3, s1
+; GFX6-NEXT: s_min_i32 s1, s1, s2
; GFX6-NEXT: s_add_i32 s0, s0, s1
; GFX6-NEXT: s_ashr_i32 s0, s0, 24
; GFX6-NEXT: ; return to shader part epilog
@@ -202,23 +190,19 @@ define amdgpu_ps i8 @s_saddsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_bfe_u32 s2, 8, 0x100000
; GFX8-NEXT: s_lshl_b32 s0, s0, s2
-; GFX8-NEXT: s_lshl_b32 s1, s1, s2
; GFX8-NEXT: s_sext_i32_i16 s3, s0
; GFX8-NEXT: s_sext_i32_i16 s4, 0
-; GFX8-NEXT: s_cmp_gt_i32 s3, s4
-; GFX8-NEXT: s_cselect_b32 s5, s3, s4
-; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
-; GFX8-NEXT: s_cmp_lt_i32 s3, s4
-; GFX8-NEXT: s_cselect_b32 s3, s3, s4
+; GFX8-NEXT: s_max_i32 s5, s3, s4
+; GFX8-NEXT: s_min_i32 s3, s3, s4
+; GFX8-NEXT: s_lshl_b32 s1, s1, s2
; GFX8-NEXT: s_sub_i32 s3, 0xffff8000, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_cmp_gt_i32 s3, s1
-; GFX8-NEXT: s_cselect_b32 s1, s3, s1
+; GFX8-NEXT: s_max_i32 s1, s3, s1
+; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s3, s5
-; GFX8-NEXT: s_cmp_lt_i32 s1, s3
-; GFX8-NEXT: s_cselect_b32 s1, s1, s3
+; GFX8-NEXT: s_min_i32 s1, s1, s3
; GFX8-NEXT: s_add_i32 s0, s0, s1
; GFX8-NEXT: s_sext_i32_i16 s0, s0
; GFX8-NEXT: s_ashr_i32 s0, s0, s2
@@ -360,38 +344,30 @@ define amdgpu_ps i16 @s_saddsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
; GFX6-LABEL: s_saddsat_v2i8:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshr_b32 s2, s0, 8
-; GFX6-NEXT: s_lshr_b32 s3, s1, 8
; GFX6-NEXT: s_lshl_b32 s0, s0, 24
+; GFX6-NEXT: s_brev_b32 s5, 1
+; GFX6-NEXT: s_min_i32 s7, s0, 0
+; GFX6-NEXT: s_lshr_b32 s3, s1, 8
; GFX6-NEXT: s_lshl_b32 s1, s1, 24
-; GFX6-NEXT: s_cmp_gt_i32 s0, 0
+; GFX6-NEXT: s_sub_i32 s7, s5, s7
; GFX6-NEXT: s_brev_b32 s4, -2
-; GFX6-NEXT: s_cselect_b32 s6, s0, 0
+; GFX6-NEXT: s_max_i32 s6, s0, 0
; GFX6-NEXT: s_sub_i32 s6, s4, s6
-; GFX6-NEXT: s_cmp_lt_i32 s0, 0
-; GFX6-NEXT: s_brev_b32 s5, 1
-; GFX6-NEXT: s_cselect_b32 s7, s0, 0
-; GFX6-NEXT: s_sub_i32 s7, s5, s7
-; GFX6-NEXT: s_cmp_gt_i32 s7, s1
-; GFX6-NEXT: s_cselect_b32 s1, s7, s1
-; GFX6-NEXT: s_cmp_lt_i32 s1, s6
-; GFX6-NEXT: s_cselect_b32 s1, s1, s6
+; GFX6-NEXT: s_max_i32 s1, s7, s1
+; GFX6-NEXT: s_min_i32 s1, s1, s6
; GFX6-NEXT: s_add_i32 s0, s0, s1
; GFX6-NEXT: s_lshl_b32 s1, s2, 24
-; GFX6-NEXT: s_ashr_i32 s0, s0, 24
; GFX6-NEXT: s_lshl_b32 s2, s3, 24
-; GFX6-NEXT: s_cmp_gt_i32 s1, 0
-; GFX6-NEXT: s_cselect_b32 s3, s1, 0
+; GFX6-NEXT: s_max_i32 s3, s1, 0
; GFX6-NEXT: s_sub_i32 s3, s4, s3
-; GFX6-NEXT: s_cmp_lt_i32 s1, 0
-; GFX6-NEXT: s_cselect_b32 s4, s1, 0
+; GFX6-NEXT: s_min_i32 s4, s1, 0
; GFX6-NEXT: s_sub_i32 s4, s5, s4
-; GFX6-NEXT: s_cmp_gt_i32 s4, s2
-; GFX6-NEXT: s_cselect_b32 s2, s4, s2
-; GFX6-NEXT: s_cmp_lt_i32 s2, s3
-; GFX6-NEXT: s_cselect_b32 s2, s2, s3
+; GFX6-NEXT: s_max_i32 s2, s4, s2
+; GFX6-NEXT: s_min_i32 s2, s2, s3
; GFX6-NEXT: s_add_i32 s1, s1, s2
-; GFX6-NEXT: s_movk_i32 s2, 0xff
; GFX6-NEXT: s_ashr_i32 s1, s1, 24
+; GFX6-NEXT: s_movk_i32 s2, 0xff
+; GFX6-NEXT: s_ashr_i32 s0, s0, 24
; GFX6-NEXT: s_and_b32 s1, s1, s2
; GFX6-NEXT: s_and_b32 s0, s0, s2
; GFX6-NEXT: s_lshl_b32 s1, s1, 8
@@ -403,50 +379,42 @@ define amdgpu_ps i16 @s_saddsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
; GFX8-NEXT: s_bfe_u32 s4, 8, 0x100000
; GFX8-NEXT: s_lshr_b32 s2, s0, 8
; GFX8-NEXT: s_lshl_b32 s0, s0, s4
-; GFX8-NEXT: s_lshr_b32 s3, s1, 8
-; GFX8-NEXT: s_lshl_b32 s1, s1, s4
; GFX8-NEXT: s_sext_i32_i16 s7, s0
; GFX8-NEXT: s_sext_i32_i16 s8, 0
-; GFX8-NEXT: s_cmp_gt_i32 s7, s8
-; GFX8-NEXT: s_movk_i32 s5, 0x7fff
-; GFX8-NEXT: s_cselect_b32 s9, s7, s8
-; GFX8-NEXT: s_sub_i32 s9, s5, s9
-; GFX8-NEXT: s_cmp_lt_i32 s7, s8
+; GFX8-NEXT: s_max_i32 s9, s7, s8
; GFX8-NEXT: s_movk_i32 s6, 0x8000
-; GFX8-NEXT: s_cselect_b32 s7, s7, s8
+; GFX8-NEXT: s_min_i32 s7, s7, s8
; GFX8-NEXT: s_sub_i32 s7, s6, s7
+; GFX8-NEXT: s_lshr_b32 s3, s1, 8
+; GFX8-NEXT: s_lshl_b32 s1, s1, s4
+; GFX8-NEXT: s_movk_i32 s5, 0x7fff
; GFX8-NEXT: s_sext_i32_i16 s7, s7
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_cmp_gt_i32 s7, s1
-; GFX8-NEXT: s_cselect_b32 s1, s7, s1
+; GFX8-NEXT: s_max_i32 s1, s7, s1
+; GFX8-NEXT: s_sub_i32 s9, s5, s9
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s7, s9
-; GFX8-NEXT: s_cmp_lt_i32 s1, s7
-; GFX8-NEXT: s_cselect_b32 s1, s1, s7
+; GFX8-NEXT: s_min_i32 s1, s1, s7
; GFX8-NEXT: s_add_i32 s0, s0, s1
-; GFX8-NEXT: s_sext_i32_i16 s0, s0
; GFX8-NEXT: s_lshl_b32 s1, s2, s4
; GFX8-NEXT: s_lshl_b32 s2, s3, s4
-; GFX8-NEXT: s_ashr_i32 s0, s0, s4
; GFX8-NEXT: s_sext_i32_i16 s3, s1
-; GFX8-NEXT: s_cmp_gt_i32 s3, s8
-; GFX8-NEXT: s_cselect_b32 s7, s3, s8
-; GFX8-NEXT: s_sub_i32 s5, s5, s7
-; GFX8-NEXT: s_cmp_lt_i32 s3, s8
-; GFX8-NEXT: s_cselect_b32 s3, s3, s8
+; GFX8-NEXT: s_max_i32 s7, s3, s8
+; GFX8-NEXT: s_min_i32 s3, s3, s8
; GFX8-NEXT: s_sub_i32 s3, s6, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s2, s2
-; GFX8-NEXT: s_cmp_gt_i32 s3, s2
-; GFX8-NEXT: s_cselect_b32 s2, s3, s2
+; GFX8-NEXT: s_max_i32 s2, s3, s2
+; GFX8-NEXT: s_sub_i32 s5, s5, s7
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s3, s5
-; GFX8-NEXT: s_cmp_lt_i32 s2, s3
-; GFX8-NEXT: s_cselect_b32 s2, s2, s3
+; GFX8-NEXT: s_min_i32 s2, s2, s3
; GFX8-NEXT: s_add_i32 s1, s1, s2
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_movk_i32 s2, 0xff
+; GFX8-NEXT: s_sext_i32_i16 s0, s0
; GFX8-NEXT: s_ashr_i32 s1, s1, s4
+; GFX8-NEXT: s_movk_i32 s2, 0xff
+; GFX8-NEXT: s_ashr_i32 s0, s0, s4
; GFX8-NEXT: s_and_b32 s1, s1, s2
; GFX8-NEXT: s_and_b32 s0, s0, s2
; GFX8-NEXT: s_lshl_b32 s1, s1, s4
@@ -714,68 +682,52 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX6-NEXT: s_lshr_b32 s2, s0, 8
; GFX6-NEXT: s_lshr_b32 s3, s0, 16
; GFX6-NEXT: s_lshr_b32 s4, s0, 24
+; GFX6-NEXT: s_lshl_b32 s0, s0, 24
+; GFX6-NEXT: s_brev_b32 s9, 1
+; GFX6-NEXT: s_min_i32 s11, s0, 0
; GFX6-NEXT: s_lshr_b32 s5, s1, 8
; GFX6-NEXT: s_lshr_b32 s6, s1, 16
; GFX6-NEXT: s_lshr_b32 s7, s1, 24
-; GFX6-NEXT: s_lshl_b32 s0, s0, 24
; GFX6-NEXT: s_lshl_b32 s1, s1, 24
-; GFX6-NEXT: s_cmp_gt_i32 s0, 0
+; GFX6-NEXT: s_sub_i32 s11, s9, s11
; GFX6-NEXT: s_brev_b32 s8, -2
-; GFX6-NEXT: s_cselect_b32 s10, s0, 0
+; GFX6-NEXT: s_max_i32 s10, s0, 0
; GFX6-NEXT: s_sub_i32 s10, s8, s10
-; GFX6-NEXT: s_cmp_lt_i32 s0, 0
-; GFX6-NEXT: s_brev_b32 s9, 1
-; GFX6-NEXT: s_cselect_b32 s11, s0, 0
-; GFX6-NEXT: s_sub_i32 s11, s9, s11
-; GFX6-NEXT: s_cmp_gt_i32 s11, s1
-; GFX6-NEXT: s_cselect_b32 s1, s11, s1
-; GFX6-NEXT: s_cmp_lt_i32 s1, s10
-; GFX6-NEXT: s_cselect_b32 s1, s1, s10
+; GFX6-NEXT: s_max_i32 s1, s11, s1
+; GFX6-NEXT: s_min_i32 s1, s1, s10
; GFX6-NEXT: s_add_i32 s0, s0, s1
; GFX6-NEXT: s_lshl_b32 s1, s2, 24
-; GFX6-NEXT: s_ashr_i32 s0, s0, 24
+; GFX6-NEXT: s_min_i32 s10, s1, 0
; GFX6-NEXT: s_lshl_b32 s2, s5, 24
-; GFX6-NEXT: s_cmp_gt_i32 s1, 0
-; GFX6-NEXT: s_cselect_b32 s5, s1, 0
-; GFX6-NEXT: s_sub_i32 s5, s8, s5
-; GFX6-NEXT: s_cmp_lt_i32 s1, 0
-; GFX6-NEXT: s_cselect_b32 s10, s1, 0
+; GFX6-NEXT: s_max_i32 s5, s1, 0
; GFX6-NEXT: s_sub_i32 s10, s9, s10
-; GFX6-NEXT: s_cmp_gt_i32 s10, s2
-; GFX6-NEXT: s_cselect_b32 s2, s10, s2
-; GFX6-NEXT: s_cmp_lt_i32 s2, s5
-; GFX6-NEXT: s_cselect_b32 s2, s2, s5
+; GFX6-NEXT: s_sub_i32 s5, s8, s5
+; GFX6-NEXT: s_max_i32 s2, s10, s2
+; GFX6-NEXT: s_min_i32 s2, s2, s5
; GFX6-NEXT: s_add_i32 s1, s1, s2
; GFX6-NEXT: s_lshl_b32 s2, s3, 24
-; GFX6-NEXT: s_ashr_i32 s1, s1, 24
; GFX6-NEXT: s_lshl_b32 s3, s6, 24
-; GFX6-NEXT: s_cmp_gt_i32 s2, 0
-; GFX6-NEXT: s_cselect_b32 s5, s2, 0
-; GFX6-NEXT: s_sub_i32 s5, s8, s5
-; GFX6-NEXT: s_cmp_lt_i32 s2, 0
-; GFX6-NEXT: s_cselect_b32 s6, s2, 0
+; GFX6-NEXT: s_min_i32 s6, s2, 0
+; GFX6-NEXT: s_max_i32 s5, s2, 0
; GFX6-NEXT: s_sub_i32 s6, s9, s6
-; GFX6-NEXT: s_cmp_gt_i32 s6, s3
-; GFX6-NEXT: s_cselect_b32 s3, s6, s3
-; GFX6-NEXT: s_cmp_lt_i32 s3, s5
-; GFX6-NEXT: s_cselect_b32 s3, s3, s5
+; GFX6-NEXT: s_sub_i32 s5, s8, s5
+; GFX6-NEXT: s_max_i32 s3, s6, s3
+; GFX6-NEXT: s_min_i32 s3, s3, s5
; GFX6-NEXT: s_add_i32 s2, s2, s3
; GFX6-NEXT: s_lshl_b32 s3, s4, 24
-; GFX6-NEXT: s_ashr_i32 s2, s2, 24
+; GFX6-NEXT: s_min_i32 s6, s3, 0
+; GFX6-NEXT: s_max_i32 s5, s3, 0
; GFX6-NEXT: s_lshl_b32 s4, s7, 24
-; GFX6-NEXT: s_cmp_gt_i32 s3, 0
-; GFX6-NEXT: s_cselect_b32 s5, s3, 0
-; GFX6-NEXT: s_sub_i32 s5, s8, s5
-; GFX6-NEXT: s_cmp_lt_i32 s3, 0
-; GFX6-NEXT: s_cselect_b32 s6, s3, 0
; GFX6-NEXT: s_sub_i32 s6, s9, s6
-; GFX6-NEXT: s_cmp_gt_i32 s6, s4
-; GFX6-NEXT: s_cselect_b32 s4, s6, s4
-; GFX6-NEXT: s_cmp_lt_i32 s4, s5
-; GFX6-NEXT: s_cselect_b32 s4, s4, s5
+; GFX6-NEXT: s_sub_i32 s5, s8, s5
+; GFX6-NEXT: s_max_i32 s4, s6, s4
+; GFX6-NEXT: s_min_i32 s4, s4, s5
; GFX6-NEXT: s_add_i32 s3, s3, s4
+; GFX6-NEXT: s_ashr_i32 s1, s1, 24
; GFX6-NEXT: s_movk_i32 s4, 0xff
+; GFX6-NEXT: s_ashr_i32 s0, s0, 24
; GFX6-NEXT: s_and_b32 s1, s1, s4
+; GFX6-NEXT: s_ashr_i32 s2, s2, 24
; GFX6-NEXT: s_and_b32 s0, s0, s4
; GFX6-NEXT: s_lshl_b32 s1, s1, 8
; GFX6-NEXT: s_or_b32 s0, s0, s1
@@ -795,91 +747,75 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX8-NEXT: s_lshr_b32 s3, s0, 16
; GFX8-NEXT: s_lshr_b32 s4, s0, 24
; GFX8-NEXT: s_lshl_b32 s0, s0, s8
+; GFX8-NEXT: s_sext_i32_i16 s11, s0
+; GFX8-NEXT: s_sext_i32_i16 s12, 0
+; GFX8-NEXT: s_max_i32 s13, s11, s12
+; GFX8-NEXT: s_movk_i32 s10, 0x8000
+; GFX8-NEXT: s_min_i32 s11, s11, s12
+; GFX8-NEXT: s_sub_i32 s11, s10, s11
; GFX8-NEXT: s_lshr_b32 s5, s1, 8
; GFX8-NEXT: s_lshr_b32 s6, s1, 16
; GFX8-NEXT: s_lshr_b32 s7, s1, 24
; GFX8-NEXT: s_lshl_b32 s1, s1, s8
-; GFX8-NEXT: s_sext_i32_i16 s11, s0
-; GFX8-NEXT: s_sext_i32_i16 s12, 0
-; GFX8-NEXT: s_cmp_gt_i32 s11, s12
; GFX8-NEXT: s_movk_i32 s9, 0x7fff
-; GFX8-NEXT: s_cselect_b32 s13, s11, s12
-; GFX8-NEXT: s_sub_i32 s13, s9, s13
-; GFX8-NEXT: s_cmp_lt_i32 s11, s12
-; GFX8-NEXT: s_movk_i32 s10, 0x8000
-; GFX8-NEXT: s_cselect_b32 s11, s11, s12
-; GFX8-NEXT: s_sub_i32 s11, s10, s11
; GFX8-NEXT: s_sext_i32_i16 s11, s11
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_cmp_gt_i32 s11, s1
-; GFX8-NEXT: s_cselect_b32 s1, s11, s1
+; GFX8-NEXT: s_max_i32 s1, s11, s1
+; GFX8-NEXT: s_sub_i32 s13, s9, s13
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s11, s13
-; GFX8-NEXT: s_cmp_lt_i32 s1, s11
-; GFX8-NEXT: s_cselect_b32 s1, s1, s11
+; GFX8-NEXT: s_min_i32 s1, s1, s11
; GFX8-NEXT: s_add_i32 s0, s0, s1
-; GFX8-NEXT: s_sext_i32_i16 s0, s0
; GFX8-NEXT: s_lshl_b32 s1, s2, s8
; GFX8-NEXT: s_lshl_b32 s2, s5, s8
-; GFX8-NEXT: s_ashr_i32 s0, s0, s8
; GFX8-NEXT: s_sext_i32_i16 s5, s1
-; GFX8-NEXT: s_cmp_gt_i32 s5, s12
-; GFX8-NEXT: s_cselect_b32 s11, s5, s12
-; GFX8-NEXT: s_sub_i32 s11, s9, s11
-; GFX8-NEXT: s_cmp_lt_i32 s5, s12
-; GFX8-NEXT: s_cselect_b32 s5, s5, s12
+; GFX8-NEXT: s_max_i32 s11, s5, s12
+; GFX8-NEXT: s_min_i32 s5, s5, s12
; GFX8-NEXT: s_sub_i32 s5, s10, s5
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_sext_i32_i16 s2, s2
-; GFX8-NEXT: s_cmp_gt_i32 s5, s2
-; GFX8-NEXT: s_cselect_b32 s2, s5, s2
+; GFX8-NEXT: s_max_i32 s2, s5, s2
+; GFX8-NEXT: s_sub_i32 s11, s9, s11
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s5, s11
-; GFX8-NEXT: s_cmp_lt_i32 s2, s5
-; GFX8-NEXT: s_cselect_b32 s2, s2, s5
+; GFX8-NEXT: s_min_i32 s2, s2, s5
; GFX8-NEXT: s_add_i32 s1, s1, s2
-; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_lshl_b32 s2, s3, s8
-; GFX8-NEXT: s_lshl_b32 s3, s6, s8
-; GFX8-NEXT: s_ashr_i32 s1, s1, s8
; GFX8-NEXT: s_sext_i32_i16 s5, s2
-; GFX8-NEXT: s_cmp_gt_i32 s5, s12
-; GFX8-NEXT: s_cselect_b32 s6, s5, s12
-; GFX8-NEXT: s_sub_i32 s6, s9, s6
-; GFX8-NEXT: s_cmp_lt_i32 s5, s12
-; GFX8-NEXT: s_cselect_b32 s5, s5, s12
+; GFX8-NEXT: s_lshl_b32 s3, s6, s8
+; GFX8-NEXT: s_max_i32 s6, s5, s12
+; GFX8-NEXT: s_min_i32 s5, s5, s12
; GFX8-NEXT: s_sub_i32 s5, s10, s5
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_sext_i32_i16 s3, s3
-; GFX8-NEXT: s_cmp_gt_i32 s5, s3
-; GFX8-NEXT: s_cselect_b32 s3, s5, s3
+; GFX8-NEXT: s_max_i32 s3, s5, s3
+; GFX8-NEXT: s_sub_i32 s6, s9, s6
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s5, s6
-; GFX8-NEXT: s_cmp_lt_i32 s3, s5
-; GFX8-NEXT: s_cselect_b32 s3, s3, s5
+; GFX8-NEXT: s_min_i32 s3, s3, s5
; GFX8-NEXT: s_add_i32 s2, s2, s3
-; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_lshl_b32 s3, s4, s8
-; GFX8-NEXT: s_lshl_b32 s4, s7, s8
-; GFX8-NEXT: s_ashr_i32 s2, s2, s8
; GFX8-NEXT: s_sext_i32_i16 s5, s3
-; GFX8-NEXT: s_cmp_gt_i32 s5, s12
-; GFX8-NEXT: s_cselect_b32 s6, s5, s12
-; GFX8-NEXT: s_sub_i32 s6, s9, s6
-; GFX8-NEXT: s_cmp_lt_i32 s5, s12
-; GFX8-NEXT: s_cselect_b32 s5, s5, s12
+; GFX8-NEXT: s_max_i32 s6, s5, s12
+; GFX8-NEXT: s_min_i32 s5, s5, s12
+; GFX8-NEXT: s_lshl_b32 s4, s7, s8
; GFX8-NEXT: s_sub_i32 s5, s10, s5
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_sext_i32_i16 s4, s4
-; GFX8-NEXT: s_cmp_gt_i32 s5, s4
-; GFX8-NEXT: s_cselect_b32 s4, s5, s4
+; GFX8-NEXT: s_max_i32 s4, s5, s4
+; GFX8-NEXT: s_sub_i32 s6, s9, s6
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s5, s6
-; GFX8-NEXT: s_cmp_lt_i32 s4, s5
-; GFX8-NEXT: s_cselect_b32 s4, s4, s5
+; GFX8-NEXT: s_sext_i32_i16 s1, s1
+; GFX8-NEXT: s_min_i32 s4, s4, s5
; GFX8-NEXT: s_add_i32 s3, s3, s4
+; GFX8-NEXT: s_sext_i32_i16 s0, s0
+; GFX8-NEXT: s_ashr_i32 s1, s1, s8
; GFX8-NEXT: s_movk_i32 s4, 0xff
+; GFX8-NEXT: s_ashr_i32 s0, s0, s8
+; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_and_b32 s1, s1, s4
+; GFX8-NEXT: s_ashr_i32 s2, s2, s8
; GFX8-NEXT: s_and_b32 s0, s0, s4
; GFX8-NEXT: s_lshl_b32 s1, s1, 8
; GFX8-NEXT: s_sext_i32_i16 s3, s3
@@ -1046,17 +982,13 @@ define amdgpu_ps i24 @s_saddsat_i24(i24 inreg %lhs, i24 inreg %rhs) {
; GFX6-LABEL: s_saddsat_i24:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 8
+; GFX6-NEXT: s_min_i32 s3, s0, 0
+; GFX6-NEXT: s_max_i32 s2, s0, 0
; GFX6-NEXT: s_lshl_b32 s1, s1, 8
-; GFX6-NEXT: s_cmp_gt_i32 s0, 0
-; GFX6-NEXT: s_cselect_b32 s2, s0, 0
-; GFX6-NEXT: s_sub_i32 s2, 0x7fffffff, s2
-; GFX6-NEXT: s_cmp_lt_i32 s0, 0
-; GFX6-NEXT: s_cselect_b32 s3, s0, 0
; GFX6-NEXT: s_sub_i32 s3, 0x80000000, s3
-; GFX6-NEXT: s_cmp_gt_i32 s3, s1
-; GFX6-NEXT: s_cselect_b32 s1, s3, s1
-; GFX6-NEXT: s_cmp_lt_i32 s1, s2
-; GFX6-NEXT: s_cselect_b32 s1, s1, s2
+; GFX6-NEXT: s_sub_i32 s2, 0x7fffffff, s2
+; GFX6-NEXT: s_max_i32 s1, s3, s1
+; GFX6-NEXT: s_min_i32 s1, s1, s2
; GFX6-NEXT: s_add_i32 s0, s0, s1
; GFX6-NEXT: s_ashr_i32 s0, s0, 8
; GFX6-NEXT: ; return to shader part epilog
@@ -1159,31 +1091,23 @@ define amdgpu_ps i32 @s_saddsat_i32(i32 inreg %lhs, i32 inreg %rhs) {
; GCN-NEXT: ; return to shader part epilog
; GFX6-LABEL: s_saddsat_i32:
; GFX6: ; %bb.0:
-; GFX6-NEXT: s_cmp_gt_i32 s0, 0
-; GFX6-NEXT: s_cselect_b32 s2, s0, 0
-; GFX6-NEXT: s_sub_i32 s2, 0x7fffffff, s2
-; GFX6-NEXT: s_cmp_lt_i32 s0, 0
-; GFX6-NEXT: s_cselect_b32 s3, s0, 0
+; GFX6-NEXT: s_min_i32 s3, s0, 0
+; GFX6-NEXT: s_max_i32 s2, s0, 0
; GFX6-NEXT: s_sub_i32 s3, 0x80000000, s3
-; GFX6-NEXT: s_cmp_gt_i32 s3, s1
-; GFX6-NEXT: s_cselect_b32 s1, s3, s1
-; GFX6-NEXT: s_cmp_lt_i32 s1, s2
-; GFX6-NEXT: s_cselect_b32 s1, s1, s2
+; GFX6-NEXT: s_sub_i32 s2, 0x7fffffff, s2
+; GFX6-NEXT: s_max_i32 s1, s3, s1
+; GFX6-NEXT: s_min_i32 s1, s1, s2
; GFX6-NEXT: s_add_i32 s0, s0, s1
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_saddsat_i32:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_cmp_gt_i32 s0, 0
-; GFX8-NEXT: s_cselect_b32 s2, s0, 0
-; GFX8-NEXT: s_sub_i32 s2, 0x7fffffff, s2
-; GFX8-NEXT: s_cmp_lt_i32 s0, 0
-; GFX8-NEXT: s_cselect_b32 s3, s0, 0
+; GFX8-NEXT: s_min_i32 s3, s0, 0
+; GFX8-NEXT: s_max_i32 s2, s0, 0
; GFX8-NEXT: s_sub_i32 s3, 0x80000000, s3
-; GFX8-NEXT: s_cmp_gt_i32 s3, s1
-; GFX8-NEXT: s_cselect_b32 s1, s3, s1
-; GFX8-NEXT: s_cmp_lt_i32 s1, s2
-; GFX8-NEXT: s_cselect_b32 s1, s1, s2
+; GFX8-NEXT: s_sub_i32 s2, 0x7fffffff, s2
+; GFX8-NEXT: s_max_i32 s1, s3, s1
+; GFX8-NEXT: s_min_i32 s1, s1, s2
; GFX8-NEXT: s_add_i32 s0, s0, s1
; GFX8-NEXT: ; return to shader part epilog
;
@@ -1206,12 +1130,10 @@ define amdgpu_ps i32 @s_saddsat_i32(i32 inreg %lhs, i32 inreg %rhs) {
define amdgpu_ps float @saddsat_i32_sv(i32 inreg %lhs, i32 %rhs) {
; GFX6-LABEL: saddsat_i32_sv:
; GFX6: ; %bb.0:
-; GFX6-NEXT: s_cmp_gt_i32 s0, 0
-; GFX6-NEXT: s_cselect_b32 s1, s0, 0
-; GFX6-NEXT: s_sub_i32 s1, 0x7fffffff, s1
-; GFX6-NEXT: s_cmp_lt_i32 s0, 0
-; GFX6-NEXT: s_cselect_b32 s2, s0, 0
+; GFX6-NEXT: s_min_i32 s2, s0, 0
+; GFX6-NEXT: s_max_i32 s1, s0, 0
; GFX6-NEXT: s_sub_i32 s2, 0x80000000, s2
+; GFX6-NEXT: s_sub_i32 s1, 0x7fffffff, s1
; GFX6-NEXT: v_max_i32_e32 v0, s2, v0
; GFX6-NEXT: v_min_i32_e32 v0, s1, v0
; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v0
@@ -1219,12 +1141,10 @@ define amdgpu_ps float @saddsat_i32_sv(i32 inreg %lhs, i32 %rhs) {
;
; GFX8-LABEL: saddsat_i32_sv:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_cmp_gt_i32 s0, 0
-; GFX8-NEXT: s_cselect_b32 s1, s0, 0
-; GFX8-NEXT: s_sub_i32 s1, 0x7fffffff, s1
-; GFX8-NEXT: s_cmp_lt_i32 s0, 0
-; GFX8-NEXT: s_cselect_b32 s2, s0, 0
+; GFX8-NEXT: s_min_i32 s2, s0, 0
+; GFX8-NEXT: s_max_i32 s1, s0, 0
; GFX8-NEXT: s_sub_i32 s2, 0x80000000, s2
+; GFX8-NEXT: s_sub_i32 s1, 0x7fffffff, s1
; GFX8-NEXT: v_max_i32_e32 v0, s2, v0
; GFX8-NEXT: v_min_i32_e32 v0, s1, v0
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v0
@@ -1345,57 +1265,41 @@ define <2 x i32> @v_saddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
define amdgpu_ps <2 x i32> @s_saddsat_v2i32(<2 x i32> inreg %lhs, <2 x i32> inreg %rhs) {
; GFX6-LABEL: s_saddsat_v2i32:
; GFX6: ; %bb.0:
-; GFX6-NEXT: s_cmp_gt_i32 s0, 0
-; GFX6-NEXT: s_brev_b32 s4, -2
-; GFX6-NEXT: s_cselect_b32 s6, s0, 0
-; GFX6-NEXT: s_sub_i32 s6, s4, s6
-; GFX6-NEXT: s_cmp_lt_i32 s0, 0
; GFX6-NEXT: s_brev_b32 s5, 1
-; GFX6-NEXT: s_cselect_b32 s7, s0, 0
+; GFX6-NEXT: s_min_i32 s7, s0, 0
; GFX6-NEXT: s_sub_i32 s7, s5, s7
-; GFX6-NEXT: s_cmp_gt_i32 s7, s2
-; GFX6-NEXT: s_cselect_b32 s2, s7, s2
-; GFX6-NEXT: s_cmp_lt_i32 s2, s6
-; GFX6-NEXT: s_cselect_b32 s2, s2, s6
+; GFX6-NEXT: s_brev_b32 s4, -2
+; GFX6-NEXT: s_max_i32 s6, s0, 0
+; GFX6-NEXT: s_sub_i32 s6, s4, s6
+; GFX6-NEXT: s_max_i32 s2, s7, s2
+; GFX6-NEXT: s_min_i32 s2, s2, s6
; GFX6-NEXT: s_add_i32 s0, s0, s2
-; GFX6-NEXT: s_cmp_gt_i32 s1, 0
-; GFX6-NEXT: s_cselect_b32 s2, s1, 0
+; GFX6-NEXT: s_max_i32 s2, s1, 0
; GFX6-NEXT: s_sub_i32 s2, s4, s2
-; GFX6-NEXT: s_cmp_lt_i32 s1, 0
-; GFX6-NEXT: s_cselect_b32 s4, s1, 0
+; GFX6-NEXT: s_min_i32 s4, s1, 0
; GFX6-NEXT: s_sub_i32 s4, s5, s4
-; GFX6-NEXT: s_cmp_gt_i32 s4, s3
-; GFX6-NEXT: s_cselect_b32 s3, s4, s3
-; GFX6-NEXT: s_cmp_lt_i32 s3, s2
-; GFX6-NEXT: s_cselect_b32 s2, s3, s2
+; GFX6-NEXT: s_max_i32 s3, s4, s3
+; GFX6-NEXT: s_min_i32 s2, s3, s2
; GFX6-NEXT: s_add_i32 s1, s1, s2
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_saddsat_v2i32:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_cmp_gt_i32 s0, 0
-; GFX8-NEXT: s_brev_b32 s4, -2
-; GFX8-NEXT: s_cselect_b32 s6, s0, 0
-; GFX8-NEXT: s_sub_i32 s6, s4, s6
-; GFX8-NEXT: s_cmp_lt_i32 s0, 0
; GFX8-NEXT: s_brev_b32 s5, 1
-; GFX8-NEXT: s_cselect_b32 s7, s0, 0
+; GFX8-NEXT: s_min_i32 s7, s0, 0
; GFX8-NEXT: s_sub_i32 s7, s5, s7
-; GFX8-NEXT: s_cmp_gt_i32 s7, s2
-; GFX8-NEXT: s_cselect_b32 s2, s7, s2
-; GFX8-NEXT: s_cmp_lt_i32 s2, s6
-; GFX8-NEXT: s_cselect_b32 s2, s2, s6
+; GFX8-NEXT: s_brev_b32 s4, -2
+; GFX8-NEXT: s_max_i32 s6, s0, 0
+; GFX8-NEXT: s_sub_i32 s6, s4, s6
+; GFX8-NEXT: s_max_i32 s2, s7, s2
+; GFX8-NEXT: s_min_i32 s2, s2, s6
; GFX8-NEXT: s_add_i32 s0, s0, s2
-; GFX8-NEXT: s_cmp_gt_i32 s1, 0
-; GFX8-NEXT: s_cselect_b32 s2, s1, 0
+; GFX8-NEXT: s_max_i32 s2, s1, 0
; GFX8-NEXT: s_sub_i32 s2, s4, s2
-; GFX8-NEXT: s_cmp_lt_i32 s1, 0
-; GFX8-NEXT: s_cselect_b32 s4, s1, 0
+; GFX8-NEXT: s_min_i32 s4, s1, 0
; GFX8-NEXT: s_sub_i32 s4, s5, s4
-; GFX8-NEXT: s_cmp_gt_i32 s4, s3
-; GFX8-NEXT: s_cselect_b32 s3, s4, s3
-; GFX8-NEXT: s_cmp_lt_i32 s3, s2
-; GFX8-NEXT: s_cselect_b32 s2, s3, s2
+; GFX8-NEXT: s_max_i32 s3, s4, s3
+; GFX8-NEXT: s_min_i32 s2, s3, s2
; GFX8-NEXT: s_add_i32 s1, s1, s2
; GFX8-NEXT: ; return to shader part epilog
;
@@ -1500,79 +1404,55 @@ define <3 x i32> @v_saddsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
define amdgpu_ps <3 x i32> @s_saddsat_v3i32(<3 x i32> inreg %lhs, <3 x i32> inreg %rhs) {
; GFX6-LABEL: s_saddsat_v3i32:
; GFX6: ; %bb.0:
-; GFX6-NEXT: s_cmp_gt_i32 s0, 0
-; GFX6-NEXT: s_brev_b32 s6, -2
-; GFX6-NEXT: s_cselect_b32 s8, s0, 0
-; GFX6-NEXT: s_sub_i32 s8, s6, s8
-; GFX6-NEXT: s_cmp_lt_i32 s0, 0
; GFX6-NEXT: s_brev_b32 s7, 1
-; GFX6-NEXT: s_cselect_b32 s9, s0, 0
+; GFX6-NEXT: s_min_i32 s9, s0, 0
; GFX6-NEXT: s_sub_i32 s9, s7, s9
-; GFX6-NEXT: s_cmp_gt_i32 s9, s3
-; GFX6-NEXT: s_cselect_b32 s3, s9, s3
-; GFX6-NEXT: s_cmp_lt_i32 s3, s8
-; GFX6-NEXT: s_cselect_b32 s3, s3, s8
+; GFX6-NEXT: s_brev_b32 s6, -2
+; GFX6-NEXT: s_max_i32 s8, s0, 0
+; GFX6-NEXT: s_sub_i32 s8, s6, s8
+; GFX6-NEXT: s_max_i32 s3, s9, s3
+; GFX6-NEXT: s_min_i32 s3, s3, s8
+; GFX6-NEXT: s_min_i32 s8, s1, 0
; GFX6-NEXT: s_add_i32 s0, s0, s3
-; GFX6-NEXT: s_cmp_gt_i32 s1, 0
-; GFX6-NEXT: s_cselect_b32 s3, s1, 0
-; GFX6-NEXT: s_sub_i32 s3, s6, s3
-; GFX6-NEXT: s_cmp_lt_i32 s1, 0
-; GFX6-NEXT: s_cselect_b32 s8, s1, 0
+; GFX6-NEXT: s_max_i32 s3, s1, 0
; GFX6-NEXT: s_sub_i32 s8, s7, s8
-; GFX6-NEXT: s_cmp_gt_i32 s8, s4
-; GFX6-NEXT: s_cselect_b32 s4, s8, s4
-; GFX6-NEXT: s_cmp_lt_i32 s4, s3
-; GFX6-NEXT: s_cselect_b32 s3, s4, s3
-; GFX6-NEXT: s_add_i32 s1, s1, s3
-; GFX6-NEXT: s_cmp_gt_i32 s2, 0
-; GFX6-NEXT: s_cselect_b32 s3, s2, 0
; GFX6-NEXT: s_sub_i32 s3, s6, s3
-; GFX6-NEXT: s_cmp_lt_i32 s2, 0
-; GFX6-NEXT: s_cselect_b32 s4, s2, 0
+; GFX6-NEXT: s_max_i32 s4, s8, s4
+; GFX6-NEXT: s_min_i32 s3, s4, s3
+; GFX6-NEXT: s_min_i32 s4, s2, 0
; GFX6-NEXT: s_sub_i32 s4, s7, s4
-; GFX6-NEXT: s_cmp_gt_i32 s4, s5
-; GFX6-NEXT: s_cselect_b32 s4, s4, s5
-; GFX6-NEXT: s_cmp_lt_i32 s4, s3
-; GFX6-NEXT: s_cselect_b32 s3, s4, s3
+; GFX6-NEXT: s_add_i32 s1, s1, s3
+; GFX6-NEXT: s_max_i32 s3, s2, 0
+; GFX6-NEXT: s_sub_i32 s3, s6, s3
+; GFX6-NEXT: s_max_i32 s4, s4, s5
+; GFX6-NEXT: s_min_i32 s3, s4, s3
; GFX6-NEXT: s_add_i32 s2, s2, s3
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_saddsat_v3i32:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_cmp_gt_i32 s0, 0
-; GFX8-NEXT: s_brev_b32 s6, -2
-; GFX8-NEXT: s_cselect_b32 s8, s0, 0
-; GFX8-NEXT: s_sub_i32 s8, s6, s8
-; GFX8-NEXT: s_cmp_lt_i32 s0, 0
; GFX8-NEXT: s_brev_b32 s7, 1
-; GFX8-NEXT: s_cselect_b32 s9, s0, 0
+; GFX8-NEXT: s_min_i32 s9, s0, 0
; GFX8-NEXT: s_sub_i32 s9, s7, s9
-; GFX8-NEXT: s_cmp_gt_i32 s9, s3
-; GFX8-NEXT: s_cselect_b32 s3, s9, s3
-; GFX8-NEXT: s_cmp_lt_i32 s3, s8
-; GFX8-NEXT: s_cselect_b32 s3, s3, s8
+; GFX8-NEXT: s_brev_b32 s6, -2
+; GFX8-NEXT: s_max_i32 s8, s0, 0
+; GFX8-NEXT: s_sub_i32 s8, s6, s8
+; GFX8-NEXT: s_max_i32 s3, s9, s3
+; GFX8-NEXT: s_min_i32 s3, s3, s8
+; GFX8-NEXT: s_min_i32 s8, s1, 0
; GFX8-NEXT: s_add_i32 s0, s0, s3
-; GFX8-NEXT: s_cmp_gt_i32 s1, 0
-; GFX8-NEXT: s_cselect_b32 s3, s1, 0
-; GFX8-NEXT: s_sub_i32 s3, s6, s3
-; GFX8-NEXT: s_cmp_lt_i32 s1, 0
-; GFX8-NEXT: s_cselect_b32 s8, s1, 0
+; GFX8-NEXT: s_max_i32 s3, s1, 0
; GFX8-NEXT: s_sub_i32 s8, s7, s8
-; GFX8-NEXT: s_cmp_gt_i32 s8, s4
-; GFX8-NEXT: s_cselect_b32 s4, s8, s4
-; GFX8-NEXT: s_cmp_lt_i32 s4, s3
-; GFX8-NEXT: s_cselect_b32 s3, s4, s3
-; GFX8-NEXT: s_add_i32 s1, s1, s3
-; GFX8-NEXT: s_cmp_gt_i32 s2, 0
-; GFX8-NEXT: s_cselect_b32 s3, s2, 0
; GFX8-NEXT: s_sub_i32 s3, s6, s3
-; GFX8-NEXT: s_cmp_lt_i32 s2, 0
-; GFX8-NEXT: s_cselect_b32 s4, s2, 0
+; GFX8-NEXT: s_max_i32 s4, s8, s4
+; GFX8-NEXT: s_min_i32 s3, s4, s3
+; GFX8-NEXT: s_min_i32 s4, s2, 0
; GFX8-NEXT: s_sub_i32 s4, s7, s4
-; GFX8-NEXT: s_cmp_gt_i32 s4, s5
-; GFX8-NEXT: s_cselect_b32 s4, s4, s5
-; GFX8-NEXT: s_cmp_lt_i32 s4, s3
-; GFX8-NEXT: s_cselect_b32 s3, s4, s3
+; GFX8-NEXT: s_add_i32 s1, s1, s3
+; GFX8-NEXT: s_max_i32 s3, s2, 0
+; GFX8-NEXT: s_sub_i32 s3, s6, s3
+; GFX8-NEXT: s_max_i32 s4, s4, s5
+; GFX8-NEXT: s_min_i32 s3, s4, s3
; GFX8-NEXT: s_add_i32 s2, s2, s3
; GFX8-NEXT: ; return to shader part epilog
;
@@ -1698,101 +1578,69 @@ define <4 x i32> @v_saddsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
define amdgpu_ps <4 x i32> @s_saddsat_v4i32(<4 x i32> inreg %lhs, <4 x i32> inreg %rhs) {
; GFX6-LABEL: s_saddsat_v4i32:
; GFX6: ; %bb.0:
-; GFX6-NEXT: s_cmp_gt_i32 s0, 0
-; GFX6-NEXT: s_brev_b32 s8, -2
-; GFX6-NEXT: s_cselect_b32 s10, s0, 0
-; GFX6-NEXT: s_sub_i32 s10, s8, s10
-; GFX6-NEXT: s_cmp_lt_i32 s0, 0
; GFX6-NEXT: s_brev_b32 s9, 1
-; GFX6-NEXT: s_cselect_b32 s11, s0, 0
+; GFX6-NEXT: s_min_i32 s11, s0, 0
; GFX6-NEXT: s_sub_i32 s11, s9, s11
-; GFX6-NEXT: s_cmp_gt_i32 s11, s4
-; GFX6-NEXT: s_cselect_b32 s4, s11, s4
-; GFX6-NEXT: s_cmp_lt_i32 s4, s10
-; GFX6-NEXT: s_cselect_b32 s4, s4, s10
+; GFX6-NEXT: s_brev_b32 s8, -2
+; GFX6-NEXT: s_max_i32 s10, s0, 0
+; GFX6-NEXT: s_sub_i32 s10, s8, s10
+; GFX6-NEXT: s_max_i32 s4, s11, s4
+; GFX6-NEXT: s_min_i32 s4, s4, s10
+; GFX6-NEXT: s_min_i32 s10, s1, 0
; GFX6-NEXT: s_add_i32 s0, s0, s4
-; GFX6-NEXT: s_cmp_gt_i32 s1, 0
-; GFX6-NEXT: s_cselect_b32 s4, s1, 0
-; GFX6-NEXT: s_sub_i32 s4, s8, s4
-; GFX6-NEXT: s_cmp_lt_i32 s1, 0
-; GFX6-NEXT: s_cselect_b32 s10, s1, 0
+; GFX6-NEXT: s_max_i32 s4, s1, 0
; GFX6-NEXT: s_sub_i32 s10, s9, s10
-; GFX6-NEXT: s_cmp_gt_i32 s10, s5
-; GFX6-NEXT: s_cselect_b32 s5, s10, s5
-; GFX6-NEXT: s_cmp_lt_i32 s5, s4
-; GFX6-NEXT: s_cselect_b32 s4, s5, s4
+; GFX6-NEXT: s_sub_i32 s4, s8, s4
+; GFX6-NEXT: s_max_i32 s5, s10, s5
+; GFX6-NEXT: s_min_i32 s4, s5, s4
+; GFX6-NEXT: s_min_i32 s5, s2, 0
+; GFX6-NEXT: s_sub_i32 s5, s9, s5
; GFX6-NEXT: s_add_i32 s1, s1, s4
-; GFX6-NEXT: s_cmp_gt_i32 s2, 0
-; GFX6-NEXT: s_cselect_b32 s4, s2, 0
+; GFX6-NEXT: s_max_i32 s4, s2, 0
; GFX6-NEXT: s_sub_i32 s4, s8, s4
-; GFX6-NEXT: s_cmp_lt_i32 s2, 0
-; GFX6-NEXT: s_cselect_b32 s5, s2, 0
+; GFX6-NEXT: s_max_i32 s5, s5, s6
+; GFX6-NEXT: s_min_i32 s4, s5, s4
+; GFX6-NEXT: s_min_i32 s5, s3, 0
; GFX6-NEXT: s_sub_i32 s5, s9, s5
-; GFX6-NEXT: s_cmp_gt_i32 s5, s6
-; GFX6-NEXT: s_cselect_b32 s5, s5, s6
-; GFX6-NEXT: s_cmp_lt_i32 s5, s4
-; GFX6-NEXT: s_cselect_b32 s4, s5, s4
; GFX6-NEXT: s_add_i32 s2, s2, s4
-; GFX6-NEXT: s_cmp_gt_i32 s3, 0
-; GFX6-NEXT: s_cselect_b32 s4, s3, 0
+; GFX6-NEXT: s_max_i32 s4, s3, 0
; GFX6-NEXT: s_sub_i32 s4, s8, s4
-; GFX6-NEXT: s_cmp_lt_i32 s3, 0
-; GFX6-NEXT: s_cselect_b32 s5, s3, 0
-; GFX6-NEXT: s_sub_i32 s5, s9, s5
-; GFX6-NEXT: s_cmp_gt_i32 s5, s7
-; GFX6-NEXT: s_cselect_b32 s5, s5, s7
-; GFX6-NEXT: s_cmp_lt_i32 s5, s4
-; GFX6-NEXT: s_cselect_b32 s4, s5, s4
+; GFX6-NEXT: s_max_i32 s5, s5, s7
+; GFX6-NEXT: s_min_i32 s4, s5, s4
; GFX6-NEXT: s_add_i32 s3, s3, s4
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_saddsat_v4i32:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_cmp_gt_i32 s0, 0
-; GFX8-NEXT: s_brev_b32 s8, -2
-; GFX8-NEXT: s_cselect_b32 s10, s0, 0
-; GFX8-NEXT: s_sub_i32 s10, s8, s10
-; GFX8-NEXT: s_cmp_lt_i32 s0, 0
; GFX8-NEXT: s_brev_b32 s9, 1
-; GFX8-NEXT: s_cselect_b32 s11, s0, 0
+; GFX8-NEXT: s_min_i32 s11, s0, 0
; GFX8-NEXT: s_sub_i32 s11, s9, s11
-; GFX8-NEXT: s_cmp_gt_i32 s11, s4
-; GFX8-NEXT: s_cselect_b32 s4, s11, s4
-; GFX8-NEXT: s_cmp_lt_i32 s4, s10
-; GFX8-NEXT: s_cselect_b32 s4, s4, s10
+; GFX8-NEXT: s_brev_b32 s8, -2
+; GFX8-NEXT: s_max_i32 s10, s0, 0
+; GFX8-NEXT: s_sub_i32 s10, s8, s10
+; GFX8-NEXT: s_max_i32 s4, s11, s4
+; GFX8-NEXT: s_min_i32 s4, s4, s10
+; GFX8-NEXT: s_min_i32 s10, s1, 0
; GFX8-NEXT: s_add_i32 s0, s0, s4
-; GFX8-NEXT: s_cmp_gt_i32 s1, 0
-; GFX8-NEXT: s_cselect_b32 s4, s1, 0
-; GFX8-NEXT: s_sub_i32 s4, s8, s4
-; GFX8-NEXT: s_cmp_lt_i32 s1, 0
-; GFX8-NEXT: s_cselect_b32 s10, s1, 0
+; GFX8-NEXT: s_max_i32 s4, s1, 0
; GFX8-NEXT: s_sub_i32 s10, s9, s10
-; GFX8-NEXT: s_cmp_gt_i32 s10, s5
-; GFX8-NEXT: s_cselect_b32 s5, s10, s5
-; GFX8-NEXT: s_cmp_lt_i32 s5, s4
-; GFX8-NEXT: s_cselect_b32 s4, s5, s4
+; GFX8-NEXT: s_sub_i32 s4, s8, s4
+; GFX8-NEXT: s_max_i32 s5, s10, s5
+; GFX8-NEXT: s_min_i32 s4, s5, s4
+; GFX8-NEXT: s_min_i32 s5, s2, 0
+; GFX8-NEXT: s_sub_i32 s5, s9, s5
; GFX8-NEXT: s_add_i32 s1, s1, s4
-; GFX8-NEXT: s_cmp_gt_i32 s2, 0
-; GFX8-NEXT: s_cselect_b32 s4, s2, 0
+; GFX8-NEXT: s_max_i32 s4, s2, 0
; GFX8-NEXT: s_sub_i32 s4, s8, s4
-; GFX8-NEXT: s_cmp_lt_i32 s2, 0
-; GFX8-NEXT: s_cselect_b32 s5, s2, 0
+; GFX8-NEXT: s_max_i32 s5, s5, s6
+; GFX8-NEXT: s_min_i32 s4, s5, s4
+; GFX8-NEXT: s_min_i32 s5, s3, 0
; GFX8-NEXT: s_sub_i32 s5, s9, s5
-; GFX8-NEXT: s_cmp_gt_i32 s5, s6
-; GFX8-NEXT: s_cselect_b32 s5, s5, s6
-; GFX8-NEXT: s_cmp_lt_i32 s5, s4
-; GFX8-NEXT: s_cselect_b32 s4, s5, s4
; GFX8-NEXT: s_add_i32 s2, s2, s4
-; GFX8-NEXT: s_cmp_gt_i32 s3, 0
-; GFX8-NEXT: s_cselect_b32 s4, s3, 0
+; GFX8-NEXT: s_max_i32 s4, s3, 0
; GFX8-NEXT: s_sub_i32 s4, s8, s4
-; GFX8-NEXT: s_cmp_lt_i32 s3, 0
-; GFX8-NEXT: s_cselect_b32 s5, s3, 0
-; GFX8-NEXT: s_sub_i32 s5, s9, s5
-; GFX8-NEXT: s_cmp_gt_i32 s5, s7
-; GFX8-NEXT: s_cselect_b32 s5, s5, s7
-; GFX8-NEXT: s_cmp_lt_i32 s5, s4
-; GFX8-NEXT: s_cselect_b32 s4, s5, s4
+; GFX8-NEXT: s_max_i32 s5, s5, s7
+; GFX8-NEXT: s_min_i32 s4, s5, s4
; GFX8-NEXT: s_add_i32 s3, s3, s4
; GFX8-NEXT: ; return to shader part epilog
;
@@ -1943,123 +1791,83 @@ define <5 x i32> @v_saddsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) {
define amdgpu_ps <5 x i32> @s_saddsat_v5i32(<5 x i32> inreg %lhs, <5 x i32> inreg %rhs) {
; GFX6-LABEL: s_saddsat_v5i32:
; GFX6: ; %bb.0:
-; GFX6-NEXT: s_cmp_gt_i32 s0, 0
-; GFX6-NEXT: s_brev_b32 s10, -2
-; GFX6-NEXT: s_cselect_b32 s12, s0, 0
-; GFX6-NEXT: s_sub_i32 s12, s10, s12
-; GFX6-NEXT: s_cmp_lt_i32 s0, 0
; GFX6-NEXT: s_brev_b32 s11, 1
-; GFX6-NEXT: s_cselect_b32 s13, s0, 0
+; GFX6-NEXT: s_min_i32 s13, s0, 0
; GFX6-NEXT: s_sub_i32 s13, s11, s13
-; GFX6-NEXT: s_cmp_gt_i32 s13, s5
-; GFX6-NEXT: s_cselect_b32 s5, s13, s5
-; GFX6-NEXT: s_cmp_lt_i32 s5, s12
-; GFX6-NEXT: s_cselect_b32 s5, s5, s12
+; GFX6-NEXT: s_brev_b32 s10, -2
+; GFX6-NEXT: s_max_i32 s12, s0, 0
+; GFX6-NEXT: s_sub_i32 s12, s10, s12
+; GFX6-NEXT: s_max_i32 s5, s13, s5
+; GFX6-NEXT: s_min_i32 s5, s5, s12
+; GFX6-NEXT: s_min_i32 s12, s1, 0
; GFX6-NEXT: s_add_i32 s0, s0, s5
-; GFX6-NEXT: s_cmp_gt_i32 s1, 0
-; GFX6-NEXT: s_cselect_b32 s5, s1, 0
-; GFX6-NEXT: s_sub_i32 s5, s10, s5
-; GFX6-NEXT: s_cmp_lt_i32 s1, 0
-; GFX6-NEXT: s_cselect_b32 s12, s1, 0
+; GFX6-NEXT: s_max_i32 s5, s1, 0
; GFX6-NEXT: s_sub_i32 s12, s11, s12
-; GFX6-NEXT: s_cmp_gt_i32 s12, s6
-; GFX6-NEXT: s_cselect_b32 s6, s12, s6
-; GFX6-NEXT: s_cmp_lt_i32 s6, s5
-; GFX6-NEXT: s_cselect_b32 s5, s6, s5
+; GFX6-NEXT: s_sub_i32 s5, s10, s5
+; GFX6-NEXT: s_max_i32 s6, s12, s6
+; GFX6-NEXT: s_min_i32 s5, s6, s5
+; GFX6-NEXT: s_min_i32 s6, s2, 0
+; GFX6-NEXT: s_sub_i32 s6, s11, s6
; GFX6-NEXT: s_add_i32 s1, s1, s5
-; GFX6-NEXT: s_cmp_gt_i32 s2, 0
-; GFX6-NEXT: s_cselect_b32 s5, s2, 0
+; GFX6-NEXT: s_max_i32 s5, s2, 0
; GFX6-NEXT: s_sub_i32 s5, s10, s5
-; GFX6-NEXT: s_cmp_lt_i32 s2, 0
-; GFX6-NEXT: s_cselect_b32 s6, s2, 0
+; GFX6-NEXT: s_max_i32 s6, s6, s7
+; GFX6-NEXT: s_min_i32 s5, s6, s5
+; GFX6-NEXT: s_min_i32 s6, s3, 0
; GFX6-NEXT: s_sub_i32 s6, s11, s6
-; GFX6-NEXT: s_cmp_gt_i32 s6, s7
-; GFX6-NEXT: s_cselect_b32 s6, s6, s7
-; GFX6-NEXT: s_cmp_lt_i32 s6, s5
-; GFX6-NEXT: s_cselect_b32 s5, s6, s5
; GFX6-NEXT: s_add_i32 s2, s2, s5
-; GFX6-NEXT: s_cmp_gt_i32 s3, 0
-; GFX6-NEXT: s_cselect_b32 s5, s3, 0
+; GFX6-NEXT: s_max_i32 s5, s3, 0
; GFX6-NEXT: s_sub_i32 s5, s10, s5
-; GFX6-NEXT: s_cmp_lt_i32 s3, 0
-; GFX6-NEXT: s_cselect_b32 s6, s3, 0
+; GFX6-NEXT: s_max_i32 s6, s6, s8
+; GFX6-NEXT: s_min_i32 s5, s6, s5
+; GFX6-NEXT: s_min_i32 s6, s4, 0
; GFX6-NEXT: s_sub_i32 s6, s11, s6
-; GFX6-NEXT: s_cmp_gt_i32 s6, s8
-; GFX6-NEXT: s_cselect_b32 s6, s6, s8
-; GFX6-NEXT: s_cmp_lt_i32 s6, s5
-; GFX6-NEXT: s_cselect_b32 s5, s6, s5
; GFX6-NEXT: s_add_i32 s3, s3, s5
-; GFX6-NEXT: s_cmp_gt_i32 s4, 0
-; GFX6-NEXT: s_cselect_b32 s5, s4, 0
+; GFX6-NEXT: s_max_i32 s5, s4, 0
; GFX6-NEXT: s_sub_i32 s5, s10, s5
-; GFX6-NEXT: s_cmp_lt_i32 s4, 0
-; GFX6-NEXT: s_cselect_b32 s6, s4, 0
-; GFX6-NEXT: s_sub_i32 s6, s11, s6
-; GFX6-NEXT: s_cmp_gt_i32 s6, s9
-; GFX6-NEXT: s_cselect_b32 s6, s6, s9
-; GFX6-NEXT: s_cmp_lt_i32 s6, s5
-; GFX6-NEXT: s_cselect_b32 s5, s6, s5
+; GFX6-NEXT: s_max_i32 s6, s6, s9
+; GFX6-NEXT: s_min_i32 s5, s6, s5
; GFX6-NEXT: s_add_i32 s4, s4, s5
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_saddsat_v5i32:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_cmp_gt_i32 s0, 0
-; GFX8-NEXT: s_brev_b32 s10, -2
-; GFX8-NEXT: s_cselect_b32 s12, s0, 0
-; GFX8-NEXT: s_sub_i32 s12, s10, s12
-; GFX8-NEXT: s_cmp_lt_i32 s0, 0
; GFX8-NEXT: s_brev_b32 s11, 1
-; GFX8-NEXT: s_cselect_b32 s13, s0, 0
+; GFX8-NEXT: s_min_i32 s13, s0, 0
; GFX8-NEXT: s_sub_i32 s13, s11, s13
-; GFX8-NEXT: s_cmp_gt_i32 s13, s5
-; GFX8-NEXT: s_cselect_b32 s5, s13, s5
-; GFX8-NEXT: s_cmp_lt_i32 s5, s12
-; GFX8-NEXT: s_cselect_b32 s5, s5, s12
+; GFX8-NEXT: s_brev_b32 s10, -2
+; GFX8-NEXT: s_max_i32 s12, s0, 0
+; GFX8-NEXT: s_sub_i32 s12, s10, s12
+; GFX8-NEXT: s_max_i32 s5, s13, s5
+; GFX8-NEXT: s_min_i32 s5, s5, s12
+; GFX8-NEXT: s_min_i32 s12, s1, 0
; GFX8-NEXT: s_add_i32 s0, s0, s5
-; GFX8-NEXT: s_cmp_gt_i32 s1, 0
-; GFX8-NEXT: s_cselect_b32 s5, s1, 0
-; GFX8-NEXT: s_sub_i32 s5, s10, s5
-; GFX8-NEXT: s_cmp_lt_i32 s1, 0
-; GFX8-NEXT: s_cselect_b32 s12, s1, 0
+; GFX8-NEXT: s_max_i32 s5, s1, 0
; GFX8-NEXT: s_sub_i32 s12, s11, s12
-; GFX8-NEXT: s_cmp_gt_i32 s12, s6
-; GFX8-NEXT: s_cselect_b32 s6, s12, s6
-; GFX8-NEXT: s_cmp_lt_i32 s6, s5
-; GFX8-NEXT: s_cselect_b32 s5, s6, s5
+; GFX8-NEXT: s_sub_i32 s5, s10, s5
+; GFX8-NEXT: s_max_i32 s6, s12, s6
+; GFX8-NEXT: s_min_i32 s5, s6, s5
+; GFX8-NEXT: s_min_i32 s6, s2, 0
+; GFX8-NEXT: s_sub_i32 s6, s11, s6
; GFX8-NEXT: s_add_i32 s1, s1, s5
-; GFX8-NEXT: s_cmp_gt_i32 s2, 0
-; GFX8-NEXT: s_cselect_b32 s5, s2, 0
+; GFX8-NEXT: s_max_i32 s5, s2, 0
; GFX8-NEXT: s_sub_i32 s5, s10, s5
-; GFX8-NEXT: s_cmp_lt_i32 s2, 0
-; GFX8-NEXT: s_cselect_b32 s6, s2, 0
+; GFX8-NEXT: s_max_i32 s6, s6, s7
+; GFX8-NEXT: s_min_i32 s5, s6, s5
+; GFX8-NEXT: s_min_i32 s6, s3, 0
; GFX8-NEXT: s_sub_i32 s6, s11, s6
-; GFX8-NEXT: s_cmp_gt_i32 s6, s7
-; GFX8-NEXT: s_cselect_b32 s6, s6, s7
-; GFX8-NEXT: s_cmp_lt_i32 s6, s5
-; GFX8-NEXT: s_cselect_b32 s5, s6, s5
; GFX8-NEXT: s_add_i32 s2, s2, s5
-; GFX8-NEXT: s_cmp_gt_i32 s3, 0
-; GFX8-NEXT: s_cselect_b32 s5, s3, 0
+; GFX8-NEXT: s_max_i32 s5, s3, 0
; GFX8-NEXT: s_sub_i32 s5, s10, s5
-; GFX8-NEXT: s_cmp_lt_i32 s3, 0
-; GFX8-NEXT: s_cselect_b32 s6, s3, 0
+; GFX8-NEXT: s_max_i32 s6, s6, s8
+; GFX8-NEXT: s_min_i32 s5, s6, s5
+; GFX8-NEXT: s_min_i32 s6, s4, 0
; GFX8-NEXT: s_sub_i32 s6, s11, s6
-; GFX8-NEXT: s_cmp_gt_i32 s6, s8
-; GFX8-NEXT: s_cselect_b32 s6, s6, s8
-; GFX8-NEXT: s_cmp_lt_i32 s6, s5
-; GFX8-NEXT: s_cselect_b32 s5, s6, s5
; GFX8-NEXT: s_add_i32 s3, s3, s5
-; GFX8-NEXT: s_cmp_gt_i32 s4, 0
-; GFX8-NEXT: s_cselect_b32 s5, s4, 0
+; GFX8-NEXT: s_max_i32 s5, s4, 0
; GFX8-NEXT: s_sub_i32 s5, s10, s5
-; GFX8-NEXT: s_cmp_lt_i32 s4, 0
-; GFX8-NEXT: s_cselect_b32 s6, s4, 0
-; GFX8-NEXT: s_sub_i32 s6, s11, s6
-; GFX8-NEXT: s_cmp_gt_i32 s6, s9
-; GFX8-NEXT: s_cselect_b32 s6, s6, s9
-; GFX8-NEXT: s_cmp_lt_i32 s6, s5
-; GFX8-NEXT: s_cselect_b32 s5, s6, s5
+; GFX8-NEXT: s_max_i32 s6, s6, s9
+; GFX8-NEXT: s_min_i32 s5, s6, s5
; GFX8-NEXT: s_add_i32 s4, s4, s5
; GFX8-NEXT: ; return to shader part epilog
;
@@ -2391,365 +2199,237 @@ define <16 x i32> @v_saddsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
define amdgpu_ps <16 x i32> @s_saddsat_v16i32(<16 x i32> inreg %lhs, <16 x i32> inreg %rhs) {
; GFX6-LABEL: s_saddsat_v16i32:
; GFX6: ; %bb.0:
-; GFX6-NEXT: s_cmp_gt_i32 s0, 0
-; GFX6-NEXT: s_brev_b32 s32, -2
-; GFX6-NEXT: s_cselect_b32 s34, s0, 0
-; GFX6-NEXT: s_sub_i32 s34, s32, s34
-; GFX6-NEXT: s_cmp_lt_i32 s0, 0
; GFX6-NEXT: s_brev_b32 s33, 1
-; GFX6-NEXT: s_cselect_b32 s35, s0, 0
+; GFX6-NEXT: s_min_i32 s35, s0, 0
; GFX6-NEXT: s_sub_i32 s35, s33, s35
-; GFX6-NEXT: s_cmp_gt_i32 s35, s16
-; GFX6-NEXT: s_cselect_b32 s16, s35, s16
-; GFX6-NEXT: s_cmp_lt_i32 s16, s34
-; GFX6-NEXT: s_cselect_b32 s16, s16, s34
+; GFX6-NEXT: s_brev_b32 s32, -2
+; GFX6-NEXT: s_max_i32 s34, s0, 0
+; GFX6-NEXT: s_sub_i32 s34, s32, s34
+; GFX6-NEXT: s_max_i32 s16, s35, s16
+; GFX6-NEXT: s_min_i32 s16, s16, s34
+; GFX6-NEXT: s_min_i32 s34, s1, 0
; GFX6-NEXT: s_add_i32 s0, s0, s16
-; GFX6-NEXT: s_cmp_gt_i32 s1, 0
-; GFX6-NEXT: s_cselect_b32 s16, s1, 0
-; GFX6-NEXT: s_sub_i32 s16, s32, s16
-; GFX6-NEXT: s_cmp_lt_i32 s1, 0
-; GFX6-NEXT: s_cselect_b32 s34, s1, 0
+; GFX6-NEXT: s_max_i32 s16, s1, 0
; GFX6-NEXT: s_sub_i32 s34, s33, s34
-; GFX6-NEXT: s_cmp_gt_i32 s34, s17
-; GFX6-NEXT: s_cselect_b32 s17, s34, s17
-; GFX6-NEXT: s_cmp_lt_i32 s17, s16
-; GFX6-NEXT: s_cselect_b32 s16, s17, s16
+; GFX6-NEXT: s_sub_i32 s16, s32, s16
+; GFX6-NEXT: s_max_i32 s17, s34, s17
+; GFX6-NEXT: s_min_i32 s16, s17, s16
+; GFX6-NEXT: s_min_i32 s17, s2, 0
+; GFX6-NEXT: s_sub_i32 s17, s33, s17
; GFX6-NEXT: s_add_i32 s1, s1, s16
-; GFX6-NEXT: s_cmp_gt_i32 s2, 0
-; GFX6-NEXT: s_cselect_b32 s16, s2, 0
+; GFX6-NEXT: s_max_i32 s16, s2, 0
; GFX6-NEXT: s_sub_i32 s16, s32, s16
-; GFX6-NEXT: s_cmp_lt_i32 s2, 0
-; GFX6-NEXT: s_cselect_b32 s17, s2, 0
+; GFX6-NEXT: s_max_i32 s17, s17, s18
+; GFX6-NEXT: s_min_i32 s16, s17, s16
+; GFX6-NEXT: s_min_i32 s17, s3, 0
; GFX6-NEXT: s_sub_i32 s17, s33, s17
-; GFX6-NEXT: s_cmp_gt_i32 s17, s18
-; GFX6-NEXT: s_cselect_b32 s17, s17, s18
-; GFX6-NEXT: s_cmp_lt_i32 s17, s16
-; GFX6-NEXT: s_cselect_b32 s16, s17, s16
; GFX6-NEXT: s_add_i32 s2, s2, s16
-; GFX6-NEXT: s_cmp_gt_i32 s3, 0
-; GFX6-NEXT: s_cselect_b32 s16, s3, 0
+; GFX6-NEXT: s_max_i32 s16, s3, 0
; GFX6-NEXT: s_sub_i32 s16, s32, s16
-; GFX6-NEXT: s_cmp_lt_i32 s3, 0
-; GFX6-NEXT: s_cselect_b32 s17, s3, 0
+; GFX6-NEXT: s_max_i32 s17, s17, s19
+; GFX6-NEXT: s_min_i32 s16, s17, s16
+; GFX6-NEXT: s_min_i32 s17, s4, 0
; GFX6-NEXT: s_sub_i32 s17, s33, s17
-; GFX6-NEXT: s_cmp_gt_i32 s17, s19
-; GFX6-NEXT: s_cselect_b32 s17, s17, s19
-; GFX6-NEXT: s_cmp_lt_i32 s17, s16
-; GFX6-NEXT: s_cselect_b32 s16, s17, s16
; GFX6-NEXT: s_add_i32 s3, s3, s16
-; GFX6-NEXT: s_cmp_gt_i32 s4, 0
-; GFX6-NEXT: s_cselect_b32 s16, s4, 0
+; GFX6-NEXT: s_max_i32 s16, s4, 0
; GFX6-NEXT: s_sub_i32 s16, s32, s16
-; GFX6-NEXT: s_cmp_lt_i32 s4, 0
-; GFX6-NEXT: s_cselect_b32 s17, s4, 0
+; GFX6-NEXT: s_max_i32 s17, s17, s20
+; GFX6-NEXT: s_min_i32 s16, s17, s16
+; GFX6-NEXT: s_min_i32 s17, s5, 0
; GFX6-NEXT: s_sub_i32 s17, s33, s17
-; GFX6-NEXT: s_cmp_gt_i32 s17, s20
-; GFX6-NEXT: s_cselect_b32 s17, s17, s20
-; GFX6-NEXT: s_cmp_lt_i32 s17, s16
-; GFX6-NEXT: s_cselect_b32 s16, s17, s16
; GFX6-NEXT: s_add_i32 s4, s4, s16
-; GFX6-NEXT: s_cmp_gt_i32 s5, 0
-; GFX6-NEXT: s_cselect_b32 s16, s5, 0
+; GFX6-NEXT: s_max_i32 s16, s5, 0
; GFX6-NEXT: s_sub_i32 s16, s32, s16
-; GFX6-NEXT: s_cmp_lt_i32 s5, 0
-; GFX6-NEXT: s_cselect_b32 s17, s5, 0
+; GFX6-NEXT: s_max_i32 s17, s17, s21
+; GFX6-NEXT: s_min_i32 s16, s17, s16
+; GFX6-NEXT: s_min_i32 s17, s6, 0
; GFX6-NEXT: s_sub_i32 s17, s33, s17
-; GFX6-NEXT: s_cmp_gt_i32 s17, s21
-; GFX6-NEXT: s_cselect_b32 s17, s17, s21
-; GFX6-NEXT: s_cmp_lt_i32 s17, s16
-; GFX6-NEXT: s_cselect_b32 s16, s17, s16
; GFX6-NEXT: s_add_i32 s5, s5, s16
-; GFX6-NEXT: s_cmp_gt_i32 s6, 0
-; GFX6-NEXT: s_cselect_b32 s16, s6, 0
+; GFX6-NEXT: s_max_i32 s16, s6, 0
; GFX6-NEXT: s_sub_i32 s16, s32, s16
-; GFX6-NEXT: s_cmp_lt_i32 s6, 0
-; GFX6-NEXT: s_cselect_b32 s17, s6, 0
+; GFX6-NEXT: s_max_i32 s17, s17, s22
+; GFX6-NEXT: s_min_i32 s16, s17, s16
+; GFX6-NEXT: s_min_i32 s17, s7, 0
; GFX6-NEXT: s_sub_i32 s17, s33, s17
-; GFX6-NEXT: s_cmp_gt_i32 s17, s22
-; GFX6-NEXT: s_cselect_b32 s17, s17, s22
-; GFX6-NEXT: s_cmp_lt_i32 s17, s16
-; GFX6-NEXT: s_cselect_b32 s16, s17, s16
; GFX6-NEXT: s_add_i32 s6, s6, s16
-; GFX6-NEXT: s_cmp_gt_i32 s7, 0
-; GFX6-NEXT: s_cselect_b32 s16, s7, 0
+; GFX6-NEXT: s_max_i32 s16, s7, 0
; GFX6-NEXT: s_sub_i32 s16, s32, s16
-; GFX6-NEXT: s_cmp_lt_i32 s7, 0
-; GFX6-NEXT: s_cselect_b32 s17, s7, 0
+; GFX6-NEXT: s_max_i32 s17, s17, s23
+; GFX6-NEXT: s_min_i32 s16, s17, s16
+; GFX6-NEXT: s_min_i32 s17, s8, 0
; GFX6-NEXT: s_sub_i32 s17, s33, s17
-; GFX6-NEXT: s_cmp_gt_i32 s17, s23
-; GFX6-NEXT: s_cselect_b32 s17, s17, s23
-; GFX6-NEXT: s_cmp_lt_i32 s17, s16
-; GFX6-NEXT: s_cselect_b32 s16, s17, s16
; GFX6-NEXT: s_add_i32 s7, s7, s16
-; GFX6-NEXT: s_cmp_gt_i32 s8, 0
-; GFX6-NEXT: s_cselect_b32 s16, s8, 0
+; GFX6-NEXT: s_max_i32 s16, s8, 0
; GFX6-NEXT: s_sub_i32 s16, s32, s16
-; GFX6-NEXT: s_cmp_lt_i32 s8, 0
-; GFX6-NEXT: s_cselect_b32 s17, s8, 0
+; GFX6-NEXT: s_max_i32 s17, s17, s24
+; GFX6-NEXT: s_min_i32 s16, s17, s16
+; GFX6-NEXT: s_min_i32 s17, s9, 0
; GFX6-NEXT: s_sub_i32 s17, s33, s17
-; GFX6-NEXT: s_cmp_gt_i32 s17, s24
-; GFX6-NEXT: s_cselect_b32 s17, s17, s24
-; GFX6-NEXT: s_cmp_lt_i32 s17, s16
-; GFX6-NEXT: s_cselect_b32 s16, s17, s16
; GFX6-NEXT: s_add_i32 s8, s8, s16
-; GFX6-NEXT: s_cmp_gt_i32 s9, 0
-; GFX6-NEXT: s_cselect_b32 s16, s9, 0
+; GFX6-NEXT: s_max_i32 s16, s9, 0
; GFX6-NEXT: s_sub_i32 s16, s32, s16
-; GFX6-NEXT: s_cmp_lt_i32 s9, 0
-; GFX6-NEXT: s_cselect_b32 s17, s9, 0
+; GFX6-NEXT: s_max_i32 s17, s17, s25
+; GFX6-NEXT: s_min_i32 s16, s17, s16
+; GFX6-NEXT: s_min_i32 s17, s10, 0
; GFX6-NEXT: s_sub_i32 s17, s33, s17
-; GFX6-NEXT: s_cmp_gt_i32 s17, s25
-; GFX6-NEXT: s_cselect_b32 s17, s17, s25
-; GFX6-NEXT: s_cmp_lt_i32 s17, s16
-; GFX6-NEXT: s_cselect_b32 s16, s17, s16
; GFX6-NEXT: s_add_i32 s9, s9, s16
-; GFX6-NEXT: s_cmp_gt_i32 s10, 0
-; GFX6-NEXT: s_cselect_b32 s16, s10, 0
+; GFX6-NEXT: s_max_i32 s16, s10, 0
; GFX6-NEXT: s_sub_i32 s16, s32, s16
-; GFX6-NEXT: s_cmp_lt_i32 s10, 0
-; GFX6-NEXT: s_cselect_b32 s17, s10, 0
+; GFX6-NEXT: s_max_i32 s17, s17, s26
+; GFX6-NEXT: s_min_i32 s16, s17, s16
+; GFX6-NEXT: s_min_i32 s17, s11, 0
; GFX6-NEXT: s_sub_i32 s17, s33, s17
-; GFX6-NEXT: s_cmp_gt_i32 s17, s26
-; GFX6-NEXT: s_cselect_b32 s17, s17, s26
-; GFX6-NEXT: s_cmp_lt_i32 s17, s16
-; GFX6-NEXT: s_cselect_b32 s16, s17, s16
; GFX6-NEXT: s_add_i32 s10, s10, s16
-; GFX6-NEXT: s_cmp_gt_i32 s11, 0
-; GFX6-NEXT: s_cselect_b32 s16, s11, 0
+; GFX6-NEXT: s_max_i32 s16, s11, 0
; GFX6-NEXT: s_sub_i32 s16, s32, s16
-; GFX6-NEXT: s_cmp_lt_i32 s11, 0
-; GFX6-NEXT: s_cselect_b32 s17, s11, 0
+; GFX6-NEXT: s_max_i32 s17, s17, s27
+; GFX6-NEXT: s_min_i32 s16, s17, s16
+; GFX6-NEXT: s_min_i32 s17, s12, 0
; GFX6-NEXT: s_sub_i32 s17, s33, s17
-; GFX6-NEXT: s_cmp_gt_i32 s17, s27
-; GFX6-NEXT: s_cselect_b32 s17, s17, s27
-; GFX6-NEXT: s_cmp_lt_i32 s17, s16
-; GFX6-NEXT: s_cselect_b32 s16, s17, s16
; GFX6-NEXT: s_add_i32 s11, s11, s16
-; GFX6-NEXT: s_cmp_gt_i32 s12, 0
-; GFX6-NEXT: s_cselect_b32 s16, s12, 0
+; GFX6-NEXT: s_max_i32 s16, s12, 0
; GFX6-NEXT: s_sub_i32 s16, s32, s16
-; GFX6-NEXT: s_cmp_lt_i32 s12, 0
-; GFX6-NEXT: s_cselect_b32 s17, s12, 0
+; GFX6-NEXT: s_max_i32 s17, s17, s28
+; GFX6-NEXT: s_min_i32 s16, s17, s16
+; GFX6-NEXT: s_min_i32 s17, s13, 0
; GFX6-NEXT: s_sub_i32 s17, s33, s17
-; GFX6-NEXT: s_cmp_gt_i32 s17, s28
-; GFX6-NEXT: s_cselect_b32 s17, s17, s28
-; GFX6-NEXT: s_cmp_lt_i32 s17, s16
-; GFX6-NEXT: s_cselect_b32 s16, s17, s16
; GFX6-NEXT: s_add_i32 s12, s12, s16
-; GFX6-NEXT: s_cmp_gt_i32 s13, 0
-; GFX6-NEXT: s_cselect_b32 s16, s13, 0
+; GFX6-NEXT: s_max_i32 s16, s13, 0
; GFX6-NEXT: s_sub_i32 s16, s32, s16
-; GFX6-NEXT: s_cmp_lt_i32 s13, 0
-; GFX6-NEXT: s_cselect_b32 s17, s13, 0
+; GFX6-NEXT: s_max_i32 s17, s17, s29
+; GFX6-NEXT: s_min_i32 s16, s17, s16
+; GFX6-NEXT: s_min_i32 s17, s14, 0
; GFX6-NEXT: s_sub_i32 s17, s33, s17
-; GFX6-NEXT: s_cmp_gt_i32 s17, s29
-; GFX6-NEXT: s_cselect_b32 s17, s17, s29
-; GFX6-NEXT: s_cmp_lt_i32 s17, s16
-; GFX6-NEXT: s_cselect_b32 s16, s17, s16
; GFX6-NEXT: s_add_i32 s13, s13, s16
-; GFX6-NEXT: s_cmp_gt_i32 s14, 0
-; GFX6-NEXT: s_cselect_b32 s16, s14, 0
+; GFX6-NEXT: s_max_i32 s16, s14, 0
; GFX6-NEXT: s_sub_i32 s16, s32, s16
-; GFX6-NEXT: s_cmp_lt_i32 s14, 0
-; GFX6-NEXT: s_cselect_b32 s17, s14, 0
+; GFX6-NEXT: s_max_i32 s17, s17, s30
+; GFX6-NEXT: s_min_i32 s16, s17, s16
+; GFX6-NEXT: s_min_i32 s17, s15, 0
; GFX6-NEXT: s_sub_i32 s17, s33, s17
-; GFX6-NEXT: s_cmp_gt_i32 s17, s30
-; GFX6-NEXT: s_cselect_b32 s17, s17, s30
-; GFX6-NEXT: s_cmp_lt_i32 s17, s16
-; GFX6-NEXT: s_cselect_b32 s16, s17, s16
; GFX6-NEXT: s_add_i32 s14, s14, s16
-; GFX6-NEXT: s_cmp_gt_i32 s15, 0
-; GFX6-NEXT: s_cselect_b32 s16, s15, 0
+; GFX6-NEXT: s_max_i32 s16, s15, 0
; GFX6-NEXT: s_sub_i32 s16, s32, s16
-; GFX6-NEXT: s_cmp_lt_i32 s15, 0
-; GFX6-NEXT: s_cselect_b32 s17, s15, 0
-; GFX6-NEXT: s_sub_i32 s17, s33, s17
-; GFX6-NEXT: s_cmp_gt_i32 s17, s31
-; GFX6-NEXT: s_cselect_b32 s17, s17, s31
-; GFX6-NEXT: s_cmp_lt_i32 s17, s16
-; GFX6-NEXT: s_cselect_b32 s16, s17, s16
+; GFX6-NEXT: s_max_i32 s17, s17, s31
+; GFX6-NEXT: s_min_i32 s16, s17, s16
; GFX6-NEXT: s_add_i32 s15, s15, s16
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_saddsat_v16i32:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_cmp_gt_i32 s0, 0
-; GFX8-NEXT: s_brev_b32 s32, -2
-; GFX8-NEXT: s_cselect_b32 s34, s0, 0
-; GFX8-NEXT: s_sub_i32 s34, s32, s34
-; GFX8-NEXT: s_cmp_lt_i32 s0, 0
; GFX8-NEXT: s_brev_b32 s33, 1
-; GFX8-NEXT: s_cselect_b32 s35, s0, 0
+; GFX8-NEXT: s_min_i32 s35, s0, 0
; GFX8-NEXT: s_sub_i32 s35, s33, s35
-; GFX8-NEXT: s_cmp_gt_i32 s35, s16
-; GFX8-NEXT: s_cselect_b32 s16, s35, s16
-; GFX8-NEXT: s_cmp_lt_i32 s16, s34
-; GFX8-NEXT: s_cselect_b32 s16, s16, s34
+; GFX8-NEXT: s_brev_b32 s32, -2
+; GFX8-NEXT: s_max_i32 s34, s0, 0
+; GFX8-NEXT: s_sub_i32 s34, s32, s34
+; GFX8-NEXT: s_max_i32 s16, s35, s16
+; GFX8-NEXT: s_min_i32 s16, s16, s34
+; GFX8-NEXT: s_min_i32 s34, s1, 0
; GFX8-NEXT: s_add_i32 s0, s0, s16
-; GFX8-NEXT: s_cmp_gt_i32 s1, 0
-; GFX8-NEXT: s_cselect_b32 s16, s1, 0
-; GFX8-NEXT: s_sub_i32 s16, s32, s16
-; GFX8-NEXT: s_cmp_lt_i32 s1, 0
-; GFX8-NEXT: s_cselect_b32 s34, s1, 0
+; GFX8-NEXT: s_max_i32 s16, s1, 0
; GFX8-NEXT: s_sub_i32 s34, s33, s34
-; GFX8-NEXT: s_cmp_gt_i32 s34, s17
-; GFX8-NEXT: s_cselect_b32 s17, s34, s17
-; GFX8-NEXT: s_cmp_lt_i32 s17, s16
-; GFX8-NEXT: s_cselect_b32 s16, s17, s16
+; GFX8-NEXT: s_sub_i32 s16, s32, s16
+; GFX8-NEXT: s_max_i32 s17, s34, s17
+; GFX8-NEXT: s_min_i32 s16, s17, s16
+; GFX8-NEXT: s_min_i32 s17, s2, 0
+; GFX8-NEXT: s_sub_i32 s17, s33, s17
; GFX8-NEXT: s_add_i32 s1, s1, s16
-; GFX8-NEXT: s_cmp_gt_i32 s2, 0
-; GFX8-NEXT: s_cselect_b32 s16, s2, 0
+; GFX8-NEXT: s_max_i32 s16, s2, 0
; GFX8-NEXT: s_sub_i32 s16, s32, s16
-; GFX8-NEXT: s_cmp_lt_i32 s2, 0
-; GFX8-NEXT: s_cselect_b32 s17, s2, 0
+; GFX8-NEXT: s_max_i32 s17, s17, s18
+; GFX8-NEXT: s_min_i32 s16, s17, s16
+; GFX8-NEXT: s_min_i32 s17, s3, 0
; GFX8-NEXT: s_sub_i32 s17, s33, s17
-; GFX8-NEXT: s_cmp_gt_i32 s17, s18
-; GFX8-NEXT: s_cselect_b32 s17, s17, s18
-; GFX8-NEXT: s_cmp_lt_i32 s17, s16
-; GFX8-NEXT: s_cselect_b32 s16, s17, s16
; GFX8-NEXT: s_add_i32 s2, s2, s16
-; GFX8-NEXT: s_cmp_gt_i32 s3, 0
-; GFX8-NEXT: s_cselect_b32 s16, s3, 0
+; GFX8-NEXT: s_max_i32 s16, s3, 0
; GFX8-NEXT: s_sub_i32 s16, s32, s16
-; GFX8-NEXT: s_cmp_lt_i32 s3, 0
-; GFX8-NEXT: s_cselect_b32 s17, s3, 0
+; GFX8-NEXT: s_max_i32 s17, s17, s19
+; GFX8-NEXT: s_min_i32 s16, s17, s16
+; GFX8-NEXT: s_min_i32 s17, s4, 0
; GFX8-NEXT: s_sub_i32 s17, s33, s17
-; GFX8-NEXT: s_cmp_gt_i32 s17, s19
-; GFX8-NEXT: s_cselect_b32 s17, s17, s19
-; GFX8-NEXT: s_cmp_lt_i32 s17, s16
-; GFX8-NEXT: s_cselect_b32 s16, s17, s16
; GFX8-NEXT: s_add_i32 s3, s3, s16
-; GFX8-NEXT: s_cmp_gt_i32 s4, 0
-; GFX8-NEXT: s_cselect_b32 s16, s4, 0
+; GFX8-NEXT: s_max_i32 s16, s4, 0
; GFX8-NEXT: s_sub_i32 s16, s32, s16
-; GFX8-NEXT: s_cmp_lt_i32 s4, 0
-; GFX8-NEXT: s_cselect_b32 s17, s4, 0
+; GFX8-NEXT: s_max_i32 s17, s17, s20
+; GFX8-NEXT: s_min_i32 s16, s17, s16
+; GFX8-NEXT: s_min_i32 s17, s5, 0
; GFX8-NEXT: s_sub_i32 s17, s33, s17
-; GFX8-NEXT: s_cmp_gt_i32 s17, s20
-; GFX8-NEXT: s_cselect_b32 s17, s17, s20
-; GFX8-NEXT: s_cmp_lt_i32 s17, s16
-; GFX8-NEXT: s_cselect_b32 s16, s17, s16
; GFX8-NEXT: s_add_i32 s4, s4, s16
-; GFX8-NEXT: s_cmp_gt_i32 s5, 0
-; GFX8-NEXT: s_cselect_b32 s16, s5, 0
+; GFX8-NEXT: s_max_i32 s16, s5, 0
; GFX8-NEXT: s_sub_i32 s16, s32, s16
-; GFX8-NEXT: s_cmp_lt_i32 s5, 0
-; GFX8-NEXT: s_cselect_b32 s17, s5, 0
+; GFX8-NEXT: s_max_i32 s17, s17, s21
+; GFX8-NEXT: s_min_i32 s16, s17, s16
+; GFX8-NEXT: s_min_i32 s17, s6, 0
; GFX8-NEXT: s_sub_i32 s17, s33, s17
-; GFX8-NEXT: s_cmp_gt_i32 s17, s21
-; GFX8-NEXT: s_cselect_b32 s17, s17, s21
-; GFX8-NEXT: s_cmp_lt_i32 s17, s16
-; GFX8-NEXT: s_cselect_b32 s16, s17, s16
; GFX8-NEXT: s_add_i32 s5, s5, s16
-; GFX8-NEXT: s_cmp_gt_i32 s6, 0
-; GFX8-NEXT: s_cselect_b32 s16, s6, 0
+; GFX8-NEXT: s_max_i32 s16, s6, 0
; GFX8-NEXT: s_sub_i32 s16, s32, s16
-; GFX8-NEXT: s_cmp_lt_i32 s6, 0
-; GFX8-NEXT: s_cselect_b32 s17, s6, 0
+; GFX8-NEXT: s_max_i32 s17, s17, s22
+; GFX8-NEXT: s_min_i32 s16, s17, s16
+; GFX8-NEXT: s_min_i32 s17, s7, 0
; GFX8-NEXT: s_sub_i32 s17, s33, s17
-; GFX8-NEXT: s_cmp_gt_i32 s17, s22
-; GFX8-NEXT: s_cselect_b32 s17, s17, s22
-; GFX8-NEXT: s_cmp_lt_i32 s17, s16
-; GFX8-NEXT: s_cselect_b32 s16, s17, s16
; GFX8-NEXT: s_add_i32 s6, s6, s16
-; GFX8-NEXT: s_cmp_gt_i32 s7, 0
-; GFX8-NEXT: s_cselect_b32 s16, s7, 0
+; GFX8-NEXT: s_max_i32 s16, s7, 0
; GFX8-NEXT: s_sub_i32 s16, s32, s16
-; GFX8-NEXT: s_cmp_lt_i32 s7, 0
-; GFX8-NEXT: s_cselect_b32 s17, s7, 0
+; GFX8-NEXT: s_max_i32 s17, s17, s23
+; GFX8-NEXT: s_min_i32 s16, s17, s16
+; GFX8-NEXT: s_min_i32 s17, s8, 0
; GFX8-NEXT: s_sub_i32 s17, s33, s17
-; GFX8-NEXT: s_cmp_gt_i32 s17, s23
-; GFX8-NEXT: s_cselect_b32 s17, s17, s23
-; GFX8-NEXT: s_cmp_lt_i32 s17, s16
-; GFX8-NEXT: s_cselect_b32 s16, s17, s16
; GFX8-NEXT: s_add_i32 s7, s7, s16
-; GFX8-NEXT: s_cmp_gt_i32 s8, 0
-; GFX8-NEXT: s_cselect_b32 s16, s8, 0
+; GFX8-NEXT: s_max_i32 s16, s8, 0
; GFX8-NEXT: s_sub_i32 s16, s32, s16
-; GFX8-NEXT: s_cmp_lt_i32 s8, 0
-; GFX8-NEXT: s_cselect_b32 s17, s8, 0
+; GFX8-NEXT: s_max_i32 s17, s17, s24
+; GFX8-NEXT: s_min_i32 s16, s17, s16
+; GFX8-NEXT: s_min_i32 s17, s9, 0
; GFX8-NEXT: s_sub_i32 s17, s33, s17
-; GFX8-NEXT: s_cmp_gt_i32 s17, s24
-; GFX8-NEXT: s_cselect_b32 s17, s17, s24
-; GFX8-NEXT: s_cmp_lt_i32 s17, s16
-; GFX8-NEXT: s_cselect_b32 s16, s17, s16
; GFX8-NEXT: s_add_i32 s8, s8, s16
-; GFX8-NEXT: s_cmp_gt_i32 s9, 0
-; GFX8-NEXT: s_cselect_b32 s16, s9, 0
+; GFX8-NEXT: s_max_i32 s16, s9, 0
; GFX8-NEXT: s_sub_i32 s16, s32, s16
-; GFX8-NEXT: s_cmp_lt_i32 s9, 0
-; GFX8-NEXT: s_cselect_b32 s17, s9, 0
+; GFX8-NEXT: s_max_i32 s17, s17, s25
+; GFX8-NEXT: s_min_i32 s16, s17, s16
+; GFX8-NEXT: s_min_i32 s17, s10, 0
; GFX8-NEXT: s_sub_i32 s17, s33, s17
-; GFX8-NEXT: s_cmp_gt_i32 s17, s25
-; GFX8-NEXT: s_cselect_b32 s17, s17, s25
-; GFX8-NEXT: s_cmp_lt_i32 s17, s16
-; GFX8-NEXT: s_cselect_b32 s16, s17, s16
; GFX8-NEXT: s_add_i32 s9, s9, s16
-; GFX8-NEXT: s_cmp_gt_i32 s10, 0
-; GFX8-NEXT: s_cselect_b32 s16, s10, 0
+; GFX8-NEXT: s_max_i32 s16, s10, 0
; GFX8-NEXT: s_sub_i32 s16, s32, s16
-; GFX8-NEXT: s_cmp_lt_i32 s10, 0
-; GFX8-NEXT: s_cselect_b32 s17, s10, 0
+; GFX8-NEXT: s_max_i32 s17, s17, s26
+; GFX8-NEXT: s_min_i32 s16, s17, s16
+; GFX8-NEXT: s_min_i32 s17, s11, 0
; GFX8-NEXT: s_sub_i32 s17, s33, s17
-; GFX8-NEXT: s_cmp_gt_i32 s17, s26
-; GFX8-NEXT: s_cselect_b32 s17, s17, s26
-; GFX8-NEXT: s_cmp_lt_i32 s17, s16
-; GFX8-NEXT: s_cselect_b32 s16, s17, s16
; GFX8-NEXT: s_add_i32 s10, s10, s16
-; GFX8-NEXT: s_cmp_gt_i32 s11, 0
-; GFX8-NEXT: s_cselect_b32 s16, s11, 0
+; GFX8-NEXT: s_max_i32 s16, s11, 0
; GFX8-NEXT: s_sub_i32 s16, s32, s16
-; GFX8-NEXT: s_cmp_lt_i32 s11, 0
-; GFX8-NEXT: s_cselect_b32 s17, s11, 0
+; GFX8-NEXT: s_max_i32 s17, s17, s27
+; GFX8-NEXT: s_min_i32 s16, s17, s16
+; GFX8-NEXT: s_min_i32 s17, s12, 0
; GFX8-NEXT: s_sub_i32 s17, s33, s17
-; GFX8-NEXT: s_cmp_gt_i32 s17, s27
-; GFX8-NEXT: s_cselect_b32 s17, s17, s27
-; GFX8-NEXT: s_cmp_lt_i32 s17, s16
-; GFX8-NEXT: s_cselect_b32 s16, s17, s16
; GFX8-NEXT: s_add_i32 s11, s11, s16
-; GFX8-NEXT: s_cmp_gt_i32 s12, 0
-; GFX8-NEXT: s_cselect_b32 s16, s12, 0
+; GFX8-NEXT: s_max_i32 s16, s12, 0
; GFX8-NEXT: s_sub_i32 s16, s32, s16
-; GFX8-NEXT: s_cmp_lt_i32 s12, 0
-; GFX8-NEXT: s_cselect_b32 s17, s12, 0
+; GFX8-NEXT: s_max_i32 s17, s17, s28
+; GFX8-NEXT: s_min_i32 s16, s17, s16
+; GFX8-NEXT: s_min_i32 s17, s13, 0
; GFX8-NEXT: s_sub_i32 s17, s33, s17
-; GFX8-NEXT: s_cmp_gt_i32 s17, s28
-; GFX8-NEXT: s_cselect_b32 s17, s17, s28
-; GFX8-NEXT: s_cmp_lt_i32 s17, s16
-; GFX8-NEXT: s_cselect_b32 s16, s17, s16
; GFX8-NEXT: s_add_i32 s12, s12, s16
-; GFX8-NEXT: s_cmp_gt_i32 s13, 0
-; GFX8-NEXT: s_cselect_b32 s16, s13, 0
+; GFX8-NEXT: s_max_i32 s16, s13, 0
; GFX8-NEXT: s_sub_i32 s16, s32, s16
-; GFX8-NEXT: s_cmp_lt_i32 s13, 0
-; GFX8-NEXT: s_cselect_b32 s17, s13, 0
+; GFX8-NEXT: s_max_i32 s17, s17, s29
+; GFX8-NEXT: s_min_i32 s16, s17, s16
+; GFX8-NEXT: s_min_i32 s17, s14, 0
; GFX8-NEXT: s_sub_i32 s17, s33, s17
-; GFX8-NEXT: s_cmp_gt_i32 s17, s29
-; GFX8-NEXT: s_cselect_b32 s17, s17, s29
-; GFX8-NEXT: s_cmp_lt_i32 s17, s16
-; GFX8-NEXT: s_cselect_b32 s16, s17, s16
; GFX8-NEXT: s_add_i32 s13, s13, s16
-; GFX8-NEXT: s_cmp_gt_i32 s14, 0
-; GFX8-NEXT: s_cselect_b32 s16, s14, 0
+; GFX8-NEXT: s_max_i32 s16, s14, 0
; GFX8-NEXT: s_sub_i32 s16, s32, s16
-; GFX8-NEXT: s_cmp_lt_i32 s14, 0
-; GFX8-NEXT: s_cselect_b32 s17, s14, 0
+; GFX8-NEXT: s_max_i32 s17, s17, s30
+; GFX8-NEXT: s_min_i32 s16, s17, s16
+; GFX8-NEXT: s_min_i32 s17, s15, 0
; GFX8-NEXT: s_sub_i32 s17, s33, s17
-; GFX8-NEXT: s_cmp_gt_i32 s17, s30
-; GFX8-NEXT: s_cselect_b32 s17, s17, s30
-; GFX8-NEXT: s_cmp_lt_i32 s17, s16
-; GFX8-NEXT: s_cselect_b32 s16, s17, s16
; GFX8-NEXT: s_add_i32 s14, s14, s16
-; GFX8-NEXT: s_cmp_gt_i32 s15, 0
-; GFX8-NEXT: s_cselect_b32 s16, s15, 0
+; GFX8-NEXT: s_max_i32 s16, s15, 0
; GFX8-NEXT: s_sub_i32 s16, s32, s16
-; GFX8-NEXT: s_cmp_lt_i32 s15, 0
-; GFX8-NEXT: s_cselect_b32 s17, s15, 0
-; GFX8-NEXT: s_sub_i32 s17, s33, s17
-; GFX8-NEXT: s_cmp_gt_i32 s17, s31
-; GFX8-NEXT: s_cselect_b32 s17, s17, s31
-; GFX8-NEXT: s_cmp_lt_i32 s17, s16
-; GFX8-NEXT: s_cselect_b32 s16, s17, s16
+; GFX8-NEXT: s_max_i32 s17, s17, s31
+; GFX8-NEXT: s_min_i32 s16, s17, s16
; GFX8-NEXT: s_add_i32 s15, s15, s16
; GFX8-NEXT: ; return to shader part epilog
;
@@ -2892,17 +2572,13 @@ define amdgpu_ps i16 @s_saddsat_i16(i16 inreg %lhs, i16 inreg %rhs) {
; GFX6-LABEL: s_saddsat_i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
+; GFX6-NEXT: s_min_i32 s3, s0, 0
+; GFX6-NEXT: s_max_i32 s2, s0, 0
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
-; GFX6-NEXT: s_cmp_gt_i32 s0, 0
-; GFX6-NEXT: s_cselect_b32 s2, s0, 0
-; GFX6-NEXT: s_sub_i32 s2, 0x7fffffff, s2
-; GFX6-NEXT: s_cmp_lt_i32 s0, 0
-; GFX6-NEXT: s_cselect_b32 s3, s0, 0
; GFX6-NEXT: s_sub_i32 s3, 0x80000000, s3
-; GFX6-NEXT: s_cmp_gt_i32 s3, s1
-; GFX6-NEXT: s_cselect_b32 s1, s3, s1
-; GFX6-NEXT: s_cmp_lt_i32 s1, s2
-; GFX6-NEXT: s_cselect_b32 s1, s1, s2
+; GFX6-NEXT: s_sub_i32 s2, 0x7fffffff, s2
+; GFX6-NEXT: s_max_i32 s1, s3, s1
+; GFX6-NEXT: s_min_i32 s1, s1, s2
; GFX6-NEXT: s_add_i32 s0, s0, s1
; GFX6-NEXT: s_ashr_i32 s0, s0, 16
; GFX6-NEXT: ; return to shader part epilog
@@ -2911,20 +2587,16 @@ define amdgpu_ps i16 @s_saddsat_i16(i16 inreg %lhs, i16 inreg %rhs) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_sext_i32_i16 s2, s0
; GFX8-NEXT: s_sext_i32_i16 s3, 0
-; GFX8-NEXT: s_cmp_gt_i32 s2, s3
-; GFX8-NEXT: s_cselect_b32 s4, s2, s3
-; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
-; GFX8-NEXT: s_cmp_lt_i32 s2, s3
-; GFX8-NEXT: s_cselect_b32 s2, s2, s3
+; GFX8-NEXT: s_max_i32 s4, s2, s3
+; GFX8-NEXT: s_min_i32 s2, s2, s3
; GFX8-NEXT: s_sub_i32 s2, 0xffff8000, s2
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_cmp_gt_i32 s2, s1
-; GFX8-NEXT: s_cselect_b32 s1, s2, s1
+; GFX8-NEXT: s_max_i32 s1, s2, s1
+; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s2, s4
-; GFX8-NEXT: s_cmp_lt_i32 s1, s2
-; GFX8-NEXT: s_cselect_b32 s1, s1, s2
+; GFX8-NEXT: s_min_i32 s1, s1, s2
; GFX8-NEXT: s_add_i32 s0, s0, s1
; GFX8-NEXT: ; return to shader part epilog
;
@@ -2948,13 +2620,11 @@ define amdgpu_ps half @saddsat_i16_sv(i16 inreg %lhs, i16 %rhs) {
; GFX6-LABEL: saddsat_i16_sv:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
-; GFX6-NEXT: s_cmp_gt_i32 s0, 0
-; GFX6-NEXT: s_cselect_b32 s1, s0, 0
-; GFX6-NEXT: s_sub_i32 s1, 0x7fffffff, s1
-; GFX6-NEXT: s_cmp_lt_i32 s0, 0
-; GFX6-NEXT: s_cselect_b32 s2, s0, 0
+; GFX6-NEXT: s_min_i32 s2, s0, 0
+; GFX6-NEXT: s_max_i32 s1, s0, 0
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX6-NEXT: s_sub_i32 s2, 0x80000000, s2
+; GFX6-NEXT: s_sub_i32 s1, 0x7fffffff, s1
; GFX6-NEXT: v_max_i32_e32 v0, s2, v0
; GFX6-NEXT: v_min_i32_e32 v0, s1, v0
; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v0
@@ -2965,12 +2635,10 @@ define amdgpu_ps half @saddsat_i16_sv(i16 inreg %lhs, i16 %rhs) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_sext_i32_i16 s1, s0
; GFX8-NEXT: s_sext_i32_i16 s2, 0
-; GFX8-NEXT: s_cmp_gt_i32 s1, s2
-; GFX8-NEXT: s_cselect_b32 s3, s1, s2
-; GFX8-NEXT: s_sub_i32 s3, 0x7fff, s3
-; GFX8-NEXT: s_cmp_lt_i32 s1, s2
-; GFX8-NEXT: s_cselect_b32 s1, s1, s2
+; GFX8-NEXT: s_max_i32 s3, s1, s2
+; GFX8-NEXT: s_min_i32 s1, s1, s2
; GFX8-NEXT: s_sub_i32 s1, 0xffff8000, s1
+; GFX8-NEXT: s_sub_i32 s3, 0x7fff, s3
; GFX8-NEXT: v_max_i16_e32 v0, s1, v0
; GFX8-NEXT: v_min_i16_e32 v0, s3, v0
; GFX8-NEXT: v_add_u16_e32 v0, s0, v0
@@ -3101,36 +2769,28 @@ define amdgpu_ps i32 @s_saddsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
; GFX6-LABEL: s_saddsat_v2i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
+; GFX6-NEXT: s_brev_b32 s5, 1
+; GFX6-NEXT: s_min_i32 s7, s0, 0
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
-; GFX6-NEXT: s_cmp_gt_i32 s0, 0
+; GFX6-NEXT: s_sub_i32 s7, s5, s7
; GFX6-NEXT: s_brev_b32 s4, -2
-; GFX6-NEXT: s_cselect_b32 s6, s0, 0
+; GFX6-NEXT: s_max_i32 s6, s0, 0
; GFX6-NEXT: s_sub_i32 s6, s4, s6
-; GFX6-NEXT: s_cmp_lt_i32 s0, 0
-; GFX6-NEXT: s_brev_b32 s5, 1
-; GFX6-NEXT: s_cselect_b32 s7, s0, 0
-; GFX6-NEXT: s_sub_i32 s7, s5, s7
-; GFX6-NEXT: s_cmp_gt_i32 s7, s2
-; GFX6-NEXT: s_cselect_b32 s2, s7, s2
-; GFX6-NEXT: s_cmp_lt_i32 s2, s6
-; GFX6-NEXT: s_cselect_b32 s2, s2, s6
-; GFX6-NEXT: s_add_i32 s0, s0, s2
-; GFX6-NEXT: s_ashr_i32 s0, s0, 16
+; GFX6-NEXT: s_max_i32 s2, s7, s2
+; GFX6-NEXT: s_min_i32 s2, s2, s6
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
+; GFX6-NEXT: s_add_i32 s0, s0, s2
; GFX6-NEXT: s_lshl_b32 s2, s3, 16
-; GFX6-NEXT: s_cmp_gt_i32 s1, 0
-; GFX6-NEXT: s_cselect_b32 s3, s1, 0
+; GFX6-NEXT: s_max_i32 s3, s1, 0
; GFX6-NEXT: s_sub_i32 s3, s4, s3
-; GFX6-NEXT: s_cmp_lt_i32 s1, 0
-; GFX6-NEXT: s_cselect_b32 s4, s1, 0
+; GFX6-NEXT: s_min_i32 s4, s1, 0
; GFX6-NEXT: s_sub_i32 s4, s5, s4
-; GFX6-NEXT: s_cmp_gt_i32 s4, s2
-; GFX6-NEXT: s_cselect_b32 s2, s4, s2
-; GFX6-NEXT: s_cmp_lt_i32 s2, s3
-; GFX6-NEXT: s_cselect_b32 s2, s2, s3
+; GFX6-NEXT: s_max_i32 s2, s4, s2
+; GFX6-NEXT: s_min_i32 s2, s2, s3
; GFX6-NEXT: s_add_i32 s1, s1, s2
-; GFX6-NEXT: s_mov_b32 s2, 0xffff
; GFX6-NEXT: s_ashr_i32 s1, s1, 16
+; GFX6-NEXT: s_mov_b32 s2, 0xffff
+; GFX6-NEXT: s_ashr_i32 s0, s0, 16
; GFX6-NEXT: s_and_b32 s1, s1, s2
; GFX6-NEXT: s_and_b32 s0, s0, s2
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
@@ -3139,42 +2799,34 @@ define amdgpu_ps i32 @s_saddsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
;
; GFX8-LABEL: s_saddsat_v2i16:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_lshr_b32 s3, s1, 16
-; GFX8-NEXT: s_lshr_b32 s2, s0, 16
; GFX8-NEXT: s_sext_i32_i16 s6, s0
; GFX8-NEXT: s_sext_i32_i16 s7, 0
-; GFX8-NEXT: s_cmp_gt_i32 s6, s7
-; GFX8-NEXT: s_movk_i32 s4, 0x7fff
-; GFX8-NEXT: s_cselect_b32 s8, s6, s7
-; GFX8-NEXT: s_sub_i32 s8, s4, s8
-; GFX8-NEXT: s_cmp_lt_i32 s6, s7
+; GFX8-NEXT: s_max_i32 s8, s6, s7
; GFX8-NEXT: s_movk_i32 s5, 0x8000
-; GFX8-NEXT: s_cselect_b32 s6, s6, s7
+; GFX8-NEXT: s_min_i32 s6, s6, s7
; GFX8-NEXT: s_sub_i32 s6, s5, s6
+; GFX8-NEXT: s_lshr_b32 s3, s1, 16
+; GFX8-NEXT: s_movk_i32 s4, 0x7fff
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_cmp_gt_i32 s6, s1
-; GFX8-NEXT: s_cselect_b32 s1, s6, s1
+; GFX8-NEXT: s_max_i32 s1, s6, s1
+; GFX8-NEXT: s_sub_i32 s8, s4, s8
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s6, s8
-; GFX8-NEXT: s_cmp_lt_i32 s1, s6
-; GFX8-NEXT: s_cselect_b32 s1, s1, s6
+; GFX8-NEXT: s_lshr_b32 s2, s0, 16
+; GFX8-NEXT: s_min_i32 s1, s1, s6
; GFX8-NEXT: s_add_i32 s0, s0, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s2
-; GFX8-NEXT: s_cmp_gt_i32 s1, s7
-; GFX8-NEXT: s_cselect_b32 s6, s1, s7
-; GFX8-NEXT: s_sub_i32 s4, s4, s6
-; GFX8-NEXT: s_cmp_lt_i32 s1, s7
-; GFX8-NEXT: s_cselect_b32 s1, s1, s7
+; GFX8-NEXT: s_max_i32 s6, s1, s7
+; GFX8-NEXT: s_min_i32 s1, s1, s7
; GFX8-NEXT: s_sub_i32 s1, s5, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s3, s3
-; GFX8-NEXT: s_cmp_gt_i32 s1, s3
-; GFX8-NEXT: s_cselect_b32 s1, s1, s3
+; GFX8-NEXT: s_max_i32 s1, s1, s3
+; GFX8-NEXT: s_sub_i32 s4, s4, s6
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s3, s4
-; GFX8-NEXT: s_cmp_lt_i32 s1, s3
-; GFX8-NEXT: s_cselect_b32 s1, s1, s3
+; GFX8-NEXT: s_min_i32 s1, s1, s3
; GFX8-NEXT: s_add_i32 s2, s2, s1
; GFX8-NEXT: s_bfe_u32 s1, s2, 0x100000
; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
@@ -3203,24 +2855,20 @@ define amdgpu_ps float @saddsat_v2i16_sv(<2 x i16> inreg %lhs, <2 x i16> %rhs) {
; GFX6-LABEL: saddsat_v2i16_sv:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
-; GFX6-NEXT: s_cmp_gt_i32 s0, 0
-; GFX6-NEXT: s_brev_b32 s2, -2
-; GFX6-NEXT: s_cselect_b32 s4, s0, 0
-; GFX6-NEXT: s_sub_i32 s4, s2, s4
-; GFX6-NEXT: s_cmp_lt_i32 s0, 0
; GFX6-NEXT: s_brev_b32 s3, 1
-; GFX6-NEXT: s_cselect_b32 s5, s0, 0
+; GFX6-NEXT: s_min_i32 s5, s0, 0
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX6-NEXT: s_sub_i32 s5, s3, s5
+; GFX6-NEXT: s_brev_b32 s2, -2
+; GFX6-NEXT: s_max_i32 s4, s0, 0
+; GFX6-NEXT: s_sub_i32 s4, s2, s4
; GFX6-NEXT: v_max_i32_e32 v0, s5, v0
; GFX6-NEXT: v_min_i32_e32 v0, s4, v0
; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v0
; GFX6-NEXT: s_lshl_b32 s0, s1, 16
-; GFX6-NEXT: s_cmp_gt_i32 s0, 0
-; GFX6-NEXT: s_cselect_b32 s1, s0, 0
+; GFX6-NEXT: s_max_i32 s1, s0, 0
; GFX6-NEXT: s_sub_i32 s1, s2, s1
-; GFX6-NEXT: s_cmp_lt_i32 s0, 0
-; GFX6-NEXT: s_cselect_b32 s2, s0, 0
+; GFX6-NEXT: s_min_i32 s2, s0, 0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: s_sub_i32 s2, s3, s2
; GFX6-NEXT: v_max_i32_e32 v1, s2, v1
@@ -3237,28 +2885,24 @@ define amdgpu_ps float @saddsat_v2i16_sv(<2 x i16> inreg %lhs, <2 x i16> %rhs) {
;
; GFX8-LABEL: saddsat_v2i16_sv:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_lshr_b32 s1, s0, 16
; GFX8-NEXT: s_sext_i32_i16 s4, s0
; GFX8-NEXT: s_sext_i32_i16 s5, 0
-; GFX8-NEXT: s_cmp_gt_i32 s4, s5
-; GFX8-NEXT: s_movk_i32 s2, 0x7fff
-; GFX8-NEXT: s_cselect_b32 s6, s4, s5
-; GFX8-NEXT: s_sub_i32 s6, s2, s6
-; GFX8-NEXT: s_cmp_lt_i32 s4, s5
+; GFX8-NEXT: s_max_i32 s6, s4, s5
; GFX8-NEXT: s_movk_i32 s3, 0x8000
-; GFX8-NEXT: s_cselect_b32 s4, s4, s5
+; GFX8-NEXT: s_min_i32 s4, s4, s5
+; GFX8-NEXT: s_movk_i32 s2, 0x7fff
; GFX8-NEXT: s_sub_i32 s4, s3, s4
+; GFX8-NEXT: s_lshr_b32 s1, s0, 16
; GFX8-NEXT: v_max_i16_e32 v1, s4, v0
+; GFX8-NEXT: s_sub_i32 s6, s2, s6
; GFX8-NEXT: s_sext_i32_i16 s4, s1
-; GFX8-NEXT: s_cmp_gt_i32 s4, s5
; GFX8-NEXT: v_min_i16_e32 v1, s6, v1
-; GFX8-NEXT: s_cselect_b32 s6, s4, s5
-; GFX8-NEXT: s_sub_i32 s2, s2, s6
-; GFX8-NEXT: s_cmp_lt_i32 s4, s5
-; GFX8-NEXT: s_cselect_b32 s4, s4, s5
+; GFX8-NEXT: s_max_i32 s6, s4, s5
+; GFX8-NEXT: s_min_i32 s4, s4, s5
; GFX8-NEXT: s_sub_i32 s3, s3, s4
; GFX8-NEXT: v_mov_b32_e32 v2, s3
; GFX8-NEXT: v_max_i16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: s_sub_i32 s2, s2, s6
; GFX8-NEXT: v_min_i16_e32 v0, s2, v0
; GFX8-NEXT: v_mov_b32_e32 v2, s1
; GFX8-NEXT: v_add_u16_e32 v1, s0, v1
@@ -3481,64 +3125,48 @@ define amdgpu_ps <2 x i32> @s_saddsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
; GFX6-LABEL: s_saddsat_v4i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
+; GFX6-NEXT: s_brev_b32 s9, 1
+; GFX6-NEXT: s_min_i32 s11, s0, 0
; GFX6-NEXT: s_lshl_b32 s4, s4, 16
-; GFX6-NEXT: s_cmp_gt_i32 s0, 0
+; GFX6-NEXT: s_sub_i32 s11, s9, s11
; GFX6-NEXT: s_brev_b32 s8, -2
-; GFX6-NEXT: s_cselect_b32 s10, s0, 0
+; GFX6-NEXT: s_max_i32 s10, s0, 0
; GFX6-NEXT: s_sub_i32 s10, s8, s10
-; GFX6-NEXT: s_cmp_lt_i32 s0, 0
-; GFX6-NEXT: s_brev_b32 s9, 1
-; GFX6-NEXT: s_cselect_b32 s11, s0, 0
-; GFX6-NEXT: s_sub_i32 s11, s9, s11
-; GFX6-NEXT: s_cmp_gt_i32 s11, s4
-; GFX6-NEXT: s_cselect_b32 s4, s11, s4
-; GFX6-NEXT: s_cmp_lt_i32 s4, s10
-; GFX6-NEXT: s_cselect_b32 s4, s4, s10
-; GFX6-NEXT: s_add_i32 s0, s0, s4
-; GFX6-NEXT: s_ashr_i32 s0, s0, 16
+; GFX6-NEXT: s_max_i32 s4, s11, s4
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
+; GFX6-NEXT: s_min_i32 s4, s4, s10
+; GFX6-NEXT: s_min_i32 s10, s1, 0
+; GFX6-NEXT: s_add_i32 s0, s0, s4
; GFX6-NEXT: s_lshl_b32 s4, s5, 16
-; GFX6-NEXT: s_cmp_gt_i32 s1, 0
-; GFX6-NEXT: s_cselect_b32 s5, s1, 0
-; GFX6-NEXT: s_sub_i32 s5, s8, s5
-; GFX6-NEXT: s_cmp_lt_i32 s1, 0
-; GFX6-NEXT: s_cselect_b32 s10, s1, 0
+; GFX6-NEXT: s_max_i32 s5, s1, 0
; GFX6-NEXT: s_sub_i32 s10, s9, s10
-; GFX6-NEXT: s_cmp_gt_i32 s10, s4
-; GFX6-NEXT: s_cselect_b32 s4, s10, s4
-; GFX6-NEXT: s_cmp_lt_i32 s4, s5
-; GFX6-NEXT: s_cselect_b32 s4, s4, s5
-; GFX6-NEXT: s_add_i32 s1, s1, s4
-; GFX6-NEXT: s_ashr_i32 s1, s1, 16
+; GFX6-NEXT: s_sub_i32 s5, s8, s5
+; GFX6-NEXT: s_max_i32 s4, s10, s4
+; GFX6-NEXT: s_min_i32 s4, s4, s5
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
+; GFX6-NEXT: s_add_i32 s1, s1, s4
; GFX6-NEXT: s_lshl_b32 s4, s6, 16
-; GFX6-NEXT: s_cmp_gt_i32 s2, 0
-; GFX6-NEXT: s_cselect_b32 s5, s2, 0
-; GFX6-NEXT: s_sub_i32 s5, s8, s5
-; GFX6-NEXT: s_cmp_lt_i32 s2, 0
-; GFX6-NEXT: s_cselect_b32 s6, s2, 0
+; GFX6-NEXT: s_min_i32 s6, s2, 0
+; GFX6-NEXT: s_max_i32 s5, s2, 0
; GFX6-NEXT: s_sub_i32 s6, s9, s6
-; GFX6-NEXT: s_cmp_gt_i32 s6, s4
-; GFX6-NEXT: s_cselect_b32 s4, s6, s4
-; GFX6-NEXT: s_cmp_lt_i32 s4, s5
-; GFX6-NEXT: s_cselect_b32 s4, s4, s5
-; GFX6-NEXT: s_add_i32 s2, s2, s4
-; GFX6-NEXT: s_ashr_i32 s2, s2, 16
+; GFX6-NEXT: s_sub_i32 s5, s8, s5
+; GFX6-NEXT: s_max_i32 s4, s6, s4
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
+; GFX6-NEXT: s_min_i32 s4, s4, s5
+; GFX6-NEXT: s_min_i32 s6, s3, 0
+; GFX6-NEXT: s_add_i32 s2, s2, s4
+; GFX6-NEXT: s_max_i32 s5, s3, 0
; GFX6-NEXT: s_lshl_b32 s4, s7, 16
-; GFX6-NEXT: s_cmp_gt_i32 s3, 0
-; GFX6-NEXT: s_cselect_b32 s5, s3, 0
-; GFX6-NEXT: s_sub_i32 s5, s8, s5
-; GFX6-NEXT: s_cmp_lt_i32 s3, 0
-; GFX6-NEXT: s_cselect_b32 s6, s3, 0
; GFX6-NEXT: s_sub_i32 s6, s9, s6
-; GFX6-NEXT: s_cmp_gt_i32 s6, s4
-; GFX6-NEXT: s_cselect_b32 s4, s6, s4
-; GFX6-NEXT: s_cmp_lt_i32 s4, s5
-; GFX6-NEXT: s_cselect_b32 s4, s4, s5
+; GFX6-NEXT: s_sub_i32 s5, s8, s5
+; GFX6-NEXT: s_max_i32 s4, s6, s4
+; GFX6-NEXT: s_min_i32 s4, s4, s5
; GFX6-NEXT: s_add_i32 s3, s3, s4
+; GFX6-NEXT: s_ashr_i32 s1, s1, 16
; GFX6-NEXT: s_mov_b32 s4, 0xffff
+; GFX6-NEXT: s_ashr_i32 s0, s0, 16
; GFX6-NEXT: s_and_b32 s1, s1, s4
+; GFX6-NEXT: s_ashr_i32 s2, s2, 16
; GFX6-NEXT: s_ashr_i32 s3, s3, 16
; GFX6-NEXT: s_and_b32 s0, s0, s4
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
@@ -3551,76 +3179,60 @@ define amdgpu_ps <2 x i32> @s_saddsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
;
; GFX8-LABEL: s_saddsat_v4i16:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_lshr_b32 s6, s2, 16
-; GFX8-NEXT: s_lshr_b32 s7, s3, 16
-; GFX8-NEXT: s_lshr_b32 s4, s0, 16
-; GFX8-NEXT: s_lshr_b32 s5, s1, 16
; GFX8-NEXT: s_sext_i32_i16 s10, s0
; GFX8-NEXT: s_sext_i32_i16 s11, 0
-; GFX8-NEXT: s_cmp_gt_i32 s10, s11
-; GFX8-NEXT: s_movk_i32 s8, 0x7fff
-; GFX8-NEXT: s_cselect_b32 s12, s10, s11
-; GFX8-NEXT: s_sub_i32 s12, s8, s12
-; GFX8-NEXT: s_cmp_lt_i32 s10, s11
+; GFX8-NEXT: s_max_i32 s12, s10, s11
; GFX8-NEXT: s_movk_i32 s9, 0x8000
-; GFX8-NEXT: s_cselect_b32 s10, s10, s11
+; GFX8-NEXT: s_min_i32 s10, s10, s11
; GFX8-NEXT: s_sub_i32 s10, s9, s10
+; GFX8-NEXT: s_lshr_b32 s6, s2, 16
+; GFX8-NEXT: s_movk_i32 s8, 0x7fff
; GFX8-NEXT: s_sext_i32_i16 s10, s10
; GFX8-NEXT: s_sext_i32_i16 s2, s2
-; GFX8-NEXT: s_cmp_gt_i32 s10, s2
-; GFX8-NEXT: s_cselect_b32 s2, s10, s2
+; GFX8-NEXT: s_max_i32 s2, s10, s2
+; GFX8-NEXT: s_sub_i32 s12, s8, s12
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s10, s12
-; GFX8-NEXT: s_cmp_lt_i32 s2, s10
-; GFX8-NEXT: s_cselect_b32 s2, s2, s10
+; GFX8-NEXT: s_lshr_b32 s4, s0, 16
+; GFX8-NEXT: s_min_i32 s2, s2, s10
; GFX8-NEXT: s_add_i32 s0, s0, s2
; GFX8-NEXT: s_sext_i32_i16 s2, s4
-; GFX8-NEXT: s_cmp_gt_i32 s2, s11
-; GFX8-NEXT: s_cselect_b32 s10, s2, s11
-; GFX8-NEXT: s_sub_i32 s10, s8, s10
-; GFX8-NEXT: s_cmp_lt_i32 s2, s11
-; GFX8-NEXT: s_cselect_b32 s2, s2, s11
+; GFX8-NEXT: s_max_i32 s10, s2, s11
+; GFX8-NEXT: s_min_i32 s2, s2, s11
; GFX8-NEXT: s_sub_i32 s2, s9, s2
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s6, s6
-; GFX8-NEXT: s_cmp_gt_i32 s2, s6
-; GFX8-NEXT: s_cselect_b32 s2, s2, s6
+; GFX8-NEXT: s_max_i32 s2, s2, s6
+; GFX8-NEXT: s_sub_i32 s10, s8, s10
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s6, s10
-; GFX8-NEXT: s_cmp_lt_i32 s2, s6
-; GFX8-NEXT: s_cselect_b32 s2, s2, s6
+; GFX8-NEXT: s_min_i32 s2, s2, s6
; GFX8-NEXT: s_add_i32 s4, s4, s2
; GFX8-NEXT: s_sext_i32_i16 s2, s1
-; GFX8-NEXT: s_cmp_gt_i32 s2, s11
-; GFX8-NEXT: s_cselect_b32 s6, s2, s11
-; GFX8-NEXT: s_sub_i32 s6, s8, s6
-; GFX8-NEXT: s_cmp_lt_i32 s2, s11
-; GFX8-NEXT: s_cselect_b32 s2, s2, s11
+; GFX8-NEXT: s_max_i32 s6, s2, s11
+; GFX8-NEXT: s_min_i32 s2, s2, s11
; GFX8-NEXT: s_sub_i32 s2, s9, s2
+; GFX8-NEXT: s_lshr_b32 s7, s3, 16
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s3, s3
-; GFX8-NEXT: s_cmp_gt_i32 s2, s3
-; GFX8-NEXT: s_cselect_b32 s2, s2, s3
+; GFX8-NEXT: s_max_i32 s2, s2, s3
+; GFX8-NEXT: s_sub_i32 s6, s8, s6
; GFX8-NEXT: s_sext_i32_i16 s3, s6
; GFX8-NEXT: s_sext_i32_i16 s2, s2
-; GFX8-NEXT: s_cmp_lt_i32 s2, s3
-; GFX8-NEXT: s_cselect_b32 s2, s2, s3
+; GFX8-NEXT: s_lshr_b32 s5, s1, 16
+; GFX8-NEXT: s_min_i32 s2, s2, s3
; GFX8-NEXT: s_add_i32 s1, s1, s2
; GFX8-NEXT: s_sext_i32_i16 s2, s5
-; GFX8-NEXT: s_cmp_gt_i32 s2, s11
-; GFX8-NEXT: s_cselect_b32 s3, s2, s11
-; GFX8-NEXT: s_sub_i32 s3, s8, s3
-; GFX8-NEXT: s_cmp_lt_i32 s2, s11
-; GFX8-NEXT: s_cselect_b32 s2, s2, s11
+; GFX8-NEXT: s_max_i32 s3, s2, s11
+; GFX8-NEXT: s_min_i32 s2, s2, s11
; GFX8-NEXT: s_sub_i32 s2, s9, s2
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s6, s7
-; GFX8-NEXT: s_cmp_gt_i32 s2, s6
-; GFX8-NEXT: s_cselect_b32 s2, s2, s6
+; GFX8-NEXT: s_sub_i32 s3, s8, s3
+; GFX8-NEXT: s_max_i32 s2, s2, s6
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s3, s3
-; GFX8-NEXT: s_cmp_lt_i32 s2, s3
-; GFX8-NEXT: s_cselect_b32 s2, s2, s3
+; GFX8-NEXT: s_min_i32 s2, s2, s3
; GFX8-NEXT: s_add_i32 s5, s5, s2
; GFX8-NEXT: s_bfe_u32 s2, s4, 0x100000
; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
@@ -3830,92 +3442,67 @@ define amdgpu_ps <3 x i32> @s_saddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
; GFX6-LABEL: s_saddsat_v6i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
+; GFX6-NEXT: s_brev_b32 s13, 1
+; GFX6-NEXT: s_min_i32 s15, s0, 0
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
-; GFX6-NEXT: s_cmp_gt_i32 s0, 0
+; GFX6-NEXT: s_sub_i32 s15, s13, s15
; GFX6-NEXT: s_brev_b32 s12, -2
-; GFX6-NEXT: s_cselect_b32 s14, s0, 0
+; GFX6-NEXT: s_max_i32 s14, s0, 0
; GFX6-NEXT: s_sub_i32 s14, s12, s14
-; GFX6-NEXT: s_cmp_lt_i32 s0, 0
-; GFX6-NEXT: s_brev_b32 s13, 1
-; GFX6-NEXT: s_cselect_b32 s15, s0, 0
-; GFX6-NEXT: s_sub_i32 s15, s13, s15
-; GFX6-NEXT: s_cmp_gt_i32 s15, s6
-; GFX6-NEXT: s_cselect_b32 s6, s15, s6
-; GFX6-NEXT: s_cmp_lt_i32 s6, s14
-; GFX6-NEXT: s_cselect_b32 s6, s6, s14
-; GFX6-NEXT: s_add_i32 s0, s0, s6
-; GFX6-NEXT: s_ashr_i32 s0, s0, 16
+; GFX6-NEXT: s_max_i32 s6, s15, s6
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
+; GFX6-NEXT: s_min_i32 s6, s6, s14
+; GFX6-NEXT: s_min_i32 s14, s1, 0
+; GFX6-NEXT: s_add_i32 s0, s0, s6
; GFX6-NEXT: s_lshl_b32 s6, s7, 16
-; GFX6-NEXT: s_cmp_gt_i32 s1, 0
-; GFX6-NEXT: s_cselect_b32 s7, s1, 0
-; GFX6-NEXT: s_sub_i32 s7, s12, s7
-; GFX6-NEXT: s_cmp_lt_i32 s1, 0
-; GFX6-NEXT: s_cselect_b32 s14, s1, 0
+; GFX6-NEXT: s_max_i32 s7, s1, 0
; GFX6-NEXT: s_sub_i32 s14, s13, s14
-; GFX6-NEXT: s_cmp_gt_i32 s14, s6
-; GFX6-NEXT: s_cselect_b32 s6, s14, s6
-; GFX6-NEXT: s_cmp_lt_i32 s6, s7
-; GFX6-NEXT: s_cselect_b32 s6, s6, s7
-; GFX6-NEXT: s_add_i32 s1, s1, s6
-; GFX6-NEXT: s_ashr_i32 s1, s1, 16
+; GFX6-NEXT: s_sub_i32 s7, s12, s7
+; GFX6-NEXT: s_max_i32 s6, s14, s6
+; GFX6-NEXT: s_min_i32 s6, s6, s7
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
+; GFX6-NEXT: s_add_i32 s1, s1, s6
; GFX6-NEXT: s_lshl_b32 s6, s8, 16
-; GFX6-NEXT: s_cmp_gt_i32 s2, 0
-; GFX6-NEXT: s_cselect_b32 s7, s2, 0
-; GFX6-NEXT: s_sub_i32 s7, s12, s7
-; GFX6-NEXT: s_cmp_lt_i32 s2, 0
-; GFX6-NEXT: s_cselect_b32 s8, s2, 0
+; GFX6-NEXT: s_min_i32 s8, s2, 0
+; GFX6-NEXT: s_max_i32 s7, s2, 0
; GFX6-NEXT: s_sub_i32 s8, s13, s8
-; GFX6-NEXT: s_cmp_gt_i32 s8, s6
-; GFX6-NEXT: s_cselect_b32 s6, s8, s6
-; GFX6-NEXT: s_cmp_lt_i32 s6, s7
-; GFX6-NEXT: s_cselect_b32 s6, s6, s7
-; GFX6-NEXT: s_add_i32 s2, s2, s6
-; GFX6-NEXT: s_ashr_i32 s2, s2, 16
+; GFX6-NEXT: s_sub_i32 s7, s12, s7
+; GFX6-NEXT: s_max_i32 s6, s8, s6
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
+; GFX6-NEXT: s_min_i32 s6, s6, s7
+; GFX6-NEXT: s_min_i32 s8, s3, 0
+; GFX6-NEXT: s_add_i32 s2, s2, s6
+; GFX6-NEXT: s_max_i32 s7, s3, 0
; GFX6-NEXT: s_lshl_b32 s6, s9, 16
-; GFX6-NEXT: s_cmp_gt_i32 s3, 0
-; GFX6-NEXT: s_cselect_b32 s7, s3, 0
-; GFX6-NEXT: s_sub_i32 s7, s12, s7
-; GFX6-NEXT: s_cmp_lt_i32 s3, 0
-; GFX6-NEXT: s_cselect_b32 s8, s3, 0
; GFX6-NEXT: s_sub_i32 s8, s13, s8
-; GFX6-NEXT: s_cmp_gt_i32 s8, s6
-; GFX6-NEXT: s_cselect_b32 s6, s8, s6
-; GFX6-NEXT: s_cmp_lt_i32 s6, s7
-; GFX6-NEXT: s_cselect_b32 s6, s6, s7
-; GFX6-NEXT: s_add_i32 s3, s3, s6
-; GFX6-NEXT: s_ashr_i32 s3, s3, 16
+; GFX6-NEXT: s_sub_i32 s7, s12, s7
+; GFX6-NEXT: s_max_i32 s6, s8, s6
; GFX6-NEXT: s_lshl_b32 s4, s4, 16
+; GFX6-NEXT: s_min_i32 s6, s6, s7
+; GFX6-NEXT: s_min_i32 s8, s4, 0
+; GFX6-NEXT: s_add_i32 s3, s3, s6
+; GFX6-NEXT: s_max_i32 s7, s4, 0
; GFX6-NEXT: s_lshl_b32 s6, s10, 16
-; GFX6-NEXT: s_cmp_gt_i32 s4, 0
-; GFX6-NEXT: s_cselect_b32 s7, s4, 0
-; GFX6-NEXT: s_sub_i32 s7, s12, s7
-; GFX6-NEXT: s_cmp_lt_i32 s4, 0
-; GFX6-NEXT: s_cselect_b32 s8, s4, 0
; GFX6-NEXT: s_sub_i32 s8, s13, s8
-; GFX6-NEXT: s_cmp_gt_i32 s8, s6
-; GFX6-NEXT: s_cselect_b32 s6, s8, s6
-; GFX6-NEXT: s_cmp_lt_i32 s6, s7
-; GFX6-NEXT: s_cselect_b32 s6, s6, s7
-; GFX6-NEXT: s_add_i32 s4, s4, s6
-; GFX6-NEXT: s_ashr_i32 s4, s4, 16
+; GFX6-NEXT: s_sub_i32 s7, s12, s7
+; GFX6-NEXT: s_max_i32 s6, s8, s6
; GFX6-NEXT: s_lshl_b32 s5, s5, 16
+; GFX6-NEXT: s_min_i32 s6, s6, s7
+; GFX6-NEXT: s_min_i32 s8, s5, 0
+; GFX6-NEXT: s_add_i32 s4, s4, s6
+; GFX6-NEXT: s_max_i32 s7, s5, 0
; GFX6-NEXT: s_lshl_b32 s6, s11, 16
-; GFX6-NEXT: s_cmp_gt_i32 s5, 0
-; GFX6-NEXT: s_cselect_b32 s7, s5, 0
-; GFX6-NEXT: s_sub_i32 s7, s12, s7
-; GFX6-NEXT: s_cmp_lt_i32 s5, 0
-; GFX6-NEXT: s_cselect_b32 s8, s5, 0
; GFX6-NEXT: s_sub_i32 s8, s13, s8
-; GFX6-NEXT: s_cmp_gt_i32 s8, s6
-; GFX6-NEXT: s_cselect_b32 s6, s8, s6
-; GFX6-NEXT: s_cmp_lt_i32 s6, s7
-; GFX6-NEXT: s_cselect_b32 s6, s6, s7
+; GFX6-NEXT: s_sub_i32 s7, s12, s7
+; GFX6-NEXT: s_max_i32 s6, s8, s6
+; GFX6-NEXT: s_min_i32 s6, s6, s7
; GFX6-NEXT: s_add_i32 s5, s5, s6
+; GFX6-NEXT: s_ashr_i32 s1, s1, 16
; GFX6-NEXT: s_mov_b32 s6, 0xffff
+; GFX6-NEXT: s_ashr_i32 s0, s0, 16
; GFX6-NEXT: s_and_b32 s1, s1, s6
+; GFX6-NEXT: s_ashr_i32 s2, s2, 16
+; GFX6-NEXT: s_ashr_i32 s3, s3, 16
; GFX6-NEXT: s_and_b32 s0, s0, s6
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
@@ -3924,6 +3511,7 @@ define amdgpu_ps <3 x i32> @s_saddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
; GFX6-NEXT: s_ashr_i32 s5, s5, 16
; GFX6-NEXT: s_and_b32 s3, s5, s6
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
+; GFX6-NEXT: s_ashr_i32 s4, s4, 16
; GFX6-NEXT: s_or_b32 s1, s1, s2
; GFX6-NEXT: s_and_b32 s2, s4, s6
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
@@ -3932,110 +3520,86 @@ define amdgpu_ps <3 x i32> @s_saddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
;
; GFX8-LABEL: s_saddsat_v6i16:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_lshr_b32 s9, s3, 16
-; GFX8-NEXT: s_lshr_b32 s10, s4, 16
-; GFX8-NEXT: s_lshr_b32 s11, s5, 16
-; GFX8-NEXT: s_lshr_b32 s6, s0, 16
-; GFX8-NEXT: s_lshr_b32 s7, s1, 16
-; GFX8-NEXT: s_lshr_b32 s8, s2, 16
; GFX8-NEXT: s_sext_i32_i16 s14, s0
; GFX8-NEXT: s_sext_i32_i16 s15, 0
-; GFX8-NEXT: s_cmp_gt_i32 s14, s15
-; GFX8-NEXT: s_movk_i32 s12, 0x7fff
-; GFX8-NEXT: s_cselect_b32 s16, s14, s15
-; GFX8-NEXT: s_sub_i32 s16, s12, s16
-; GFX8-NEXT: s_cmp_lt_i32 s14, s15
+; GFX8-NEXT: s_max_i32 s16, s14, s15
; GFX8-NEXT: s_movk_i32 s13, 0x8000
-; GFX8-NEXT: s_cselect_b32 s14, s14, s15
+; GFX8-NEXT: s_min_i32 s14, s14, s15
; GFX8-NEXT: s_sub_i32 s14, s13, s14
+; GFX8-NEXT: s_lshr_b32 s9, s3, 16
+; GFX8-NEXT: s_movk_i32 s12, 0x7fff
; GFX8-NEXT: s_sext_i32_i16 s14, s14
; GFX8-NEXT: s_sext_i32_i16 s3, s3
-; GFX8-NEXT: s_cmp_gt_i32 s14, s3
-; GFX8-NEXT: s_cselect_b32 s3, s14, s3
+; GFX8-NEXT: s_max_i32 s3, s14, s3
+; GFX8-NEXT: s_sub_i32 s16, s12, s16
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s14, s16
-; GFX8-NEXT: s_cmp_lt_i32 s3, s14
-; GFX8-NEXT: s_cselect_b32 s3, s3, s14
+; GFX8-NEXT: s_lshr_b32 s6, s0, 16
+; GFX8-NEXT: s_min_i32 s3, s3, s14
; GFX8-NEXT: s_add_i32 s0, s0, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s6
-; GFX8-NEXT: s_cmp_gt_i32 s3, s15
-; GFX8-NEXT: s_cselect_b32 s14, s3, s15
-; GFX8-NEXT: s_sub_i32 s14, s12, s14
-; GFX8-NEXT: s_cmp_lt_i32 s3, s15
-; GFX8-NEXT: s_cselect_b32 s3, s3, s15
+; GFX8-NEXT: s_max_i32 s14, s3, s15
+; GFX8-NEXT: s_min_i32 s3, s3, s15
; GFX8-NEXT: s_sub_i32 s3, s13, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s9, s9
-; GFX8-NEXT: s_cmp_gt_i32 s3, s9
-; GFX8-NEXT: s_cselect_b32 s3, s3, s9
+; GFX8-NEXT: s_max_i32 s3, s3, s9
+; GFX8-NEXT: s_sub_i32 s14, s12, s14
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s9, s14
-; GFX8-NEXT: s_cmp_lt_i32 s3, s9
-; GFX8-NEXT: s_cselect_b32 s3, s3, s9
+; GFX8-NEXT: s_min_i32 s3, s3, s9
; GFX8-NEXT: s_add_i32 s6, s6, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s1
-; GFX8-NEXT: s_cmp_gt_i32 s3, s15
-; GFX8-NEXT: s_cselect_b32 s9, s3, s15
-; GFX8-NEXT: s_sub_i32 s9, s12, s9
-; GFX8-NEXT: s_cmp_lt_i32 s3, s15
-; GFX8-NEXT: s_cselect_b32 s3, s3, s15
+; GFX8-NEXT: s_max_i32 s9, s3, s15
+; GFX8-NEXT: s_min_i32 s3, s3, s15
; GFX8-NEXT: s_sub_i32 s3, s13, s3
+; GFX8-NEXT: s_lshr_b32 s10, s4, 16
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s4, s4
-; GFX8-NEXT: s_cmp_gt_i32 s3, s4
-; GFX8-NEXT: s_cselect_b32 s3, s3, s4
+; GFX8-NEXT: s_max_i32 s3, s3, s4
+; GFX8-NEXT: s_sub_i32 s9, s12, s9
; GFX8-NEXT: s_sext_i32_i16 s4, s9
; GFX8-NEXT: s_sext_i32_i16 s3, s3
-; GFX8-NEXT: s_cmp_lt_i32 s3, s4
-; GFX8-NEXT: s_cselect_b32 s3, s3, s4
+; GFX8-NEXT: s_lshr_b32 s7, s1, 16
+; GFX8-NEXT: s_min_i32 s3, s3, s4
; GFX8-NEXT: s_add_i32 s1, s1, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s7
-; GFX8-NEXT: s_cmp_gt_i32 s3, s15
-; GFX8-NEXT: s_cselect_b32 s4, s3, s15
-; GFX8-NEXT: s_sub_i32 s4, s12, s4
-; GFX8-NEXT: s_cmp_lt_i32 s3, s15
-; GFX8-NEXT: s_cselect_b32 s3, s3, s15
+; GFX8-NEXT: s_max_i32 s4, s3, s15
+; GFX8-NEXT: s_min_i32 s3, s3, s15
; GFX8-NEXT: s_sub_i32 s3, s13, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s9, s10
-; GFX8-NEXT: s_cmp_gt_i32 s3, s9
-; GFX8-NEXT: s_cselect_b32 s3, s3, s9
+; GFX8-NEXT: s_sub_i32 s4, s12, s4
+; GFX8-NEXT: s_max_i32 s3, s3, s9
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s4, s4
-; GFX8-NEXT: s_cmp_lt_i32 s3, s4
-; GFX8-NEXT: s_cselect_b32 s3, s3, s4
+; GFX8-NEXT: s_min_i32 s3, s3, s4
; GFX8-NEXT: s_add_i32 s7, s7, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s2
-; GFX8-NEXT: s_cmp_gt_i32 s3, s15
-; GFX8-NEXT: s_cselect_b32 s4, s3, s15
-; GFX8-NEXT: s_sub_i32 s4, s12, s4
-; GFX8-NEXT: s_cmp_lt_i32 s3, s15
-; GFX8-NEXT: s_cselect_b32 s3, s3, s15
+; GFX8-NEXT: s_max_i32 s4, s3, s15
+; GFX8-NEXT: s_min_i32 s3, s3, s15
; GFX8-NEXT: s_sub_i32 s3, s13, s3
+; GFX8-NEXT: s_lshr_b32 s11, s5, 16
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s5, s5
-; GFX8-NEXT: s_cmp_gt_i32 s3, s5
-; GFX8-NEXT: s_cselect_b32 s3, s3, s5
+; GFX8-NEXT: s_max_i32 s3, s3, s5
+; GFX8-NEXT: s_sub_i32 s4, s12, s4
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s4, s4
-; GFX8-NEXT: s_cmp_lt_i32 s3, s4
-; GFX8-NEXT: s_cselect_b32 s3, s3, s4
+; GFX8-NEXT: s_lshr_b32 s8, s2, 16
+; GFX8-NEXT: s_min_i32 s3, s3, s4
; GFX8-NEXT: s_add_i32 s2, s2, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s8
-; GFX8-NEXT: s_cmp_gt_i32 s3, s15
-; GFX8-NEXT: s_cselect_b32 s4, s3, s15
-; GFX8-NEXT: s_sub_i32 s4, s12, s4
-; GFX8-NEXT: s_cmp_lt_i32 s3, s15
-; GFX8-NEXT: s_cselect_b32 s3, s3, s15
+; GFX8-NEXT: s_max_i32 s4, s3, s15
+; GFX8-NEXT: s_min_i32 s3, s3, s15
; GFX8-NEXT: s_sub_i32 s3, s13, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s5, s11
-; GFX8-NEXT: s_cmp_gt_i32 s3, s5
-; GFX8-NEXT: s_cselect_b32 s3, s3, s5
+; GFX8-NEXT: s_sub_i32 s4, s12, s4
+; GFX8-NEXT: s_max_i32 s3, s3, s5
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s4, s4
-; GFX8-NEXT: s_cmp_lt_i32 s3, s4
-; GFX8-NEXT: s_cselect_b32 s3, s3, s4
+; GFX8-NEXT: s_min_i32 s3, s3, s4
; GFX8-NEXT: s_add_i32 s8, s8, s3
; GFX8-NEXT: s_bfe_u32 s3, s6, 0x100000
; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
@@ -4285,132 +3849,100 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX6-LABEL: s_saddsat_v8i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
+; GFX6-NEXT: s_brev_b32 s17, 1
+; GFX6-NEXT: s_min_i32 s19, s0, 0
; GFX6-NEXT: s_lshl_b32 s8, s8, 16
-; GFX6-NEXT: s_cmp_gt_i32 s0, 0
+; GFX6-NEXT: s_sub_i32 s19, s17, s19
; GFX6-NEXT: s_brev_b32 s16, -2
-; GFX6-NEXT: s_cselect_b32 s18, s0, 0
+; GFX6-NEXT: s_max_i32 s18, s0, 0
; GFX6-NEXT: s_sub_i32 s18, s16, s18
-; GFX6-NEXT: s_cmp_lt_i32 s0, 0
-; GFX6-NEXT: s_brev_b32 s17, 1
-; GFX6-NEXT: s_cselect_b32 s19, s0, 0
-; GFX6-NEXT: s_sub_i32 s19, s17, s19
-; GFX6-NEXT: s_cmp_gt_i32 s19, s8
-; GFX6-NEXT: s_cselect_b32 s8, s19, s8
-; GFX6-NEXT: s_cmp_lt_i32 s8, s18
-; GFX6-NEXT: s_cselect_b32 s8, s8, s18
-; GFX6-NEXT: s_add_i32 s0, s0, s8
-; GFX6-NEXT: s_ashr_i32 s0, s0, 16
+; GFX6-NEXT: s_max_i32 s8, s19, s8
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
+; GFX6-NEXT: s_min_i32 s8, s8, s18
+; GFX6-NEXT: s_min_i32 s18, s1, 0
+; GFX6-NEXT: s_add_i32 s0, s0, s8
; GFX6-NEXT: s_lshl_b32 s8, s9, 16
-; GFX6-NEXT: s_cmp_gt_i32 s1, 0
-; GFX6-NEXT: s_cselect_b32 s9, s1, 0
-; GFX6-NEXT: s_sub_i32 s9, s16, s9
-; GFX6-NEXT: s_cmp_lt_i32 s1, 0
-; GFX6-NEXT: s_cselect_b32 s18, s1, 0
+; GFX6-NEXT: s_max_i32 s9, s1, 0
; GFX6-NEXT: s_sub_i32 s18, s17, s18
-; GFX6-NEXT: s_cmp_gt_i32 s18, s8
-; GFX6-NEXT: s_cselect_b32 s8, s18, s8
-; GFX6-NEXT: s_cmp_lt_i32 s8, s9
-; GFX6-NEXT: s_cselect_b32 s8, s8, s9
-; GFX6-NEXT: s_add_i32 s1, s1, s8
-; GFX6-NEXT: s_ashr_i32 s1, s1, 16
+; GFX6-NEXT: s_sub_i32 s9, s16, s9
+; GFX6-NEXT: s_max_i32 s8, s18, s8
+; GFX6-NEXT: s_min_i32 s8, s8, s9
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
+; GFX6-NEXT: s_add_i32 s1, s1, s8
; GFX6-NEXT: s_lshl_b32 s8, s10, 16
-; GFX6-NEXT: s_cmp_gt_i32 s2, 0
-; GFX6-NEXT: s_cselect_b32 s9, s2, 0
-; GFX6-NEXT: s_sub_i32 s9, s16, s9
-; GFX6-NEXT: s_cmp_lt_i32 s2, 0
-; GFX6-NEXT: s_cselect_b32 s10, s2, 0
+; GFX6-NEXT: s_min_i32 s10, s2, 0
+; GFX6-NEXT: s_max_i32 s9, s2, 0
; GFX6-NEXT: s_sub_i32 s10, s17, s10
-; GFX6-NEXT: s_cmp_gt_i32 s10, s8
-; GFX6-NEXT: s_cselect_b32 s8, s10, s8
-; GFX6-NEXT: s_cmp_lt_i32 s8, s9
-; GFX6-NEXT: s_cselect_b32 s8, s8, s9
-; GFX6-NEXT: s_add_i32 s2, s2, s8
-; GFX6-NEXT: s_ashr_i32 s2, s2, 16
+; GFX6-NEXT: s_sub_i32 s9, s16, s9
+; GFX6-NEXT: s_max_i32 s8, s10, s8
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
+; GFX6-NEXT: s_min_i32 s8, s8, s9
+; GFX6-NEXT: s_min_i32 s10, s3, 0
+; GFX6-NEXT: s_add_i32 s2, s2, s8
+; GFX6-NEXT: s_max_i32 s9, s3, 0
; GFX6-NEXT: s_lshl_b32 s8, s11, 16
-; GFX6-NEXT: s_cmp_gt_i32 s3, 0
-; GFX6-NEXT: s_cselect_b32 s9, s3, 0
-; GFX6-NEXT: s_sub_i32 s9, s16, s9
-; GFX6-NEXT: s_cmp_lt_i32 s3, 0
-; GFX6-NEXT: s_cselect_b32 s10, s3, 0
; GFX6-NEXT: s_sub_i32 s10, s17, s10
-; GFX6-NEXT: s_cmp_gt_i32 s10, s8
-; GFX6-NEXT: s_cselect_b32 s8, s10, s8
-; GFX6-NEXT: s_cmp_lt_i32 s8, s9
-; GFX6-NEXT: s_cselect_b32 s8, s8, s9
-; GFX6-NEXT: s_add_i32 s3, s3, s8
-; GFX6-NEXT: s_ashr_i32 s3, s3, 16
+; GFX6-NEXT: s_sub_i32 s9, s16, s9
+; GFX6-NEXT: s_max_i32 s8, s10, s8
; GFX6-NEXT: s_lshl_b32 s4, s4, 16
+; GFX6-NEXT: s_min_i32 s8, s8, s9
+; GFX6-NEXT: s_min_i32 s10, s4, 0
+; GFX6-NEXT: s_add_i32 s3, s3, s8
+; GFX6-NEXT: s_max_i32 s9, s4, 0
; GFX6-NEXT: s_lshl_b32 s8, s12, 16
-; GFX6-NEXT: s_cmp_gt_i32 s4, 0
-; GFX6-NEXT: s_cselect_b32 s9, s4, 0
-; GFX6-NEXT: s_sub_i32 s9, s16, s9
-; GFX6-NEXT: s_cmp_lt_i32 s4, 0
-; GFX6-NEXT: s_cselect_b32 s10, s4, 0
; GFX6-NEXT: s_sub_i32 s10, s17, s10
-; GFX6-NEXT: s_cmp_gt_i32 s10, s8
-; GFX6-NEXT: s_cselect_b32 s8, s10, s8
-; GFX6-NEXT: s_cmp_lt_i32 s8, s9
-; GFX6-NEXT: s_cselect_b32 s8, s8, s9
-; GFX6-NEXT: s_add_i32 s4, s4, s8
-; GFX6-NEXT: s_ashr_i32 s4, s4, 16
+; GFX6-NEXT: s_sub_i32 s9, s16, s9
+; GFX6-NEXT: s_max_i32 s8, s10, s8
; GFX6-NEXT: s_lshl_b32 s5, s5, 16
+; GFX6-NEXT: s_min_i32 s8, s8, s9
+; GFX6-NEXT: s_min_i32 s10, s5, 0
+; GFX6-NEXT: s_add_i32 s4, s4, s8
+; GFX6-NEXT: s_max_i32 s9, s5, 0
; GFX6-NEXT: s_lshl_b32 s8, s13, 16
-; GFX6-NEXT: s_cmp_gt_i32 s5, 0
-; GFX6-NEXT: s_cselect_b32 s9, s5, 0
-; GFX6-NEXT: s_sub_i32 s9, s16, s9
-; GFX6-NEXT: s_cmp_lt_i32 s5, 0
-; GFX6-NEXT: s_cselect_b32 s10, s5, 0
; GFX6-NEXT: s_sub_i32 s10, s17, s10
-; GFX6-NEXT: s_cmp_gt_i32 s10, s8
-; GFX6-NEXT: s_cselect_b32 s8, s10, s8
-; GFX6-NEXT: s_cmp_lt_i32 s8, s9
-; GFX6-NEXT: s_cselect_b32 s8, s8, s9
-; GFX6-NEXT: s_add_i32 s5, s5, s8
-; GFX6-NEXT: s_ashr_i32 s5, s5, 16
+; GFX6-NEXT: s_sub_i32 s9, s16, s9
+; GFX6-NEXT: s_max_i32 s8, s10, s8
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
+; GFX6-NEXT: s_min_i32 s8, s8, s9
+; GFX6-NEXT: s_min_i32 s10, s6, 0
+; GFX6-NEXT: s_add_i32 s5, s5, s8
+; GFX6-NEXT: s_max_i32 s9, s6, 0
; GFX6-NEXT: s_lshl_b32 s8, s14, 16
-; GFX6-NEXT: s_cmp_gt_i32 s6, 0
-; GFX6-NEXT: s_cselect_b32 s9, s6, 0
-; GFX6-NEXT: s_sub_i32 s9, s16, s9
-; GFX6-NEXT: s_cmp_lt_i32 s6, 0
-; GFX6-NEXT: s_cselect_b32 s10, s6, 0
; GFX6-NEXT: s_sub_i32 s10, s17, s10
-; GFX6-NEXT: s_cmp_gt_i32 s10, s8
-; GFX6-NEXT: s_cselect_b32 s8, s10, s8
-; GFX6-NEXT: s_cmp_lt_i32 s8, s9
-; GFX6-NEXT: s_cselect_b32 s8, s8, s9
-; GFX6-NEXT: s_add_i32 s6, s6, s8
-; GFX6-NEXT: s_ashr_i32 s6, s6, 16
+; GFX6-NEXT: s_sub_i32 s9, s16, s9
+; GFX6-NEXT: s_max_i32 s8, s10, s8
; GFX6-NEXT: s_lshl_b32 s7, s7, 16
+; GFX6-NEXT: s_min_i32 s8, s8, s9
+; GFX6-NEXT: s_min_i32 s10, s7, 0
+; GFX6-NEXT: s_add_i32 s6, s6, s8
+; GFX6-NEXT: s_max_i32 s9, s7, 0
; GFX6-NEXT: s_lshl_b32 s8, s15, 16
-; GFX6-NEXT: s_cmp_gt_i32 s7, 0
-; GFX6-NEXT: s_cselect_b32 s9, s7, 0
-; GFX6-NEXT: s_sub_i32 s9, s16, s9
-; GFX6-NEXT: s_cmp_lt_i32 s7, 0
-; GFX6-NEXT: s_cselect_b32 s10, s7, 0
; GFX6-NEXT: s_sub_i32 s10, s17, s10
-; GFX6-NEXT: s_cmp_gt_i32 s10, s8
-; GFX6-NEXT: s_cselect_b32 s8, s10, s8
-; GFX6-NEXT: s_cmp_lt_i32 s8, s9
-; GFX6-NEXT: s_cselect_b32 s8, s8, s9
+; GFX6-NEXT: s_sub_i32 s9, s16, s9
+; GFX6-NEXT: s_max_i32 s8, s10, s8
+; GFX6-NEXT: s_min_i32 s8, s8, s9
; GFX6-NEXT: s_add_i32 s7, s7, s8
+; GFX6-NEXT: s_ashr_i32 s1, s1, 16
; GFX6-NEXT: s_mov_b32 s8, 0xffff
+; GFX6-NEXT: s_ashr_i32 s0, s0, 16
; GFX6-NEXT: s_and_b32 s1, s1, s8
+; GFX6-NEXT: s_ashr_i32 s2, s2, 16
+; GFX6-NEXT: s_ashr_i32 s3, s3, 16
; GFX6-NEXT: s_and_b32 s0, s0, s8
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_and_b32 s1, s2, s8
; GFX6-NEXT: s_and_b32 s2, s3, s8
+; GFX6-NEXT: s_ashr_i32 s5, s5, 16
; GFX6-NEXT: s_and_b32 s3, s5, s8
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
+; GFX6-NEXT: s_ashr_i32 s4, s4, 16
; GFX6-NEXT: s_ashr_i32 s7, s7, 16
; GFX6-NEXT: s_or_b32 s1, s1, s2
; GFX6-NEXT: s_and_b32 s2, s4, s8
; GFX6-NEXT: s_and_b32 s4, s7, s8
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
+; GFX6-NEXT: s_ashr_i32 s6, s6, 16
; GFX6-NEXT: s_or_b32 s2, s2, s3
; GFX6-NEXT: s_and_b32 s3, s6, s8
; GFX6-NEXT: s_lshl_b32 s4, s4, 16
@@ -4419,144 +3951,112 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
;
; GFX8-LABEL: s_saddsat_v8i16:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_lshr_b32 s12, s4, 16
-; GFX8-NEXT: s_lshr_b32 s13, s5, 16
-; GFX8-NEXT: s_lshr_b32 s14, s6, 16
-; GFX8-NEXT: s_lshr_b32 s8, s0, 16
-; GFX8-NEXT: s_lshr_b32 s9, s1, 16
-; GFX8-NEXT: s_lshr_b32 s10, s2, 16
-; GFX8-NEXT: s_lshr_b32 s11, s3, 16
-; GFX8-NEXT: s_lshr_b32 s15, s7, 16
; GFX8-NEXT: s_sext_i32_i16 s18, s0
; GFX8-NEXT: s_sext_i32_i16 s19, 0
-; GFX8-NEXT: s_cmp_gt_i32 s18, s19
-; GFX8-NEXT: s_movk_i32 s16, 0x7fff
-; GFX8-NEXT: s_cselect_b32 s20, s18, s19
-; GFX8-NEXT: s_sub_i32 s20, s16, s20
-; GFX8-NEXT: s_cmp_lt_i32 s18, s19
+; GFX8-NEXT: s_max_i32 s20, s18, s19
; GFX8-NEXT: s_movk_i32 s17, 0x8000
-; GFX8-NEXT: s_cselect_b32 s18, s18, s19
+; GFX8-NEXT: s_min_i32 s18, s18, s19
; GFX8-NEXT: s_sub_i32 s18, s17, s18
+; GFX8-NEXT: s_lshr_b32 s12, s4, 16
+; GFX8-NEXT: s_movk_i32 s16, 0x7fff
; GFX8-NEXT: s_sext_i32_i16 s18, s18
; GFX8-NEXT: s_sext_i32_i16 s4, s4
-; GFX8-NEXT: s_cmp_gt_i32 s18, s4
-; GFX8-NEXT: s_cselect_b32 s4, s18, s4
+; GFX8-NEXT: s_max_i32 s4, s18, s4
+; GFX8-NEXT: s_sub_i32 s20, s16, s20
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s18, s20
-; GFX8-NEXT: s_cmp_lt_i32 s4, s18
-; GFX8-NEXT: s_cselect_b32 s4, s4, s18
+; GFX8-NEXT: s_lshr_b32 s8, s0, 16
+; GFX8-NEXT: s_min_i32 s4, s4, s18
; GFX8-NEXT: s_add_i32 s0, s0, s4
; GFX8-NEXT: s_sext_i32_i16 s4, s8
-; GFX8-NEXT: s_cmp_gt_i32 s4, s19
-; GFX8-NEXT: s_cselect_b32 s18, s4, s19
-; GFX8-NEXT: s_sub_i32 s18, s16, s18
-; GFX8-NEXT: s_cmp_lt_i32 s4, s19
-; GFX8-NEXT: s_cselect_b32 s4, s4, s19
+; GFX8-NEXT: s_max_i32 s18, s4, s19
+; GFX8-NEXT: s_min_i32 s4, s4, s19
; GFX8-NEXT: s_sub_i32 s4, s17, s4
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s12, s12
-; GFX8-NEXT: s_cmp_gt_i32 s4, s12
-; GFX8-NEXT: s_cselect_b32 s4, s4, s12
+; GFX8-NEXT: s_max_i32 s4, s4, s12
+; GFX8-NEXT: s_sub_i32 s18, s16, s18
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s12, s18
-; GFX8-NEXT: s_cmp_lt_i32 s4, s12
-; GFX8-NEXT: s_cselect_b32 s4, s4, s12
+; GFX8-NEXT: s_min_i32 s4, s4, s12
; GFX8-NEXT: s_add_i32 s8, s8, s4
; GFX8-NEXT: s_sext_i32_i16 s4, s1
-; GFX8-NEXT: s_cmp_gt_i32 s4, s19
-; GFX8-NEXT: s_cselect_b32 s12, s4, s19
-; GFX8-NEXT: s_sub_i32 s12, s16, s12
-; GFX8-NEXT: s_cmp_lt_i32 s4, s19
-; GFX8-NEXT: s_cselect_b32 s4, s4, s19
+; GFX8-NEXT: s_max_i32 s12, s4, s19
+; GFX8-NEXT: s_min_i32 s4, s4, s19
; GFX8-NEXT: s_sub_i32 s4, s17, s4
+; GFX8-NEXT: s_lshr_b32 s13, s5, 16
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s5, s5
-; GFX8-NEXT: s_cmp_gt_i32 s4, s5
-; GFX8-NEXT: s_cselect_b32 s4, s4, s5
+; GFX8-NEXT: s_max_i32 s4, s4, s5
+; GFX8-NEXT: s_sub_i32 s12, s16, s12
; GFX8-NEXT: s_sext_i32_i16 s5, s12
; GFX8-NEXT: s_sext_i32_i16 s4, s4
-; GFX8-NEXT: s_cmp_lt_i32 s4, s5
-; GFX8-NEXT: s_cselect_b32 s4, s4, s5
+; GFX8-NEXT: s_lshr_b32 s9, s1, 16
+; GFX8-NEXT: s_min_i32 s4, s4, s5
; GFX8-NEXT: s_add_i32 s1, s1, s4
; GFX8-NEXT: s_sext_i32_i16 s4, s9
-; GFX8-NEXT: s_cmp_gt_i32 s4, s19
-; GFX8-NEXT: s_cselect_b32 s5, s4, s19
-; GFX8-NEXT: s_sub_i32 s5, s16, s5
-; GFX8-NEXT: s_cmp_lt_i32 s4, s19
-; GFX8-NEXT: s_cselect_b32 s4, s4, s19
+; GFX8-NEXT: s_max_i32 s5, s4, s19
+; GFX8-NEXT: s_min_i32 s4, s4, s19
; GFX8-NEXT: s_sub_i32 s4, s17, s4
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s12, s13
-; GFX8-NEXT: s_cmp_gt_i32 s4, s12
-; GFX8-NEXT: s_cselect_b32 s4, s4, s12
+; GFX8-NEXT: s_sub_i32 s5, s16, s5
+; GFX8-NEXT: s_max_i32 s4, s4, s12
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s5, s5
-; GFX8-NEXT: s_cmp_lt_i32 s4, s5
-; GFX8-NEXT: s_cselect_b32 s4, s4, s5
+; GFX8-NEXT: s_min_i32 s4, s4, s5
; GFX8-NEXT: s_add_i32 s9, s9, s4
; GFX8-NEXT: s_sext_i32_i16 s4, s2
-; GFX8-NEXT: s_cmp_gt_i32 s4, s19
-; GFX8-NEXT: s_cselect_b32 s5, s4, s19
-; GFX8-NEXT: s_sub_i32 s5, s16, s5
-; GFX8-NEXT: s_cmp_lt_i32 s4, s19
-; GFX8-NEXT: s_cselect_b32 s4, s4, s19
+; GFX8-NEXT: s_max_i32 s5, s4, s19
+; GFX8-NEXT: s_min_i32 s4, s4, s19
; GFX8-NEXT: s_sub_i32 s4, s17, s4
+; GFX8-NEXT: s_lshr_b32 s14, s6, 16
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s6, s6
-; GFX8-NEXT: s_cmp_gt_i32 s4, s6
-; GFX8-NEXT: s_cselect_b32 s4, s4, s6
+; GFX8-NEXT: s_max_i32 s4, s4, s6
+; GFX8-NEXT: s_sub_i32 s5, s16, s5
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s5, s5
-; GFX8-NEXT: s_cmp_lt_i32 s4, s5
-; GFX8-NEXT: s_cselect_b32 s4, s4, s5
+; GFX8-NEXT: s_lshr_b32 s10, s2, 16
+; GFX8-NEXT: s_min_i32 s4, s4, s5
; GFX8-NEXT: s_add_i32 s2, s2, s4
; GFX8-NEXT: s_sext_i32_i16 s4, s10
-; GFX8-NEXT: s_cmp_gt_i32 s4, s19
-; GFX8-NEXT: s_cselect_b32 s5, s4, s19
-; GFX8-NEXT: s_sub_i32 s5, s16, s5
-; GFX8-NEXT: s_cmp_lt_i32 s4, s19
-; GFX8-NEXT: s_cselect_b32 s4, s4, s19
+; GFX8-NEXT: s_max_i32 s5, s4, s19
+; GFX8-NEXT: s_min_i32 s4, s4, s19
; GFX8-NEXT: s_sub_i32 s4, s17, s4
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s6, s14
-; GFX8-NEXT: s_cmp_gt_i32 s4, s6
-; GFX8-NEXT: s_cselect_b32 s4, s4, s6
+; GFX8-NEXT: s_max_i32 s4, s4, s6
+; GFX8-NEXT: s_sub_i32 s5, s16, s5
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s5, s5
-; GFX8-NEXT: s_cmp_lt_i32 s4, s5
-; GFX8-NEXT: s_cselect_b32 s4, s4, s5
+; GFX8-NEXT: s_min_i32 s4, s4, s5
; GFX8-NEXT: s_add_i32 s10, s10, s4
; GFX8-NEXT: s_sext_i32_i16 s4, s3
-; GFX8-NEXT: s_cmp_gt_i32 s4, s19
-; GFX8-NEXT: s_cselect_b32 s5, s4, s19
-; GFX8-NEXT: s_sub_i32 s5, s16, s5
-; GFX8-NEXT: s_cmp_lt_i32 s4, s19
-; GFX8-NEXT: s_cselect_b32 s4, s4, s19
+; GFX8-NEXT: s_max_i32 s5, s4, s19
+; GFX8-NEXT: s_min_i32 s4, s4, s19
; GFX8-NEXT: s_sub_i32 s4, s17, s4
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s6, s7
-; GFX8-NEXT: s_cmp_gt_i32 s4, s6
-; GFX8-NEXT: s_cselect_b32 s4, s4, s6
+; GFX8-NEXT: s_sub_i32 s5, s16, s5
+; GFX8-NEXT: s_max_i32 s4, s4, s6
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s5, s5
-; GFX8-NEXT: s_cmp_lt_i32 s4, s5
-; GFX8-NEXT: s_cselect_b32 s4, s4, s5
+; GFX8-NEXT: s_lshr_b32 s11, s3, 16
+; GFX8-NEXT: s_min_i32 s4, s4, s5
; GFX8-NEXT: s_add_i32 s3, s3, s4
; GFX8-NEXT: s_sext_i32_i16 s4, s11
-; GFX8-NEXT: s_cmp_gt_i32 s4, s19
-; GFX8-NEXT: s_cselect_b32 s5, s4, s19
-; GFX8-NEXT: s_sub_i32 s5, s16, s5
-; GFX8-NEXT: s_cmp_lt_i32 s4, s19
-; GFX8-NEXT: s_cselect_b32 s4, s4, s19
+; GFX8-NEXT: s_max_i32 s5, s4, s19
+; GFX8-NEXT: s_min_i32 s4, s4, s19
; GFX8-NEXT: s_sub_i32 s4, s17, s4
+; GFX8-NEXT: s_lshr_b32 s15, s7, 16
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s6, s15
-; GFX8-NEXT: s_cmp_gt_i32 s4, s6
-; GFX8-NEXT: s_cselect_b32 s4, s4, s6
+; GFX8-NEXT: s_sub_i32 s5, s16, s5
+; GFX8-NEXT: s_max_i32 s4, s4, s6
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s5, s5
-; GFX8-NEXT: s_cmp_lt_i32 s4, s5
-; GFX8-NEXT: s_cselect_b32 s4, s4, s5
+; GFX8-NEXT: s_min_i32 s4, s4, s5
; GFX8-NEXT: s_add_i32 s11, s11, s4
; GFX8-NEXT: s_bfe_u32 s4, s8, 0x100000
; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
index 6209f7e4335b..945bac091858 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
@@ -61,17 +61,13 @@ define amdgpu_ps i7 @s_ssubsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
; GFX6-LABEL: s_ssubsat_i7:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 25
+; GFX6-NEXT: s_max_i32 s2, s0, -1
+; GFX6-NEXT: s_min_i32 s3, s0, -1
; GFX6-NEXT: s_lshl_b32 s1, s1, 25
-; GFX6-NEXT: s_cmp_gt_i32 s0, -1
-; GFX6-NEXT: s_cselect_b32 s2, s0, -1
; GFX6-NEXT: s_sub_i32 s2, s2, 0x7fffffff
-; GFX6-NEXT: s_cmp_lt_i32 s0, -1
-; GFX6-NEXT: s_cselect_b32 s3, s0, -1
; GFX6-NEXT: s_sub_i32 s3, s3, 0x80000000
-; GFX6-NEXT: s_cmp_gt_i32 s2, s1
-; GFX6-NEXT: s_cselect_b32 s1, s2, s1
-; GFX6-NEXT: s_cmp_lt_i32 s1, s3
-; GFX6-NEXT: s_cselect_b32 s1, s1, s3
+; GFX6-NEXT: s_max_i32 s1, s2, s1
+; GFX6-NEXT: s_min_i32 s1, s1, s3
; GFX6-NEXT: s_sub_i32 s0, s0, s1
; GFX6-NEXT: s_ashr_i32 s0, s0, 25
; GFX6-NEXT: ; return to shader part epilog
@@ -80,23 +76,19 @@ define amdgpu_ps i7 @s_ssubsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_bfe_u32 s2, 9, 0x100000
; GFX8-NEXT: s_lshl_b32 s0, s0, s2
-; GFX8-NEXT: s_lshl_b32 s1, s1, s2
; GFX8-NEXT: s_sext_i32_i16 s3, s0
; GFX8-NEXT: s_sext_i32_i16 s4, -1
-; GFX8-NEXT: s_cmp_gt_i32 s3, s4
-; GFX8-NEXT: s_cselect_b32 s5, s3, s4
+; GFX8-NEXT: s_max_i32 s5, s3, s4
+; GFX8-NEXT: s_lshl_b32 s1, s1, s2
; GFX8-NEXT: s_sub_i32 s5, s5, 0x7fff
-; GFX8-NEXT: s_cmp_lt_i32 s3, s4
-; GFX8-NEXT: s_cselect_b32 s3, s3, s4
-; GFX8-NEXT: s_sub_i32 s3, s3, 0xffff8000
+; GFX8-NEXT: s_min_i32 s3, s3, s4
; GFX8-NEXT: s_sext_i32_i16 s4, s5
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_cmp_gt_i32 s4, s1
-; GFX8-NEXT: s_cselect_b32 s1, s4, s1
+; GFX8-NEXT: s_sub_i32 s3, s3, 0xffff8000
+; GFX8-NEXT: s_max_i32 s1, s4, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s3, s3
-; GFX8-NEXT: s_cmp_lt_i32 s1, s3
-; GFX8-NEXT: s_cselect_b32 s1, s1, s3
+; GFX8-NEXT: s_min_i32 s1, s1, s3
; GFX8-NEXT: s_sub_i32 s0, s0, s1
; GFX8-NEXT: s_sext_i32_i16 s0, s0
; GFX8-NEXT: s_ashr_i32 s0, s0, s2
@@ -183,17 +175,13 @@ define amdgpu_ps i8 @s_ssubsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
; GFX6-LABEL: s_ssubsat_i8:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 24
+; GFX6-NEXT: s_max_i32 s2, s0, -1
+; GFX6-NEXT: s_min_i32 s3, s0, -1
; GFX6-NEXT: s_lshl_b32 s1, s1, 24
-; GFX6-NEXT: s_cmp_gt_i32 s0, -1
-; GFX6-NEXT: s_cselect_b32 s2, s0, -1
; GFX6-NEXT: s_sub_i32 s2, s2, 0x7fffffff
-; GFX6-NEXT: s_cmp_lt_i32 s0, -1
-; GFX6-NEXT: s_cselect_b32 s3, s0, -1
; GFX6-NEXT: s_sub_i32 s3, s3, 0x80000000
-; GFX6-NEXT: s_cmp_gt_i32 s2, s1
-; GFX6-NEXT: s_cselect_b32 s1, s2, s1
-; GFX6-NEXT: s_cmp_lt_i32 s1, s3
-; GFX6-NEXT: s_cselect_b32 s1, s1, s3
+; GFX6-NEXT: s_max_i32 s1, s2, s1
+; GFX6-NEXT: s_min_i32 s1, s1, s3
; GFX6-NEXT: s_sub_i32 s0, s0, s1
; GFX6-NEXT: s_ashr_i32 s0, s0, 24
; GFX6-NEXT: ; return to shader part epilog
@@ -202,23 +190,19 @@ define amdgpu_ps i8 @s_ssubsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_bfe_u32 s2, 8, 0x100000
; GFX8-NEXT: s_lshl_b32 s0, s0, s2
-; GFX8-NEXT: s_lshl_b32 s1, s1, s2
; GFX8-NEXT: s_sext_i32_i16 s3, s0
; GFX8-NEXT: s_sext_i32_i16 s4, -1
-; GFX8-NEXT: s_cmp_gt_i32 s3, s4
-; GFX8-NEXT: s_cselect_b32 s5, s3, s4
+; GFX8-NEXT: s_max_i32 s5, s3, s4
+; GFX8-NEXT: s_lshl_b32 s1, s1, s2
; GFX8-NEXT: s_sub_i32 s5, s5, 0x7fff
-; GFX8-NEXT: s_cmp_lt_i32 s3, s4
-; GFX8-NEXT: s_cselect_b32 s3, s3, s4
-; GFX8-NEXT: s_sub_i32 s3, s3, 0xffff8000
+; GFX8-NEXT: s_min_i32 s3, s3, s4
; GFX8-NEXT: s_sext_i32_i16 s4, s5
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_cmp_gt_i32 s4, s1
-; GFX8-NEXT: s_cselect_b32 s1, s4, s1
+; GFX8-NEXT: s_sub_i32 s3, s3, 0xffff8000
+; GFX8-NEXT: s_max_i32 s1, s4, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s3, s3
-; GFX8-NEXT: s_cmp_lt_i32 s1, s3
-; GFX8-NEXT: s_cselect_b32 s1, s1, s3
+; GFX8-NEXT: s_min_i32 s1, s1, s3
; GFX8-NEXT: s_sub_i32 s0, s0, s1
; GFX8-NEXT: s_sext_i32_i16 s0, s0
; GFX8-NEXT: s_ashr_i32 s0, s0, s2
@@ -360,38 +344,30 @@ define amdgpu_ps i16 @s_ssubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
; GFX6-LABEL: s_ssubsat_v2i8:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshr_b32 s2, s0, 8
-; GFX6-NEXT: s_lshr_b32 s3, s1, 8
; GFX6-NEXT: s_lshl_b32 s0, s0, 24
-; GFX6-NEXT: s_lshl_b32 s1, s1, 24
-; GFX6-NEXT: s_cmp_gt_i32 s0, -1
; GFX6-NEXT: s_brev_b32 s4, -2
-; GFX6-NEXT: s_cselect_b32 s6, s0, -1
+; GFX6-NEXT: s_max_i32 s6, s0, -1
+; GFX6-NEXT: s_lshr_b32 s3, s1, 8
+; GFX6-NEXT: s_lshl_b32 s1, s1, 24
; GFX6-NEXT: s_sub_i32 s6, s6, s4
-; GFX6-NEXT: s_cmp_lt_i32 s0, -1
; GFX6-NEXT: s_brev_b32 s5, 1
-; GFX6-NEXT: s_cselect_b32 s7, s0, -1
+; GFX6-NEXT: s_min_i32 s7, s0, -1
; GFX6-NEXT: s_sub_i32 s7, s7, s5
-; GFX6-NEXT: s_cmp_gt_i32 s6, s1
-; GFX6-NEXT: s_cselect_b32 s1, s6, s1
-; GFX6-NEXT: s_cmp_lt_i32 s1, s7
-; GFX6-NEXT: s_cselect_b32 s1, s1, s7
+; GFX6-NEXT: s_max_i32 s1, s6, s1
+; GFX6-NEXT: s_min_i32 s1, s1, s7
; GFX6-NEXT: s_sub_i32 s0, s0, s1
; GFX6-NEXT: s_lshl_b32 s1, s2, 24
-; GFX6-NEXT: s_ashr_i32 s0, s0, 24
; GFX6-NEXT: s_lshl_b32 s2, s3, 24
-; GFX6-NEXT: s_cmp_gt_i32 s1, -1
-; GFX6-NEXT: s_cselect_b32 s3, s1, -1
+; GFX6-NEXT: s_max_i32 s3, s1, -1
; GFX6-NEXT: s_sub_i32 s3, s3, s4
-; GFX6-NEXT: s_cmp_lt_i32 s1, -1
-; GFX6-NEXT: s_cselect_b32 s4, s1, -1
+; GFX6-NEXT: s_min_i32 s4, s1, -1
; GFX6-NEXT: s_sub_i32 s4, s4, s5
-; GFX6-NEXT: s_cmp_gt_i32 s3, s2
-; GFX6-NEXT: s_cselect_b32 s2, s3, s2
-; GFX6-NEXT: s_cmp_lt_i32 s2, s4
-; GFX6-NEXT: s_cselect_b32 s2, s2, s4
+; GFX6-NEXT: s_max_i32 s2, s3, s2
+; GFX6-NEXT: s_min_i32 s2, s2, s4
; GFX6-NEXT: s_sub_i32 s1, s1, s2
-; GFX6-NEXT: s_movk_i32 s2, 0xff
; GFX6-NEXT: s_ashr_i32 s1, s1, 24
+; GFX6-NEXT: s_movk_i32 s2, 0xff
+; GFX6-NEXT: s_ashr_i32 s0, s0, 24
; GFX6-NEXT: s_and_b32 s1, s1, s2
; GFX6-NEXT: s_and_b32 s0, s0, s2
; GFX6-NEXT: s_lshl_b32 s1, s1, 8
@@ -403,50 +379,42 @@ define amdgpu_ps i16 @s_ssubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
; GFX8-NEXT: s_bfe_u32 s4, 8, 0x100000
; GFX8-NEXT: s_lshr_b32 s2, s0, 8
; GFX8-NEXT: s_lshl_b32 s0, s0, s4
-; GFX8-NEXT: s_lshr_b32 s3, s1, 8
-; GFX8-NEXT: s_lshl_b32 s1, s1, s4
; GFX8-NEXT: s_sext_i32_i16 s7, s0
; GFX8-NEXT: s_sext_i32_i16 s8, -1
-; GFX8-NEXT: s_cmp_gt_i32 s7, s8
+; GFX8-NEXT: s_max_i32 s9, s7, s8
; GFX8-NEXT: s_movk_i32 s5, 0x7fff
-; GFX8-NEXT: s_cselect_b32 s9, s7, s8
; GFX8-NEXT: s_sub_i32 s9, s9, s5
-; GFX8-NEXT: s_cmp_lt_i32 s7, s8
+; GFX8-NEXT: s_lshr_b32 s3, s1, 8
+; GFX8-NEXT: s_lshl_b32 s1, s1, s4
; GFX8-NEXT: s_movk_i32 s6, 0x8000
-; GFX8-NEXT: s_cselect_b32 s7, s7, s8
-; GFX8-NEXT: s_sub_i32 s7, s7, s6
+; GFX8-NEXT: s_min_i32 s7, s7, s8
; GFX8-NEXT: s_sext_i32_i16 s9, s9
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_cmp_gt_i32 s9, s1
-; GFX8-NEXT: s_cselect_b32 s1, s9, s1
+; GFX8-NEXT: s_sub_i32 s7, s7, s6
+; GFX8-NEXT: s_max_i32 s1, s9, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s7, s7
-; GFX8-NEXT: s_cmp_lt_i32 s1, s7
-; GFX8-NEXT: s_cselect_b32 s1, s1, s7
+; GFX8-NEXT: s_min_i32 s1, s1, s7
; GFX8-NEXT: s_sub_i32 s0, s0, s1
-; GFX8-NEXT: s_sext_i32_i16 s0, s0
; GFX8-NEXT: s_lshl_b32 s1, s2, s4
; GFX8-NEXT: s_lshl_b32 s2, s3, s4
-; GFX8-NEXT: s_ashr_i32 s0, s0, s4
; GFX8-NEXT: s_sext_i32_i16 s3, s1
-; GFX8-NEXT: s_cmp_gt_i32 s3, s8
-; GFX8-NEXT: s_cselect_b32 s7, s3, s8
+; GFX8-NEXT: s_max_i32 s7, s3, s8
; GFX8-NEXT: s_sub_i32 s5, s7, s5
-; GFX8-NEXT: s_cmp_lt_i32 s3, s8
-; GFX8-NEXT: s_cselect_b32 s3, s3, s8
-; GFX8-NEXT: s_sub_i32 s3, s3, s6
+; GFX8-NEXT: s_min_i32 s3, s3, s8
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_sext_i32_i16 s2, s2
-; GFX8-NEXT: s_cmp_gt_i32 s5, s2
-; GFX8-NEXT: s_cselect_b32 s2, s5, s2
+; GFX8-NEXT: s_sub_i32 s3, s3, s6
+; GFX8-NEXT: s_max_i32 s2, s5, s2
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s3, s3
-; GFX8-NEXT: s_cmp_lt_i32 s2, s3
-; GFX8-NEXT: s_cselect_b32 s2, s2, s3
+; GFX8-NEXT: s_min_i32 s2, s2, s3
; GFX8-NEXT: s_sub_i32 s1, s1, s2
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_movk_i32 s2, 0xff
+; GFX8-NEXT: s_sext_i32_i16 s0, s0
; GFX8-NEXT: s_ashr_i32 s1, s1, s4
+; GFX8-NEXT: s_movk_i32 s2, 0xff
+; GFX8-NEXT: s_ashr_i32 s0, s0, s4
; GFX8-NEXT: s_and_b32 s1, s1, s2
; GFX8-NEXT: s_and_b32 s0, s0, s2
; GFX8-NEXT: s_lshl_b32 s1, s1, s4
@@ -714,68 +682,52 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX6-NEXT: s_lshr_b32 s2, s0, 8
; GFX6-NEXT: s_lshr_b32 s3, s0, 16
; GFX6-NEXT: s_lshr_b32 s4, s0, 24
+; GFX6-NEXT: s_lshl_b32 s0, s0, 24
+; GFX6-NEXT: s_brev_b32 s8, -2
+; GFX6-NEXT: s_max_i32 s10, s0, -1
; GFX6-NEXT: s_lshr_b32 s5, s1, 8
; GFX6-NEXT: s_lshr_b32 s6, s1, 16
; GFX6-NEXT: s_lshr_b32 s7, s1, 24
-; GFX6-NEXT: s_lshl_b32 s0, s0, 24
; GFX6-NEXT: s_lshl_b32 s1, s1, 24
-; GFX6-NEXT: s_cmp_gt_i32 s0, -1
-; GFX6-NEXT: s_brev_b32 s8, -2
-; GFX6-NEXT: s_cselect_b32 s10, s0, -1
; GFX6-NEXT: s_sub_i32 s10, s10, s8
-; GFX6-NEXT: s_cmp_lt_i32 s0, -1
; GFX6-NEXT: s_brev_b32 s9, 1
-; GFX6-NEXT: s_cselect_b32 s11, s0, -1
+; GFX6-NEXT: s_min_i32 s11, s0, -1
; GFX6-NEXT: s_sub_i32 s11, s11, s9
-; GFX6-NEXT: s_cmp_gt_i32 s10, s1
-; GFX6-NEXT: s_cselect_b32 s1, s10, s1
-; GFX6-NEXT: s_cmp_lt_i32 s1, s11
-; GFX6-NEXT: s_cselect_b32 s1, s1, s11
+; GFX6-NEXT: s_max_i32 s1, s10, s1
+; GFX6-NEXT: s_min_i32 s1, s1, s11
; GFX6-NEXT: s_sub_i32 s0, s0, s1
; GFX6-NEXT: s_lshl_b32 s1, s2, 24
-; GFX6-NEXT: s_ashr_i32 s0, s0, 24
; GFX6-NEXT: s_lshl_b32 s2, s5, 24
-; GFX6-NEXT: s_cmp_gt_i32 s1, -1
-; GFX6-NEXT: s_cselect_b32 s5, s1, -1
+; GFX6-NEXT: s_max_i32 s5, s1, -1
+; GFX6-NEXT: s_min_i32 s10, s1, -1
; GFX6-NEXT: s_sub_i32 s5, s5, s8
-; GFX6-NEXT: s_cmp_lt_i32 s1, -1
-; GFX6-NEXT: s_cselect_b32 s10, s1, -1
; GFX6-NEXT: s_sub_i32 s10, s10, s9
-; GFX6-NEXT: s_cmp_gt_i32 s5, s2
-; GFX6-NEXT: s_cselect_b32 s2, s5, s2
-; GFX6-NEXT: s_cmp_lt_i32 s2, s10
-; GFX6-NEXT: s_cselect_b32 s2, s2, s10
+; GFX6-NEXT: s_max_i32 s2, s5, s2
+; GFX6-NEXT: s_min_i32 s2, s2, s10
; GFX6-NEXT: s_sub_i32 s1, s1, s2
; GFX6-NEXT: s_lshl_b32 s2, s3, 24
-; GFX6-NEXT: s_ashr_i32 s1, s1, 24
+; GFX6-NEXT: s_max_i32 s5, s2, -1
; GFX6-NEXT: s_lshl_b32 s3, s6, 24
-; GFX6-NEXT: s_cmp_gt_i32 s2, -1
-; GFX6-NEXT: s_cselect_b32 s5, s2, -1
+; GFX6-NEXT: s_min_i32 s6, s2, -1
; GFX6-NEXT: s_sub_i32 s5, s5, s8
-; GFX6-NEXT: s_cmp_lt_i32 s2, -1
-; GFX6-NEXT: s_cselect_b32 s6, s2, -1
; GFX6-NEXT: s_sub_i32 s6, s6, s9
-; GFX6-NEXT: s_cmp_gt_i32 s5, s3
-; GFX6-NEXT: s_cselect_b32 s3, s5, s3
-; GFX6-NEXT: s_cmp_lt_i32 s3, s6
-; GFX6-NEXT: s_cselect_b32 s3, s3, s6
+; GFX6-NEXT: s_max_i32 s3, s5, s3
+; GFX6-NEXT: s_min_i32 s3, s3, s6
; GFX6-NEXT: s_sub_i32 s2, s2, s3
; GFX6-NEXT: s_lshl_b32 s3, s4, 24
-; GFX6-NEXT: s_ashr_i32 s2, s2, 24
+; GFX6-NEXT: s_max_i32 s5, s3, -1
+; GFX6-NEXT: s_min_i32 s6, s3, -1
; GFX6-NEXT: s_lshl_b32 s4, s7, 24
-; GFX6-NEXT: s_cmp_gt_i32 s3, -1
-; GFX6-NEXT: s_cselect_b32 s5, s3, -1
; GFX6-NEXT: s_sub_i32 s5, s5, s8
-; GFX6-NEXT: s_cmp_lt_i32 s3, -1
-; GFX6-NEXT: s_cselect_b32 s6, s3, -1
; GFX6-NEXT: s_sub_i32 s6, s6, s9
-; GFX6-NEXT: s_cmp_gt_i32 s5, s4
-; GFX6-NEXT: s_cselect_b32 s4, s5, s4
-; GFX6-NEXT: s_cmp_lt_i32 s4, s6
-; GFX6-NEXT: s_cselect_b32 s4, s4, s6
+; GFX6-NEXT: s_max_i32 s4, s5, s4
+; GFX6-NEXT: s_min_i32 s4, s4, s6
; GFX6-NEXT: s_sub_i32 s3, s3, s4
+; GFX6-NEXT: s_ashr_i32 s1, s1, 24
; GFX6-NEXT: s_movk_i32 s4, 0xff
+; GFX6-NEXT: s_ashr_i32 s0, s0, 24
; GFX6-NEXT: s_and_b32 s1, s1, s4
+; GFX6-NEXT: s_ashr_i32 s2, s2, 24
; GFX6-NEXT: s_and_b32 s0, s0, s4
; GFX6-NEXT: s_lshl_b32 s1, s1, 8
; GFX6-NEXT: s_or_b32 s0, s0, s1
@@ -795,91 +747,75 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX8-NEXT: s_lshr_b32 s3, s0, 16
; GFX8-NEXT: s_lshr_b32 s4, s0, 24
; GFX8-NEXT: s_lshl_b32 s0, s0, s8
-; GFX8-NEXT: s_lshr_b32 s5, s1, 8
-; GFX8-NEXT: s_lshr_b32 s6, s1, 16
-; GFX8-NEXT: s_lshr_b32 s7, s1, 24
-; GFX8-NEXT: s_lshl_b32 s1, s1, s8
; GFX8-NEXT: s_sext_i32_i16 s11, s0
; GFX8-NEXT: s_sext_i32_i16 s12, -1
-; GFX8-NEXT: s_cmp_gt_i32 s11, s12
+; GFX8-NEXT: s_max_i32 s13, s11, s12
; GFX8-NEXT: s_movk_i32 s9, 0x7fff
-; GFX8-NEXT: s_cselect_b32 s13, s11, s12
; GFX8-NEXT: s_sub_i32 s13, s13, s9
-; GFX8-NEXT: s_cmp_lt_i32 s11, s12
+; GFX8-NEXT: s_lshr_b32 s5, s1, 8
+; GFX8-NEXT: s_lshr_b32 s6, s1, 16
+; GFX8-NEXT: s_lshr_b32 s7, s1, 24
+; GFX8-NEXT: s_lshl_b32 s1, s1, s8
; GFX8-NEXT: s_movk_i32 s10, 0x8000
-; GFX8-NEXT: s_cselect_b32 s11, s11, s12
-; GFX8-NEXT: s_sub_i32 s11, s11, s10
+; GFX8-NEXT: s_min_i32 s11, s11, s12
; GFX8-NEXT: s_sext_i32_i16 s13, s13
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_cmp_gt_i32 s13, s1
-; GFX8-NEXT: s_cselect_b32 s1, s13, s1
+; GFX8-NEXT: s_sub_i32 s11, s11, s10
+; GFX8-NEXT: s_max_i32 s1, s13, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s11, s11
-; GFX8-NEXT: s_cmp_lt_i32 s1, s11
-; GFX8-NEXT: s_cselect_b32 s1, s1, s11
+; GFX8-NEXT: s_min_i32 s1, s1, s11
; GFX8-NEXT: s_sub_i32 s0, s0, s1
-; GFX8-NEXT: s_sext_i32_i16 s0, s0
; GFX8-NEXT: s_lshl_b32 s1, s2, s8
; GFX8-NEXT: s_lshl_b32 s2, s5, s8
-; GFX8-NEXT: s_ashr_i32 s0, s0, s8
; GFX8-NEXT: s_sext_i32_i16 s5, s1
-; GFX8-NEXT: s_cmp_gt_i32 s5, s12
-; GFX8-NEXT: s_cselect_b32 s11, s5, s12
+; GFX8-NEXT: s_max_i32 s11, s5, s12
; GFX8-NEXT: s_sub_i32 s11, s11, s9
-; GFX8-NEXT: s_cmp_lt_i32 s5, s12
-; GFX8-NEXT: s_cselect_b32 s5, s5, s12
-; GFX8-NEXT: s_sub_i32 s5, s5, s10
+; GFX8-NEXT: s_min_i32 s5, s5, s12
; GFX8-NEXT: s_sext_i32_i16 s11, s11
; GFX8-NEXT: s_sext_i32_i16 s2, s2
-; GFX8-NEXT: s_cmp_gt_i32 s11, s2
-; GFX8-NEXT: s_cselect_b32 s2, s11, s2
+; GFX8-NEXT: s_sub_i32 s5, s5, s10
+; GFX8-NEXT: s_max_i32 s2, s11, s2
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s5, s5
-; GFX8-NEXT: s_cmp_lt_i32 s2, s5
-; GFX8-NEXT: s_cselect_b32 s2, s2, s5
+; GFX8-NEXT: s_min_i32 s2, s2, s5
; GFX8-NEXT: s_sub_i32 s1, s1, s2
-; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_lshl_b32 s2, s3, s8
-; GFX8-NEXT: s_lshl_b32 s3, s6, s8
-; GFX8-NEXT: s_ashr_i32 s1, s1, s8
; GFX8-NEXT: s_sext_i32_i16 s5, s2
-; GFX8-NEXT: s_cmp_gt_i32 s5, s12
-; GFX8-NEXT: s_cselect_b32 s6, s5, s12
+; GFX8-NEXT: s_lshl_b32 s3, s6, s8
+; GFX8-NEXT: s_max_i32 s6, s5, s12
; GFX8-NEXT: s_sub_i32 s6, s6, s9
-; GFX8-NEXT: s_cmp_lt_i32 s5, s12
-; GFX8-NEXT: s_cselect_b32 s5, s5, s12
-; GFX8-NEXT: s_sub_i32 s5, s5, s10
+; GFX8-NEXT: s_min_i32 s5, s5, s12
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s3, s3
-; GFX8-NEXT: s_cmp_gt_i32 s6, s3
-; GFX8-NEXT: s_cselect_b32 s3, s6, s3
+; GFX8-NEXT: s_sub_i32 s5, s5, s10
+; GFX8-NEXT: s_max_i32 s3, s6, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s5, s5
-; GFX8-NEXT: s_cmp_lt_i32 s3, s5
-; GFX8-NEXT: s_cselect_b32 s3, s3, s5
+; GFX8-NEXT: s_min_i32 s3, s3, s5
; GFX8-NEXT: s_sub_i32 s2, s2, s3
-; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_lshl_b32 s3, s4, s8
-; GFX8-NEXT: s_lshl_b32 s4, s7, s8
-; GFX8-NEXT: s_ashr_i32 s2, s2, s8
; GFX8-NEXT: s_sext_i32_i16 s5, s3
-; GFX8-NEXT: s_cmp_gt_i32 s5, s12
-; GFX8-NEXT: s_cselect_b32 s6, s5, s12
+; GFX8-NEXT: s_max_i32 s6, s5, s12
+; GFX8-NEXT: s_lshl_b32 s4, s7, s8
; GFX8-NEXT: s_sub_i32 s6, s6, s9
-; GFX8-NEXT: s_cmp_lt_i32 s5, s12
-; GFX8-NEXT: s_cselect_b32 s5, s5, s12
-; GFX8-NEXT: s_sub_i32 s5, s5, s10
+; GFX8-NEXT: s_min_i32 s5, s5, s12
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s4, s4
-; GFX8-NEXT: s_cmp_gt_i32 s6, s4
-; GFX8-NEXT: s_cselect_b32 s4, s6, s4
+; GFX8-NEXT: s_sub_i32 s5, s5, s10
+; GFX8-NEXT: s_max_i32 s4, s6, s4
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s5, s5
-; GFX8-NEXT: s_cmp_lt_i32 s4, s5
-; GFX8-NEXT: s_cselect_b32 s4, s4, s5
+; GFX8-NEXT: s_sext_i32_i16 s1, s1
+; GFX8-NEXT: s_min_i32 s4, s4, s5
; GFX8-NEXT: s_sub_i32 s3, s3, s4
+; GFX8-NEXT: s_sext_i32_i16 s0, s0
+; GFX8-NEXT: s_ashr_i32 s1, s1, s8
; GFX8-NEXT: s_movk_i32 s4, 0xff
+; GFX8-NEXT: s_ashr_i32 s0, s0, s8
+; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_and_b32 s1, s1, s4
+; GFX8-NEXT: s_ashr_i32 s2, s2, s8
; GFX8-NEXT: s_and_b32 s0, s0, s4
; GFX8-NEXT: s_lshl_b32 s1, s1, 8
; GFX8-NEXT: s_sext_i32_i16 s3, s3
@@ -1046,17 +982,13 @@ define amdgpu_ps i24 @s_ssubsat_i24(i24 inreg %lhs, i24 inreg %rhs) {
; GFX6-LABEL: s_ssubsat_i24:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 8
+; GFX6-NEXT: s_max_i32 s2, s0, -1
+; GFX6-NEXT: s_min_i32 s3, s0, -1
; GFX6-NEXT: s_lshl_b32 s1, s1, 8
-; GFX6-NEXT: s_cmp_gt_i32 s0, -1
-; GFX6-NEXT: s_cselect_b32 s2, s0, -1
; GFX6-NEXT: s_sub_i32 s2, s2, 0x7fffffff
-; GFX6-NEXT: s_cmp_lt_i32 s0, -1
-; GFX6-NEXT: s_cselect_b32 s3, s0, -1
; GFX6-NEXT: s_sub_i32 s3, s3, 0x80000000
-; GFX6-NEXT: s_cmp_gt_i32 s2, s1
-; GFX6-NEXT: s_cselect_b32 s1, s2, s1
-; GFX6-NEXT: s_cmp_lt_i32 s1, s3
-; GFX6-NEXT: s_cselect_b32 s1, s1, s3
+; GFX6-NEXT: s_max_i32 s1, s2, s1
+; GFX6-NEXT: s_min_i32 s1, s1, s3
; GFX6-NEXT: s_sub_i32 s0, s0, s1
; GFX6-NEXT: s_ashr_i32 s0, s0, 8
; GFX6-NEXT: ; return to shader part epilog
@@ -1145,31 +1077,23 @@ define i32 @v_ssubsat_i32(i32 %lhs, i32 %rhs) {
define amdgpu_ps i32 @s_ssubsat_i32(i32 inreg %lhs, i32 inreg %rhs) {
; GFX6-LABEL: s_ssubsat_i32:
; GFX6: ; %bb.0:
-; GFX6-NEXT: s_cmp_gt_i32 s0, -1
-; GFX6-NEXT: s_cselect_b32 s2, s0, -1
+; GFX6-NEXT: s_max_i32 s2, s0, -1
+; GFX6-NEXT: s_min_i32 s3, s0, -1
; GFX6-NEXT: s_sub_i32 s2, s2, 0x7fffffff
-; GFX6-NEXT: s_cmp_lt_i32 s0, -1
-; GFX6-NEXT: s_cselect_b32 s3, s0, -1
; GFX6-NEXT: s_sub_i32 s3, s3, 0x80000000
-; GFX6-NEXT: s_cmp_gt_i32 s2, s1
-; GFX6-NEXT: s_cselect_b32 s1, s2, s1
-; GFX6-NEXT: s_cmp_lt_i32 s1, s3
-; GFX6-NEXT: s_cselect_b32 s1, s1, s3
+; GFX6-NEXT: s_max_i32 s1, s2, s1
+; GFX6-NEXT: s_min_i32 s1, s1, s3
; GFX6-NEXT: s_sub_i32 s0, s0, s1
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_ssubsat_i32:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_cmp_gt_i32 s0, -1
-; GFX8-NEXT: s_cselect_b32 s2, s0, -1
+; GFX8-NEXT: s_max_i32 s2, s0, -1
+; GFX8-NEXT: s_min_i32 s3, s0, -1
; GFX8-NEXT: s_sub_i32 s2, s2, 0x7fffffff
-; GFX8-NEXT: s_cmp_lt_i32 s0, -1
-; GFX8-NEXT: s_cselect_b32 s3, s0, -1
; GFX8-NEXT: s_sub_i32 s3, s3, 0x80000000
-; GFX8-NEXT: s_cmp_gt_i32 s2, s1
-; GFX8-NEXT: s_cselect_b32 s1, s2, s1
-; GFX8-NEXT: s_cmp_lt_i32 s1, s3
-; GFX8-NEXT: s_cselect_b32 s1, s1, s3
+; GFX8-NEXT: s_max_i32 s1, s2, s1
+; GFX8-NEXT: s_min_i32 s1, s1, s3
; GFX8-NEXT: s_sub_i32 s0, s0, s1
; GFX8-NEXT: ; return to shader part epilog
;
@@ -1192,11 +1116,9 @@ define amdgpu_ps i32 @s_ssubsat_i32(i32 inreg %lhs, i32 inreg %rhs) {
define amdgpu_ps float @ssubsat_i32_sv(i32 inreg %lhs, i32 %rhs) {
; GFX6-LABEL: ssubsat_i32_sv:
; GFX6: ; %bb.0:
-; GFX6-NEXT: s_cmp_gt_i32 s0, -1
-; GFX6-NEXT: s_cselect_b32 s1, s0, -1
+; GFX6-NEXT: s_max_i32 s1, s0, -1
+; GFX6-NEXT: s_min_i32 s2, s0, -1
; GFX6-NEXT: s_sub_i32 s1, s1, 0x7fffffff
-; GFX6-NEXT: s_cmp_lt_i32 s0, -1
-; GFX6-NEXT: s_cselect_b32 s2, s0, -1
; GFX6-NEXT: s_sub_i32 s2, s2, 0x80000000
; GFX6-NEXT: v_max_i32_e32 v0, s1, v0
; GFX6-NEXT: v_min_i32_e32 v0, s2, v0
@@ -1205,11 +1127,9 @@ define amdgpu_ps float @ssubsat_i32_sv(i32 inreg %lhs, i32 %rhs) {
;
; GFX8-LABEL: ssubsat_i32_sv:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_cmp_gt_i32 s0, -1
-; GFX8-NEXT: s_cselect_b32 s1, s0, -1
+; GFX8-NEXT: s_max_i32 s1, s0, -1
+; GFX8-NEXT: s_min_i32 s2, s0, -1
; GFX8-NEXT: s_sub_i32 s1, s1, 0x7fffffff
-; GFX8-NEXT: s_cmp_lt_i32 s0, -1
-; GFX8-NEXT: s_cselect_b32 s2, s0, -1
; GFX8-NEXT: s_sub_i32 s2, s2, 0x80000000
; GFX8-NEXT: v_max_i32_e32 v0, s1, v0
; GFX8-NEXT: v_min_i32_e32 v0, s2, v0
@@ -1331,57 +1251,41 @@ define <2 x i32> @v_ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
define amdgpu_ps <2 x i32> @s_ssubsat_v2i32(<2 x i32> inreg %lhs, <2 x i32> inreg %rhs) {
; GFX6-LABEL: s_ssubsat_v2i32:
; GFX6: ; %bb.0:
-; GFX6-NEXT: s_cmp_gt_i32 s0, -1
; GFX6-NEXT: s_brev_b32 s4, -2
-; GFX6-NEXT: s_cselect_b32 s6, s0, -1
+; GFX6-NEXT: s_max_i32 s6, s0, -1
; GFX6-NEXT: s_sub_i32 s6, s6, s4
-; GFX6-NEXT: s_cmp_lt_i32 s0, -1
; GFX6-NEXT: s_brev_b32 s5, 1
-; GFX6-NEXT: s_cselect_b32 s7, s0, -1
+; GFX6-NEXT: s_min_i32 s7, s0, -1
; GFX6-NEXT: s_sub_i32 s7, s7, s5
-; GFX6-NEXT: s_cmp_gt_i32 s6, s2
-; GFX6-NEXT: s_cselect_b32 s2, s6, s2
-; GFX6-NEXT: s_cmp_lt_i32 s2, s7
-; GFX6-NEXT: s_cselect_b32 s2, s2, s7
+; GFX6-NEXT: s_max_i32 s2, s6, s2
+; GFX6-NEXT: s_min_i32 s2, s2, s7
; GFX6-NEXT: s_sub_i32 s0, s0, s2
-; GFX6-NEXT: s_cmp_gt_i32 s1, -1
-; GFX6-NEXT: s_cselect_b32 s2, s1, -1
+; GFX6-NEXT: s_max_i32 s2, s1, -1
; GFX6-NEXT: s_sub_i32 s2, s2, s4
-; GFX6-NEXT: s_cmp_lt_i32 s1, -1
-; GFX6-NEXT: s_cselect_b32 s4, s1, -1
+; GFX6-NEXT: s_min_i32 s4, s1, -1
; GFX6-NEXT: s_sub_i32 s4, s4, s5
-; GFX6-NEXT: s_cmp_gt_i32 s2, s3
-; GFX6-NEXT: s_cselect_b32 s2, s2, s3
-; GFX6-NEXT: s_cmp_lt_i32 s2, s4
-; GFX6-NEXT: s_cselect_b32 s2, s2, s4
+; GFX6-NEXT: s_max_i32 s2, s2, s3
+; GFX6-NEXT: s_min_i32 s2, s2, s4
; GFX6-NEXT: s_sub_i32 s1, s1, s2
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_ssubsat_v2i32:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_cmp_gt_i32 s0, -1
; GFX8-NEXT: s_brev_b32 s4, -2
-; GFX8-NEXT: s_cselect_b32 s6, s0, -1
+; GFX8-NEXT: s_max_i32 s6, s0, -1
; GFX8-NEXT: s_sub_i32 s6, s6, s4
-; GFX8-NEXT: s_cmp_lt_i32 s0, -1
; GFX8-NEXT: s_brev_b32 s5, 1
-; GFX8-NEXT: s_cselect_b32 s7, s0, -1
+; GFX8-NEXT: s_min_i32 s7, s0, -1
; GFX8-NEXT: s_sub_i32 s7, s7, s5
-; GFX8-NEXT: s_cmp_gt_i32 s6, s2
-; GFX8-NEXT: s_cselect_b32 s2, s6, s2
-; GFX8-NEXT: s_cmp_lt_i32 s2, s7
-; GFX8-NEXT: s_cselect_b32 s2, s2, s7
+; GFX8-NEXT: s_max_i32 s2, s6, s2
+; GFX8-NEXT: s_min_i32 s2, s2, s7
; GFX8-NEXT: s_sub_i32 s0, s0, s2
-; GFX8-NEXT: s_cmp_gt_i32 s1, -1
-; GFX8-NEXT: s_cselect_b32 s2, s1, -1
+; GFX8-NEXT: s_max_i32 s2, s1, -1
; GFX8-NEXT: s_sub_i32 s2, s2, s4
-; GFX8-NEXT: s_cmp_lt_i32 s1, -1
-; GFX8-NEXT: s_cselect_b32 s4, s1, -1
+; GFX8-NEXT: s_min_i32 s4, s1, -1
; GFX8-NEXT: s_sub_i32 s4, s4, s5
-; GFX8-NEXT: s_cmp_gt_i32 s2, s3
-; GFX8-NEXT: s_cselect_b32 s2, s2, s3
-; GFX8-NEXT: s_cmp_lt_i32 s2, s4
-; GFX8-NEXT: s_cselect_b32 s2, s2, s4
+; GFX8-NEXT: s_max_i32 s2, s2, s3
+; GFX8-NEXT: s_min_i32 s2, s2, s4
; GFX8-NEXT: s_sub_i32 s1, s1, s2
; GFX8-NEXT: ; return to shader part epilog
;
@@ -1486,79 +1390,55 @@ define <3 x i32> @v_ssubsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
define amdgpu_ps <3 x i32> @s_ssubsat_v3i32(<3 x i32> inreg %lhs, <3 x i32> inreg %rhs) {
; GFX6-LABEL: s_ssubsat_v3i32:
; GFX6: ; %bb.0:
-; GFX6-NEXT: s_cmp_gt_i32 s0, -1
; GFX6-NEXT: s_brev_b32 s6, -2
-; GFX6-NEXT: s_cselect_b32 s8, s0, -1
+; GFX6-NEXT: s_max_i32 s8, s0, -1
; GFX6-NEXT: s_sub_i32 s8, s8, s6
-; GFX6-NEXT: s_cmp_lt_i32 s0, -1
; GFX6-NEXT: s_brev_b32 s7, 1
-; GFX6-NEXT: s_cselect_b32 s9, s0, -1
+; GFX6-NEXT: s_min_i32 s9, s0, -1
+; GFX6-NEXT: s_max_i32 s3, s8, s3
; GFX6-NEXT: s_sub_i32 s9, s9, s7
-; GFX6-NEXT: s_cmp_gt_i32 s8, s3
-; GFX6-NEXT: s_cselect_b32 s3, s8, s3
-; GFX6-NEXT: s_cmp_lt_i32 s3, s9
-; GFX6-NEXT: s_cselect_b32 s3, s3, s9
+; GFX6-NEXT: s_min_i32 s3, s3, s9
; GFX6-NEXT: s_sub_i32 s0, s0, s3
-; GFX6-NEXT: s_cmp_gt_i32 s1, -1
-; GFX6-NEXT: s_cselect_b32 s3, s1, -1
+; GFX6-NEXT: s_max_i32 s3, s1, -1
; GFX6-NEXT: s_sub_i32 s3, s3, s6
-; GFX6-NEXT: s_cmp_lt_i32 s1, -1
-; GFX6-NEXT: s_cselect_b32 s8, s1, -1
+; GFX6-NEXT: s_min_i32 s8, s1, -1
+; GFX6-NEXT: s_max_i32 s3, s3, s4
; GFX6-NEXT: s_sub_i32 s8, s8, s7
-; GFX6-NEXT: s_cmp_gt_i32 s3, s4
-; GFX6-NEXT: s_cselect_b32 s3, s3, s4
-; GFX6-NEXT: s_cmp_lt_i32 s3, s8
-; GFX6-NEXT: s_cselect_b32 s3, s3, s8
+; GFX6-NEXT: s_min_i32 s3, s3, s8
; GFX6-NEXT: s_sub_i32 s1, s1, s3
-; GFX6-NEXT: s_cmp_gt_i32 s2, -1
-; GFX6-NEXT: s_cselect_b32 s3, s2, -1
+; GFX6-NEXT: s_max_i32 s3, s2, -1
; GFX6-NEXT: s_sub_i32 s3, s3, s6
-; GFX6-NEXT: s_cmp_lt_i32 s2, -1
-; GFX6-NEXT: s_cselect_b32 s4, s2, -1
+; GFX6-NEXT: s_min_i32 s4, s2, -1
; GFX6-NEXT: s_sub_i32 s4, s4, s7
-; GFX6-NEXT: s_cmp_gt_i32 s3, s5
-; GFX6-NEXT: s_cselect_b32 s3, s3, s5
-; GFX6-NEXT: s_cmp_lt_i32 s3, s4
-; GFX6-NEXT: s_cselect_b32 s3, s3, s4
+; GFX6-NEXT: s_max_i32 s3, s3, s5
+; GFX6-NEXT: s_min_i32 s3, s3, s4
; GFX6-NEXT: s_sub_i32 s2, s2, s3
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_ssubsat_v3i32:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_cmp_gt_i32 s0, -1
; GFX8-NEXT: s_brev_b32 s6, -2
-; GFX8-NEXT: s_cselect_b32 s8, s0, -1
+; GFX8-NEXT: s_max_i32 s8, s0, -1
; GFX8-NEXT: s_sub_i32 s8, s8, s6
-; GFX8-NEXT: s_cmp_lt_i32 s0, -1
; GFX8-NEXT: s_brev_b32 s7, 1
-; GFX8-NEXT: s_cselect_b32 s9, s0, -1
+; GFX8-NEXT: s_min_i32 s9, s0, -1
+; GFX8-NEXT: s_max_i32 s3, s8, s3
; GFX8-NEXT: s_sub_i32 s9, s9, s7
-; GFX8-NEXT: s_cmp_gt_i32 s8, s3
-; GFX8-NEXT: s_cselect_b32 s3, s8, s3
-; GFX8-NEXT: s_cmp_lt_i32 s3, s9
-; GFX8-NEXT: s_cselect_b32 s3, s3, s9
+; GFX8-NEXT: s_min_i32 s3, s3, s9
; GFX8-NEXT: s_sub_i32 s0, s0, s3
-; GFX8-NEXT: s_cmp_gt_i32 s1, -1
-; GFX8-NEXT: s_cselect_b32 s3, s1, -1
+; GFX8-NEXT: s_max_i32 s3, s1, -1
; GFX8-NEXT: s_sub_i32 s3, s3, s6
-; GFX8-NEXT: s_cmp_lt_i32 s1, -1
-; GFX8-NEXT: s_cselect_b32 s8, s1, -1
+; GFX8-NEXT: s_min_i32 s8, s1, -1
+; GFX8-NEXT: s_max_i32 s3, s3, s4
; GFX8-NEXT: s_sub_i32 s8, s8, s7
-; GFX8-NEXT: s_cmp_gt_i32 s3, s4
-; GFX8-NEXT: s_cselect_b32 s3, s3, s4
-; GFX8-NEXT: s_cmp_lt_i32 s3, s8
-; GFX8-NEXT: s_cselect_b32 s3, s3, s8
+; GFX8-NEXT: s_min_i32 s3, s3, s8
; GFX8-NEXT: s_sub_i32 s1, s1, s3
-; GFX8-NEXT: s_cmp_gt_i32 s2, -1
-; GFX8-NEXT: s_cselect_b32 s3, s2, -1
+; GFX8-NEXT: s_max_i32 s3, s2, -1
; GFX8-NEXT: s_sub_i32 s3, s3, s6
-; GFX8-NEXT: s_cmp_lt_i32 s2, -1
-; GFX8-NEXT: s_cselect_b32 s4, s2, -1
+; GFX8-NEXT: s_min_i32 s4, s2, -1
; GFX8-NEXT: s_sub_i32 s4, s4, s7
-; GFX8-NEXT: s_cmp_gt_i32 s3, s5
-; GFX8-NEXT: s_cselect_b32 s3, s3, s5
-; GFX8-NEXT: s_cmp_lt_i32 s3, s4
-; GFX8-NEXT: s_cselect_b32 s3, s3, s4
+; GFX8-NEXT: s_max_i32 s3, s3, s5
+; GFX8-NEXT: s_min_i32 s3, s3, s4
; GFX8-NEXT: s_sub_i32 s2, s2, s3
; GFX8-NEXT: ; return to shader part epilog
;
@@ -1684,101 +1564,69 @@ define <4 x i32> @v_ssubsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
define amdgpu_ps <4 x i32> @s_ssubsat_v4i32(<4 x i32> inreg %lhs, <4 x i32> inreg %rhs) {
; GFX6-LABEL: s_ssubsat_v4i32:
; GFX6: ; %bb.0:
-; GFX6-NEXT: s_cmp_gt_i32 s0, -1
; GFX6-NEXT: s_brev_b32 s8, -2
-; GFX6-NEXT: s_cselect_b32 s10, s0, -1
+; GFX6-NEXT: s_max_i32 s10, s0, -1
; GFX6-NEXT: s_sub_i32 s10, s10, s8
-; GFX6-NEXT: s_cmp_lt_i32 s0, -1
; GFX6-NEXT: s_brev_b32 s9, 1
-; GFX6-NEXT: s_cselect_b32 s11, s0, -1
+; GFX6-NEXT: s_min_i32 s11, s0, -1
+; GFX6-NEXT: s_max_i32 s4, s10, s4
; GFX6-NEXT: s_sub_i32 s11, s11, s9
-; GFX6-NEXT: s_cmp_gt_i32 s10, s4
-; GFX6-NEXT: s_cselect_b32 s4, s10, s4
-; GFX6-NEXT: s_cmp_lt_i32 s4, s11
-; GFX6-NEXT: s_cselect_b32 s4, s4, s11
+; GFX6-NEXT: s_min_i32 s4, s4, s11
; GFX6-NEXT: s_sub_i32 s0, s0, s4
-; GFX6-NEXT: s_cmp_gt_i32 s1, -1
-; GFX6-NEXT: s_cselect_b32 s4, s1, -1
+; GFX6-NEXT: s_max_i32 s4, s1, -1
; GFX6-NEXT: s_sub_i32 s4, s4, s8
-; GFX6-NEXT: s_cmp_lt_i32 s1, -1
-; GFX6-NEXT: s_cselect_b32 s10, s1, -1
+; GFX6-NEXT: s_min_i32 s10, s1, -1
+; GFX6-NEXT: s_max_i32 s4, s4, s5
; GFX6-NEXT: s_sub_i32 s10, s10, s9
-; GFX6-NEXT: s_cmp_gt_i32 s4, s5
-; GFX6-NEXT: s_cselect_b32 s4, s4, s5
-; GFX6-NEXT: s_cmp_lt_i32 s4, s10
-; GFX6-NEXT: s_cselect_b32 s4, s4, s10
+; GFX6-NEXT: s_min_i32 s4, s4, s10
; GFX6-NEXT: s_sub_i32 s1, s1, s4
-; GFX6-NEXT: s_cmp_gt_i32 s2, -1
-; GFX6-NEXT: s_cselect_b32 s4, s2, -1
+; GFX6-NEXT: s_max_i32 s4, s2, -1
; GFX6-NEXT: s_sub_i32 s4, s4, s8
-; GFX6-NEXT: s_cmp_lt_i32 s2, -1
-; GFX6-NEXT: s_cselect_b32 s5, s2, -1
+; GFX6-NEXT: s_min_i32 s5, s2, -1
; GFX6-NEXT: s_sub_i32 s5, s5, s9
-; GFX6-NEXT: s_cmp_gt_i32 s4, s6
-; GFX6-NEXT: s_cselect_b32 s4, s4, s6
-; GFX6-NEXT: s_cmp_lt_i32 s4, s5
-; GFX6-NEXT: s_cselect_b32 s4, s4, s5
+; GFX6-NEXT: s_max_i32 s4, s4, s6
+; GFX6-NEXT: s_min_i32 s4, s4, s5
; GFX6-NEXT: s_sub_i32 s2, s2, s4
-; GFX6-NEXT: s_cmp_gt_i32 s3, -1
-; GFX6-NEXT: s_cselect_b32 s4, s3, -1
+; GFX6-NEXT: s_max_i32 s4, s3, -1
; GFX6-NEXT: s_sub_i32 s4, s4, s8
-; GFX6-NEXT: s_cmp_lt_i32 s3, -1
-; GFX6-NEXT: s_cselect_b32 s5, s3, -1
+; GFX6-NEXT: s_min_i32 s5, s3, -1
; GFX6-NEXT: s_sub_i32 s5, s5, s9
-; GFX6-NEXT: s_cmp_gt_i32 s4, s7
-; GFX6-NEXT: s_cselect_b32 s4, s4, s7
-; GFX6-NEXT: s_cmp_lt_i32 s4, s5
-; GFX6-NEXT: s_cselect_b32 s4, s4, s5
+; GFX6-NEXT: s_max_i32 s4, s4, s7
+; GFX6-NEXT: s_min_i32 s4, s4, s5
; GFX6-NEXT: s_sub_i32 s3, s3, s4
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_ssubsat_v4i32:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_cmp_gt_i32 s0, -1
; GFX8-NEXT: s_brev_b32 s8, -2
-; GFX8-NEXT: s_cselect_b32 s10, s0, -1
+; GFX8-NEXT: s_max_i32 s10, s0, -1
; GFX8-NEXT: s_sub_i32 s10, s10, s8
-; GFX8-NEXT: s_cmp_lt_i32 s0, -1
; GFX8-NEXT: s_brev_b32 s9, 1
-; GFX8-NEXT: s_cselect_b32 s11, s0, -1
+; GFX8-NEXT: s_min_i32 s11, s0, -1
+; GFX8-NEXT: s_max_i32 s4, s10, s4
; GFX8-NEXT: s_sub_i32 s11, s11, s9
-; GFX8-NEXT: s_cmp_gt_i32 s10, s4
-; GFX8-NEXT: s_cselect_b32 s4, s10, s4
-; GFX8-NEXT: s_cmp_lt_i32 s4, s11
-; GFX8-NEXT: s_cselect_b32 s4, s4, s11
+; GFX8-NEXT: s_min_i32 s4, s4, s11
; GFX8-NEXT: s_sub_i32 s0, s0, s4
-; GFX8-NEXT: s_cmp_gt_i32 s1, -1
-; GFX8-NEXT: s_cselect_b32 s4, s1, -1
+; GFX8-NEXT: s_max_i32 s4, s1, -1
; GFX8-NEXT: s_sub_i32 s4, s4, s8
-; GFX8-NEXT: s_cmp_lt_i32 s1, -1
-; GFX8-NEXT: s_cselect_b32 s10, s1, -1
+; GFX8-NEXT: s_min_i32 s10, s1, -1
+; GFX8-NEXT: s_max_i32 s4, s4, s5
; GFX8-NEXT: s_sub_i32 s10, s10, s9
-; GFX8-NEXT: s_cmp_gt_i32 s4, s5
-; GFX8-NEXT: s_cselect_b32 s4, s4, s5
-; GFX8-NEXT: s_cmp_lt_i32 s4, s10
-; GFX8-NEXT: s_cselect_b32 s4, s4, s10
+; GFX8-NEXT: s_min_i32 s4, s4, s10
; GFX8-NEXT: s_sub_i32 s1, s1, s4
-; GFX8-NEXT: s_cmp_gt_i32 s2, -1
-; GFX8-NEXT: s_cselect_b32 s4, s2, -1
+; GFX8-NEXT: s_max_i32 s4, s2, -1
; GFX8-NEXT: s_sub_i32 s4, s4, s8
-; GFX8-NEXT: s_cmp_lt_i32 s2, -1
-; GFX8-NEXT: s_cselect_b32 s5, s2, -1
+; GFX8-NEXT: s_min_i32 s5, s2, -1
; GFX8-NEXT: s_sub_i32 s5, s5, s9
-; GFX8-NEXT: s_cmp_gt_i32 s4, s6
-; GFX8-NEXT: s_cselect_b32 s4, s4, s6
-; GFX8-NEXT: s_cmp_lt_i32 s4, s5
-; GFX8-NEXT: s_cselect_b32 s4, s4, s5
+; GFX8-NEXT: s_max_i32 s4, s4, s6
+; GFX8-NEXT: s_min_i32 s4, s4, s5
; GFX8-NEXT: s_sub_i32 s2, s2, s4
-; GFX8-NEXT: s_cmp_gt_i32 s3, -1
-; GFX8-NEXT: s_cselect_b32 s4, s3, -1
+; GFX8-NEXT: s_max_i32 s4, s3, -1
; GFX8-NEXT: s_sub_i32 s4, s4, s8
-; GFX8-NEXT: s_cmp_lt_i32 s3, -1
-; GFX8-NEXT: s_cselect_b32 s5, s3, -1
+; GFX8-NEXT: s_min_i32 s5, s3, -1
; GFX8-NEXT: s_sub_i32 s5, s5, s9
-; GFX8-NEXT: s_cmp_gt_i32 s4, s7
-; GFX8-NEXT: s_cselect_b32 s4, s4, s7
-; GFX8-NEXT: s_cmp_lt_i32 s4, s5
-; GFX8-NEXT: s_cselect_b32 s4, s4, s5
+; GFX8-NEXT: s_max_i32 s4, s4, s7
+; GFX8-NEXT: s_min_i32 s4, s4, s5
; GFX8-NEXT: s_sub_i32 s3, s3, s4
; GFX8-NEXT: ; return to shader part epilog
;
@@ -1929,123 +1777,83 @@ define <5 x i32> @v_ssubsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) {
define amdgpu_ps <5 x i32> @s_ssubsat_v5i32(<5 x i32> inreg %lhs, <5 x i32> inreg %rhs) {
; GFX6-LABEL: s_ssubsat_v5i32:
; GFX6: ; %bb.0:
-; GFX6-NEXT: s_cmp_gt_i32 s0, -1
; GFX6-NEXT: s_brev_b32 s10, -2
-; GFX6-NEXT: s_cselect_b32 s12, s0, -1
+; GFX6-NEXT: s_max_i32 s12, s0, -1
; GFX6-NEXT: s_sub_i32 s12, s12, s10
-; GFX6-NEXT: s_cmp_lt_i32 s0, -1
; GFX6-NEXT: s_brev_b32 s11, 1
-; GFX6-NEXT: s_cselect_b32 s13, s0, -1
+; GFX6-NEXT: s_min_i32 s13, s0, -1
+; GFX6-NEXT: s_max_i32 s5, s12, s5
; GFX6-NEXT: s_sub_i32 s13, s13, s11
-; GFX6-NEXT: s_cmp_gt_i32 s12, s5
-; GFX6-NEXT: s_cselect_b32 s5, s12, s5
-; GFX6-NEXT: s_cmp_lt_i32 s5, s13
-; GFX6-NEXT: s_cselect_b32 s5, s5, s13
+; GFX6-NEXT: s_min_i32 s5, s5, s13
; GFX6-NEXT: s_sub_i32 s0, s0, s5
-; GFX6-NEXT: s_cmp_gt_i32 s1, -1
-; GFX6-NEXT: s_cselect_b32 s5, s1, -1
+; GFX6-NEXT: s_max_i32 s5, s1, -1
; GFX6-NEXT: s_sub_i32 s5, s5, s10
-; GFX6-NEXT: s_cmp_lt_i32 s1, -1
-; GFX6-NEXT: s_cselect_b32 s12, s1, -1
+; GFX6-NEXT: s_min_i32 s12, s1, -1
+; GFX6-NEXT: s_max_i32 s5, s5, s6
; GFX6-NEXT: s_sub_i32 s12, s12, s11
-; GFX6-NEXT: s_cmp_gt_i32 s5, s6
-; GFX6-NEXT: s_cselect_b32 s5, s5, s6
-; GFX6-NEXT: s_cmp_lt_i32 s5, s12
-; GFX6-NEXT: s_cselect_b32 s5, s5, s12
+; GFX6-NEXT: s_min_i32 s5, s5, s12
; GFX6-NEXT: s_sub_i32 s1, s1, s5
-; GFX6-NEXT: s_cmp_gt_i32 s2, -1
-; GFX6-NEXT: s_cselect_b32 s5, s2, -1
+; GFX6-NEXT: s_max_i32 s5, s2, -1
; GFX6-NEXT: s_sub_i32 s5, s5, s10
-; GFX6-NEXT: s_cmp_lt_i32 s2, -1
-; GFX6-NEXT: s_cselect_b32 s6, s2, -1
+; GFX6-NEXT: s_min_i32 s6, s2, -1
; GFX6-NEXT: s_sub_i32 s6, s6, s11
-; GFX6-NEXT: s_cmp_gt_i32 s5, s7
-; GFX6-NEXT: s_cselect_b32 s5, s5, s7
-; GFX6-NEXT: s_cmp_lt_i32 s5, s6
-; GFX6-NEXT: s_cselect_b32 s5, s5, s6
+; GFX6-NEXT: s_max_i32 s5, s5, s7
+; GFX6-NEXT: s_min_i32 s5, s5, s6
; GFX6-NEXT: s_sub_i32 s2, s2, s5
-; GFX6-NEXT: s_cmp_gt_i32 s3, -1
-; GFX6-NEXT: s_cselect_b32 s5, s3, -1
+; GFX6-NEXT: s_max_i32 s5, s3, -1
; GFX6-NEXT: s_sub_i32 s5, s5, s10
-; GFX6-NEXT: s_cmp_lt_i32 s3, -1
-; GFX6-NEXT: s_cselect_b32 s6, s3, -1
+; GFX6-NEXT: s_min_i32 s6, s3, -1
; GFX6-NEXT: s_sub_i32 s6, s6, s11
-; GFX6-NEXT: s_cmp_gt_i32 s5, s8
-; GFX6-NEXT: s_cselect_b32 s5, s5, s8
-; GFX6-NEXT: s_cmp_lt_i32 s5, s6
-; GFX6-NEXT: s_cselect_b32 s5, s5, s6
+; GFX6-NEXT: s_max_i32 s5, s5, s8
+; GFX6-NEXT: s_min_i32 s5, s5, s6
; GFX6-NEXT: s_sub_i32 s3, s3, s5
-; GFX6-NEXT: s_cmp_gt_i32 s4, -1
-; GFX6-NEXT: s_cselect_b32 s5, s4, -1
+; GFX6-NEXT: s_max_i32 s5, s4, -1
; GFX6-NEXT: s_sub_i32 s5, s5, s10
-; GFX6-NEXT: s_cmp_lt_i32 s4, -1
-; GFX6-NEXT: s_cselect_b32 s6, s4, -1
+; GFX6-NEXT: s_min_i32 s6, s4, -1
; GFX6-NEXT: s_sub_i32 s6, s6, s11
-; GFX6-NEXT: s_cmp_gt_i32 s5, s9
-; GFX6-NEXT: s_cselect_b32 s5, s5, s9
-; GFX6-NEXT: s_cmp_lt_i32 s5, s6
-; GFX6-NEXT: s_cselect_b32 s5, s5, s6
+; GFX6-NEXT: s_max_i32 s5, s5, s9
+; GFX6-NEXT: s_min_i32 s5, s5, s6
; GFX6-NEXT: s_sub_i32 s4, s4, s5
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_ssubsat_v5i32:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_cmp_gt_i32 s0, -1
; GFX8-NEXT: s_brev_b32 s10, -2
-; GFX8-NEXT: s_cselect_b32 s12, s0, -1
+; GFX8-NEXT: s_max_i32 s12, s0, -1
; GFX8-NEXT: s_sub_i32 s12, s12, s10
-; GFX8-NEXT: s_cmp_lt_i32 s0, -1
; GFX8-NEXT: s_brev_b32 s11, 1
-; GFX8-NEXT: s_cselect_b32 s13, s0, -1
+; GFX8-NEXT: s_min_i32 s13, s0, -1
+; GFX8-NEXT: s_max_i32 s5, s12, s5
; GFX8-NEXT: s_sub_i32 s13, s13, s11
-; GFX8-NEXT: s_cmp_gt_i32 s12, s5
-; GFX8-NEXT: s_cselect_b32 s5, s12, s5
-; GFX8-NEXT: s_cmp_lt_i32 s5, s13
-; GFX8-NEXT: s_cselect_b32 s5, s5, s13
+; GFX8-NEXT: s_min_i32 s5, s5, s13
; GFX8-NEXT: s_sub_i32 s0, s0, s5
-; GFX8-NEXT: s_cmp_gt_i32 s1, -1
-; GFX8-NEXT: s_cselect_b32 s5, s1, -1
+; GFX8-NEXT: s_max_i32 s5, s1, -1
; GFX8-NEXT: s_sub_i32 s5, s5, s10
-; GFX8-NEXT: s_cmp_lt_i32 s1, -1
-; GFX8-NEXT: s_cselect_b32 s12, s1, -1
+; GFX8-NEXT: s_min_i32 s12, s1, -1
+; GFX8-NEXT: s_max_i32 s5, s5, s6
; GFX8-NEXT: s_sub_i32 s12, s12, s11
-; GFX8-NEXT: s_cmp_gt_i32 s5, s6
-; GFX8-NEXT: s_cselect_b32 s5, s5, s6
-; GFX8-NEXT: s_cmp_lt_i32 s5, s12
-; GFX8-NEXT: s_cselect_b32 s5, s5, s12
+; GFX8-NEXT: s_min_i32 s5, s5, s12
; GFX8-NEXT: s_sub_i32 s1, s1, s5
-; GFX8-NEXT: s_cmp_gt_i32 s2, -1
-; GFX8-NEXT: s_cselect_b32 s5, s2, -1
+; GFX8-NEXT: s_max_i32 s5, s2, -1
; GFX8-NEXT: s_sub_i32 s5, s5, s10
-; GFX8-NEXT: s_cmp_lt_i32 s2, -1
-; GFX8-NEXT: s_cselect_b32 s6, s2, -1
+; GFX8-NEXT: s_min_i32 s6, s2, -1
; GFX8-NEXT: s_sub_i32 s6, s6, s11
-; GFX8-NEXT: s_cmp_gt_i32 s5, s7
-; GFX8-NEXT: s_cselect_b32 s5, s5, s7
-; GFX8-NEXT: s_cmp_lt_i32 s5, s6
-; GFX8-NEXT: s_cselect_b32 s5, s5, s6
+; GFX8-NEXT: s_max_i32 s5, s5, s7
+; GFX8-NEXT: s_min_i32 s5, s5, s6
; GFX8-NEXT: s_sub_i32 s2, s2, s5
-; GFX8-NEXT: s_cmp_gt_i32 s3, -1
-; GFX8-NEXT: s_cselect_b32 s5, s3, -1
+; GFX8-NEXT: s_max_i32 s5, s3, -1
; GFX8-NEXT: s_sub_i32 s5, s5, s10
-; GFX8-NEXT: s_cmp_lt_i32 s3, -1
-; GFX8-NEXT: s_cselect_b32 s6, s3, -1
+; GFX8-NEXT: s_min_i32 s6, s3, -1
; GFX8-NEXT: s_sub_i32 s6, s6, s11
-; GFX8-NEXT: s_cmp_gt_i32 s5, s8
-; GFX8-NEXT: s_cselect_b32 s5, s5, s8
-; GFX8-NEXT: s_cmp_lt_i32 s5, s6
-; GFX8-NEXT: s_cselect_b32 s5, s5, s6
+; GFX8-NEXT: s_max_i32 s5, s5, s8
+; GFX8-NEXT: s_min_i32 s5, s5, s6
; GFX8-NEXT: s_sub_i32 s3, s3, s5
-; GFX8-NEXT: s_cmp_gt_i32 s4, -1
-; GFX8-NEXT: s_cselect_b32 s5, s4, -1
+; GFX8-NEXT: s_max_i32 s5, s4, -1
; GFX8-NEXT: s_sub_i32 s5, s5, s10
-; GFX8-NEXT: s_cmp_lt_i32 s4, -1
-; GFX8-NEXT: s_cselect_b32 s6, s4, -1
+; GFX8-NEXT: s_min_i32 s6, s4, -1
; GFX8-NEXT: s_sub_i32 s6, s6, s11
-; GFX8-NEXT: s_cmp_gt_i32 s5, s9
-; GFX8-NEXT: s_cselect_b32 s5, s5, s9
-; GFX8-NEXT: s_cmp_lt_i32 s5, s6
-; GFX8-NEXT: s_cselect_b32 s5, s5, s6
+; GFX8-NEXT: s_max_i32 s5, s5, s9
+; GFX8-NEXT: s_min_i32 s5, s5, s6
; GFX8-NEXT: s_sub_i32 s4, s4, s5
; GFX8-NEXT: ; return to shader part epilog
;
@@ -2377,365 +2185,237 @@ define <16 x i32> @v_ssubsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
define amdgpu_ps <16 x i32> @s_ssubsat_v16i32(<16 x i32> inreg %lhs, <16 x i32> inreg %rhs) {
; GFX6-LABEL: s_ssubsat_v16i32:
; GFX6: ; %bb.0:
-; GFX6-NEXT: s_cmp_gt_i32 s0, -1
; GFX6-NEXT: s_brev_b32 s32, -2
-; GFX6-NEXT: s_cselect_b32 s34, s0, -1
+; GFX6-NEXT: s_max_i32 s34, s0, -1
; GFX6-NEXT: s_sub_i32 s34, s34, s32
-; GFX6-NEXT: s_cmp_lt_i32 s0, -1
; GFX6-NEXT: s_brev_b32 s33, 1
-; GFX6-NEXT: s_cselect_b32 s35, s0, -1
+; GFX6-NEXT: s_min_i32 s35, s0, -1
+; GFX6-NEXT: s_max_i32 s16, s34, s16
; GFX6-NEXT: s_sub_i32 s35, s35, s33
-; GFX6-NEXT: s_cmp_gt_i32 s34, s16
-; GFX6-NEXT: s_cselect_b32 s16, s34, s16
-; GFX6-NEXT: s_cmp_lt_i32 s16, s35
-; GFX6-NEXT: s_cselect_b32 s16, s16, s35
+; GFX6-NEXT: s_min_i32 s16, s16, s35
; GFX6-NEXT: s_sub_i32 s0, s0, s16
-; GFX6-NEXT: s_cmp_gt_i32 s1, -1
-; GFX6-NEXT: s_cselect_b32 s16, s1, -1
+; GFX6-NEXT: s_max_i32 s16, s1, -1
; GFX6-NEXT: s_sub_i32 s16, s16, s32
-; GFX6-NEXT: s_cmp_lt_i32 s1, -1
-; GFX6-NEXT: s_cselect_b32 s34, s1, -1
+; GFX6-NEXT: s_min_i32 s34, s1, -1
+; GFX6-NEXT: s_max_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s34, s34, s33
-; GFX6-NEXT: s_cmp_gt_i32 s16, s17
-; GFX6-NEXT: s_cselect_b32 s16, s16, s17
-; GFX6-NEXT: s_cmp_lt_i32 s16, s34
-; GFX6-NEXT: s_cselect_b32 s16, s16, s34
+; GFX6-NEXT: s_min_i32 s16, s16, s34
; GFX6-NEXT: s_sub_i32 s1, s1, s16
-; GFX6-NEXT: s_cmp_gt_i32 s2, -1
-; GFX6-NEXT: s_cselect_b32 s16, s2, -1
+; GFX6-NEXT: s_max_i32 s16, s2, -1
; GFX6-NEXT: s_sub_i32 s16, s16, s32
-; GFX6-NEXT: s_cmp_lt_i32 s2, -1
-; GFX6-NEXT: s_cselect_b32 s17, s2, -1
+; GFX6-NEXT: s_min_i32 s17, s2, -1
; GFX6-NEXT: s_sub_i32 s17, s17, s33
-; GFX6-NEXT: s_cmp_gt_i32 s16, s18
-; GFX6-NEXT: s_cselect_b32 s16, s16, s18
-; GFX6-NEXT: s_cmp_lt_i32 s16, s17
-; GFX6-NEXT: s_cselect_b32 s16, s16, s17
+; GFX6-NEXT: s_max_i32 s16, s16, s18
+; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s2, s2, s16
-; GFX6-NEXT: s_cmp_gt_i32 s3, -1
-; GFX6-NEXT: s_cselect_b32 s16, s3, -1
+; GFX6-NEXT: s_max_i32 s16, s3, -1
; GFX6-NEXT: s_sub_i32 s16, s16, s32
-; GFX6-NEXT: s_cmp_lt_i32 s3, -1
-; GFX6-NEXT: s_cselect_b32 s17, s3, -1
+; GFX6-NEXT: s_min_i32 s17, s3, -1
; GFX6-NEXT: s_sub_i32 s17, s17, s33
-; GFX6-NEXT: s_cmp_gt_i32 s16, s19
-; GFX6-NEXT: s_cselect_b32 s16, s16, s19
-; GFX6-NEXT: s_cmp_lt_i32 s16, s17
-; GFX6-NEXT: s_cselect_b32 s16, s16, s17
+; GFX6-NEXT: s_max_i32 s16, s16, s19
+; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s3, s3, s16
-; GFX6-NEXT: s_cmp_gt_i32 s4, -1
-; GFX6-NEXT: s_cselect_b32 s16, s4, -1
+; GFX6-NEXT: s_max_i32 s16, s4, -1
; GFX6-NEXT: s_sub_i32 s16, s16, s32
-; GFX6-NEXT: s_cmp_lt_i32 s4, -1
-; GFX6-NEXT: s_cselect_b32 s17, s4, -1
+; GFX6-NEXT: s_min_i32 s17, s4, -1
; GFX6-NEXT: s_sub_i32 s17, s17, s33
-; GFX6-NEXT: s_cmp_gt_i32 s16, s20
-; GFX6-NEXT: s_cselect_b32 s16, s16, s20
-; GFX6-NEXT: s_cmp_lt_i32 s16, s17
-; GFX6-NEXT: s_cselect_b32 s16, s16, s17
+; GFX6-NEXT: s_max_i32 s16, s16, s20
+; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s4, s4, s16
-; GFX6-NEXT: s_cmp_gt_i32 s5, -1
-; GFX6-NEXT: s_cselect_b32 s16, s5, -1
+; GFX6-NEXT: s_max_i32 s16, s5, -1
; GFX6-NEXT: s_sub_i32 s16, s16, s32
-; GFX6-NEXT: s_cmp_lt_i32 s5, -1
-; GFX6-NEXT: s_cselect_b32 s17, s5, -1
+; GFX6-NEXT: s_min_i32 s17, s5, -1
; GFX6-NEXT: s_sub_i32 s17, s17, s33
-; GFX6-NEXT: s_cmp_gt_i32 s16, s21
-; GFX6-NEXT: s_cselect_b32 s16, s16, s21
-; GFX6-NEXT: s_cmp_lt_i32 s16, s17
-; GFX6-NEXT: s_cselect_b32 s16, s16, s17
+; GFX6-NEXT: s_max_i32 s16, s16, s21
+; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s5, s5, s16
-; GFX6-NEXT: s_cmp_gt_i32 s6, -1
-; GFX6-NEXT: s_cselect_b32 s16, s6, -1
+; GFX6-NEXT: s_max_i32 s16, s6, -1
; GFX6-NEXT: s_sub_i32 s16, s16, s32
-; GFX6-NEXT: s_cmp_lt_i32 s6, -1
-; GFX6-NEXT: s_cselect_b32 s17, s6, -1
+; GFX6-NEXT: s_min_i32 s17, s6, -1
; GFX6-NEXT: s_sub_i32 s17, s17, s33
-; GFX6-NEXT: s_cmp_gt_i32 s16, s22
-; GFX6-NEXT: s_cselect_b32 s16, s16, s22
-; GFX6-NEXT: s_cmp_lt_i32 s16, s17
-; GFX6-NEXT: s_cselect_b32 s16, s16, s17
+; GFX6-NEXT: s_max_i32 s16, s16, s22
+; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s6, s6, s16
-; GFX6-NEXT: s_cmp_gt_i32 s7, -1
-; GFX6-NEXT: s_cselect_b32 s16, s7, -1
+; GFX6-NEXT: s_max_i32 s16, s7, -1
; GFX6-NEXT: s_sub_i32 s16, s16, s32
-; GFX6-NEXT: s_cmp_lt_i32 s7, -1
-; GFX6-NEXT: s_cselect_b32 s17, s7, -1
+; GFX6-NEXT: s_min_i32 s17, s7, -1
; GFX6-NEXT: s_sub_i32 s17, s17, s33
-; GFX6-NEXT: s_cmp_gt_i32 s16, s23
-; GFX6-NEXT: s_cselect_b32 s16, s16, s23
-; GFX6-NEXT: s_cmp_lt_i32 s16, s17
-; GFX6-NEXT: s_cselect_b32 s16, s16, s17
+; GFX6-NEXT: s_max_i32 s16, s16, s23
+; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s7, s7, s16
-; GFX6-NEXT: s_cmp_gt_i32 s8, -1
-; GFX6-NEXT: s_cselect_b32 s16, s8, -1
+; GFX6-NEXT: s_max_i32 s16, s8, -1
; GFX6-NEXT: s_sub_i32 s16, s16, s32
-; GFX6-NEXT: s_cmp_lt_i32 s8, -1
-; GFX6-NEXT: s_cselect_b32 s17, s8, -1
+; GFX6-NEXT: s_min_i32 s17, s8, -1
; GFX6-NEXT: s_sub_i32 s17, s17, s33
-; GFX6-NEXT: s_cmp_gt_i32 s16, s24
-; GFX6-NEXT: s_cselect_b32 s16, s16, s24
-; GFX6-NEXT: s_cmp_lt_i32 s16, s17
-; GFX6-NEXT: s_cselect_b32 s16, s16, s17
+; GFX6-NEXT: s_max_i32 s16, s16, s24
+; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s8, s8, s16
-; GFX6-NEXT: s_cmp_gt_i32 s9, -1
-; GFX6-NEXT: s_cselect_b32 s16, s9, -1
+; GFX6-NEXT: s_max_i32 s16, s9, -1
; GFX6-NEXT: s_sub_i32 s16, s16, s32
-; GFX6-NEXT: s_cmp_lt_i32 s9, -1
-; GFX6-NEXT: s_cselect_b32 s17, s9, -1
+; GFX6-NEXT: s_min_i32 s17, s9, -1
; GFX6-NEXT: s_sub_i32 s17, s17, s33
-; GFX6-NEXT: s_cmp_gt_i32 s16, s25
-; GFX6-NEXT: s_cselect_b32 s16, s16, s25
-; GFX6-NEXT: s_cmp_lt_i32 s16, s17
-; GFX6-NEXT: s_cselect_b32 s16, s16, s17
+; GFX6-NEXT: s_max_i32 s16, s16, s25
+; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s9, s9, s16
-; GFX6-NEXT: s_cmp_gt_i32 s10, -1
-; GFX6-NEXT: s_cselect_b32 s16, s10, -1
+; GFX6-NEXT: s_max_i32 s16, s10, -1
; GFX6-NEXT: s_sub_i32 s16, s16, s32
-; GFX6-NEXT: s_cmp_lt_i32 s10, -1
-; GFX6-NEXT: s_cselect_b32 s17, s10, -1
+; GFX6-NEXT: s_min_i32 s17, s10, -1
; GFX6-NEXT: s_sub_i32 s17, s17, s33
-; GFX6-NEXT: s_cmp_gt_i32 s16, s26
-; GFX6-NEXT: s_cselect_b32 s16, s16, s26
-; GFX6-NEXT: s_cmp_lt_i32 s16, s17
-; GFX6-NEXT: s_cselect_b32 s16, s16, s17
+; GFX6-NEXT: s_max_i32 s16, s16, s26
+; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s10, s10, s16
-; GFX6-NEXT: s_cmp_gt_i32 s11, -1
-; GFX6-NEXT: s_cselect_b32 s16, s11, -1
+; GFX6-NEXT: s_max_i32 s16, s11, -1
; GFX6-NEXT: s_sub_i32 s16, s16, s32
-; GFX6-NEXT: s_cmp_lt_i32 s11, -1
-; GFX6-NEXT: s_cselect_b32 s17, s11, -1
+; GFX6-NEXT: s_min_i32 s17, s11, -1
; GFX6-NEXT: s_sub_i32 s17, s17, s33
-; GFX6-NEXT: s_cmp_gt_i32 s16, s27
-; GFX6-NEXT: s_cselect_b32 s16, s16, s27
-; GFX6-NEXT: s_cmp_lt_i32 s16, s17
-; GFX6-NEXT: s_cselect_b32 s16, s16, s17
+; GFX6-NEXT: s_max_i32 s16, s16, s27
+; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s11, s11, s16
-; GFX6-NEXT: s_cmp_gt_i32 s12, -1
-; GFX6-NEXT: s_cselect_b32 s16, s12, -1
+; GFX6-NEXT: s_max_i32 s16, s12, -1
; GFX6-NEXT: s_sub_i32 s16, s16, s32
-; GFX6-NEXT: s_cmp_lt_i32 s12, -1
-; GFX6-NEXT: s_cselect_b32 s17, s12, -1
+; GFX6-NEXT: s_min_i32 s17, s12, -1
; GFX6-NEXT: s_sub_i32 s17, s17, s33
-; GFX6-NEXT: s_cmp_gt_i32 s16, s28
-; GFX6-NEXT: s_cselect_b32 s16, s16, s28
-; GFX6-NEXT: s_cmp_lt_i32 s16, s17
-; GFX6-NEXT: s_cselect_b32 s16, s16, s17
+; GFX6-NEXT: s_max_i32 s16, s16, s28
+; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s12, s12, s16
-; GFX6-NEXT: s_cmp_gt_i32 s13, -1
-; GFX6-NEXT: s_cselect_b32 s16, s13, -1
+; GFX6-NEXT: s_max_i32 s16, s13, -1
; GFX6-NEXT: s_sub_i32 s16, s16, s32
-; GFX6-NEXT: s_cmp_lt_i32 s13, -1
-; GFX6-NEXT: s_cselect_b32 s17, s13, -1
+; GFX6-NEXT: s_min_i32 s17, s13, -1
; GFX6-NEXT: s_sub_i32 s17, s17, s33
-; GFX6-NEXT: s_cmp_gt_i32 s16, s29
-; GFX6-NEXT: s_cselect_b32 s16, s16, s29
-; GFX6-NEXT: s_cmp_lt_i32 s16, s17
-; GFX6-NEXT: s_cselect_b32 s16, s16, s17
+; GFX6-NEXT: s_max_i32 s16, s16, s29
+; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s13, s13, s16
-; GFX6-NEXT: s_cmp_gt_i32 s14, -1
-; GFX6-NEXT: s_cselect_b32 s16, s14, -1
+; GFX6-NEXT: s_max_i32 s16, s14, -1
; GFX6-NEXT: s_sub_i32 s16, s16, s32
-; GFX6-NEXT: s_cmp_lt_i32 s14, -1
-; GFX6-NEXT: s_cselect_b32 s17, s14, -1
+; GFX6-NEXT: s_min_i32 s17, s14, -1
; GFX6-NEXT: s_sub_i32 s17, s17, s33
-; GFX6-NEXT: s_cmp_gt_i32 s16, s30
-; GFX6-NEXT: s_cselect_b32 s16, s16, s30
-; GFX6-NEXT: s_cmp_lt_i32 s16, s17
-; GFX6-NEXT: s_cselect_b32 s16, s16, s17
+; GFX6-NEXT: s_max_i32 s16, s16, s30
+; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s14, s14, s16
-; GFX6-NEXT: s_cmp_gt_i32 s15, -1
-; GFX6-NEXT: s_cselect_b32 s16, s15, -1
+; GFX6-NEXT: s_max_i32 s16, s15, -1
; GFX6-NEXT: s_sub_i32 s16, s16, s32
-; GFX6-NEXT: s_cmp_lt_i32 s15, -1
-; GFX6-NEXT: s_cselect_b32 s17, s15, -1
+; GFX6-NEXT: s_min_i32 s17, s15, -1
; GFX6-NEXT: s_sub_i32 s17, s17, s33
-; GFX6-NEXT: s_cmp_gt_i32 s16, s31
-; GFX6-NEXT: s_cselect_b32 s16, s16, s31
-; GFX6-NEXT: s_cmp_lt_i32 s16, s17
-; GFX6-NEXT: s_cselect_b32 s16, s16, s17
+; GFX6-NEXT: s_max_i32 s16, s16, s31
+; GFX6-NEXT: s_min_i32 s16, s16, s17
; GFX6-NEXT: s_sub_i32 s15, s15, s16
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_ssubsat_v16i32:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_cmp_gt_i32 s0, -1
; GFX8-NEXT: s_brev_b32 s32, -2
-; GFX8-NEXT: s_cselect_b32 s34, s0, -1
+; GFX8-NEXT: s_max_i32 s34, s0, -1
; GFX8-NEXT: s_sub_i32 s34, s34, s32
-; GFX8-NEXT: s_cmp_lt_i32 s0, -1
; GFX8-NEXT: s_brev_b32 s33, 1
-; GFX8-NEXT: s_cselect_b32 s35, s0, -1
+; GFX8-NEXT: s_min_i32 s35, s0, -1
+; GFX8-NEXT: s_max_i32 s16, s34, s16
; GFX8-NEXT: s_sub_i32 s35, s35, s33
-; GFX8-NEXT: s_cmp_gt_i32 s34, s16
-; GFX8-NEXT: s_cselect_b32 s16, s34, s16
-; GFX8-NEXT: s_cmp_lt_i32 s16, s35
-; GFX8-NEXT: s_cselect_b32 s16, s16, s35
+; GFX8-NEXT: s_min_i32 s16, s16, s35
; GFX8-NEXT: s_sub_i32 s0, s0, s16
-; GFX8-NEXT: s_cmp_gt_i32 s1, -1
-; GFX8-NEXT: s_cselect_b32 s16, s1, -1
+; GFX8-NEXT: s_max_i32 s16, s1, -1
; GFX8-NEXT: s_sub_i32 s16, s16, s32
-; GFX8-NEXT: s_cmp_lt_i32 s1, -1
-; GFX8-NEXT: s_cselect_b32 s34, s1, -1
+; GFX8-NEXT: s_min_i32 s34, s1, -1
+; GFX8-NEXT: s_max_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s34, s34, s33
-; GFX8-NEXT: s_cmp_gt_i32 s16, s17
-; GFX8-NEXT: s_cselect_b32 s16, s16, s17
-; GFX8-NEXT: s_cmp_lt_i32 s16, s34
-; GFX8-NEXT: s_cselect_b32 s16, s16, s34
+; GFX8-NEXT: s_min_i32 s16, s16, s34
; GFX8-NEXT: s_sub_i32 s1, s1, s16
-; GFX8-NEXT: s_cmp_gt_i32 s2, -1
-; GFX8-NEXT: s_cselect_b32 s16, s2, -1
+; GFX8-NEXT: s_max_i32 s16, s2, -1
; GFX8-NEXT: s_sub_i32 s16, s16, s32
-; GFX8-NEXT: s_cmp_lt_i32 s2, -1
-; GFX8-NEXT: s_cselect_b32 s17, s2, -1
+; GFX8-NEXT: s_min_i32 s17, s2, -1
; GFX8-NEXT: s_sub_i32 s17, s17, s33
-; GFX8-NEXT: s_cmp_gt_i32 s16, s18
-; GFX8-NEXT: s_cselect_b32 s16, s16, s18
-; GFX8-NEXT: s_cmp_lt_i32 s16, s17
-; GFX8-NEXT: s_cselect_b32 s16, s16, s17
+; GFX8-NEXT: s_max_i32 s16, s16, s18
+; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s2, s2, s16
-; GFX8-NEXT: s_cmp_gt_i32 s3, -1
-; GFX8-NEXT: s_cselect_b32 s16, s3, -1
+; GFX8-NEXT: s_max_i32 s16, s3, -1
; GFX8-NEXT: s_sub_i32 s16, s16, s32
-; GFX8-NEXT: s_cmp_lt_i32 s3, -1
-; GFX8-NEXT: s_cselect_b32 s17, s3, -1
+; GFX8-NEXT: s_min_i32 s17, s3, -1
; GFX8-NEXT: s_sub_i32 s17, s17, s33
-; GFX8-NEXT: s_cmp_gt_i32 s16, s19
-; GFX8-NEXT: s_cselect_b32 s16, s16, s19
-; GFX8-NEXT: s_cmp_lt_i32 s16, s17
-; GFX8-NEXT: s_cselect_b32 s16, s16, s17
+; GFX8-NEXT: s_max_i32 s16, s16, s19
+; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s3, s3, s16
-; GFX8-NEXT: s_cmp_gt_i32 s4, -1
-; GFX8-NEXT: s_cselect_b32 s16, s4, -1
+; GFX8-NEXT: s_max_i32 s16, s4, -1
; GFX8-NEXT: s_sub_i32 s16, s16, s32
-; GFX8-NEXT: s_cmp_lt_i32 s4, -1
-; GFX8-NEXT: s_cselect_b32 s17, s4, -1
+; GFX8-NEXT: s_min_i32 s17, s4, -1
; GFX8-NEXT: s_sub_i32 s17, s17, s33
-; GFX8-NEXT: s_cmp_gt_i32 s16, s20
-; GFX8-NEXT: s_cselect_b32 s16, s16, s20
-; GFX8-NEXT: s_cmp_lt_i32 s16, s17
-; GFX8-NEXT: s_cselect_b32 s16, s16, s17
+; GFX8-NEXT: s_max_i32 s16, s16, s20
+; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s4, s4, s16
-; GFX8-NEXT: s_cmp_gt_i32 s5, -1
-; GFX8-NEXT: s_cselect_b32 s16, s5, -1
+; GFX8-NEXT: s_max_i32 s16, s5, -1
; GFX8-NEXT: s_sub_i32 s16, s16, s32
-; GFX8-NEXT: s_cmp_lt_i32 s5, -1
-; GFX8-NEXT: s_cselect_b32 s17, s5, -1
+; GFX8-NEXT: s_min_i32 s17, s5, -1
; GFX8-NEXT: s_sub_i32 s17, s17, s33
-; GFX8-NEXT: s_cmp_gt_i32 s16, s21
-; GFX8-NEXT: s_cselect_b32 s16, s16, s21
-; GFX8-NEXT: s_cmp_lt_i32 s16, s17
-; GFX8-NEXT: s_cselect_b32 s16, s16, s17
+; GFX8-NEXT: s_max_i32 s16, s16, s21
+; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s5, s5, s16
-; GFX8-NEXT: s_cmp_gt_i32 s6, -1
-; GFX8-NEXT: s_cselect_b32 s16, s6, -1
+; GFX8-NEXT: s_max_i32 s16, s6, -1
; GFX8-NEXT: s_sub_i32 s16, s16, s32
-; GFX8-NEXT: s_cmp_lt_i32 s6, -1
-; GFX8-NEXT: s_cselect_b32 s17, s6, -1
+; GFX8-NEXT: s_min_i32 s17, s6, -1
; GFX8-NEXT: s_sub_i32 s17, s17, s33
-; GFX8-NEXT: s_cmp_gt_i32 s16, s22
-; GFX8-NEXT: s_cselect_b32 s16, s16, s22
-; GFX8-NEXT: s_cmp_lt_i32 s16, s17
-; GFX8-NEXT: s_cselect_b32 s16, s16, s17
+; GFX8-NEXT: s_max_i32 s16, s16, s22
+; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s6, s6, s16
-; GFX8-NEXT: s_cmp_gt_i32 s7, -1
-; GFX8-NEXT: s_cselect_b32 s16, s7, -1
+; GFX8-NEXT: s_max_i32 s16, s7, -1
; GFX8-NEXT: s_sub_i32 s16, s16, s32
-; GFX8-NEXT: s_cmp_lt_i32 s7, -1
-; GFX8-NEXT: s_cselect_b32 s17, s7, -1
+; GFX8-NEXT: s_min_i32 s17, s7, -1
; GFX8-NEXT: s_sub_i32 s17, s17, s33
-; GFX8-NEXT: s_cmp_gt_i32 s16, s23
-; GFX8-NEXT: s_cselect_b32 s16, s16, s23
-; GFX8-NEXT: s_cmp_lt_i32 s16, s17
-; GFX8-NEXT: s_cselect_b32 s16, s16, s17
+; GFX8-NEXT: s_max_i32 s16, s16, s23
+; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s7, s7, s16
-; GFX8-NEXT: s_cmp_gt_i32 s8, -1
-; GFX8-NEXT: s_cselect_b32 s16, s8, -1
+; GFX8-NEXT: s_max_i32 s16, s8, -1
; GFX8-NEXT: s_sub_i32 s16, s16, s32
-; GFX8-NEXT: s_cmp_lt_i32 s8, -1
-; GFX8-NEXT: s_cselect_b32 s17, s8, -1
+; GFX8-NEXT: s_min_i32 s17, s8, -1
; GFX8-NEXT: s_sub_i32 s17, s17, s33
-; GFX8-NEXT: s_cmp_gt_i32 s16, s24
-; GFX8-NEXT: s_cselect_b32 s16, s16, s24
-; GFX8-NEXT: s_cmp_lt_i32 s16, s17
-; GFX8-NEXT: s_cselect_b32 s16, s16, s17
+; GFX8-NEXT: s_max_i32 s16, s16, s24
+; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s8, s8, s16
-; GFX8-NEXT: s_cmp_gt_i32 s9, -1
-; GFX8-NEXT: s_cselect_b32 s16, s9, -1
+; GFX8-NEXT: s_max_i32 s16, s9, -1
; GFX8-NEXT: s_sub_i32 s16, s16, s32
-; GFX8-NEXT: s_cmp_lt_i32 s9, -1
-; GFX8-NEXT: s_cselect_b32 s17, s9, -1
+; GFX8-NEXT: s_min_i32 s17, s9, -1
; GFX8-NEXT: s_sub_i32 s17, s17, s33
-; GFX8-NEXT: s_cmp_gt_i32 s16, s25
-; GFX8-NEXT: s_cselect_b32 s16, s16, s25
-; GFX8-NEXT: s_cmp_lt_i32 s16, s17
-; GFX8-NEXT: s_cselect_b32 s16, s16, s17
+; GFX8-NEXT: s_max_i32 s16, s16, s25
+; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s9, s9, s16
-; GFX8-NEXT: s_cmp_gt_i32 s10, -1
-; GFX8-NEXT: s_cselect_b32 s16, s10, -1
+; GFX8-NEXT: s_max_i32 s16, s10, -1
; GFX8-NEXT: s_sub_i32 s16, s16, s32
-; GFX8-NEXT: s_cmp_lt_i32 s10, -1
-; GFX8-NEXT: s_cselect_b32 s17, s10, -1
+; GFX8-NEXT: s_min_i32 s17, s10, -1
; GFX8-NEXT: s_sub_i32 s17, s17, s33
-; GFX8-NEXT: s_cmp_gt_i32 s16, s26
-; GFX8-NEXT: s_cselect_b32 s16, s16, s26
-; GFX8-NEXT: s_cmp_lt_i32 s16, s17
-; GFX8-NEXT: s_cselect_b32 s16, s16, s17
+; GFX8-NEXT: s_max_i32 s16, s16, s26
+; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s10, s10, s16
-; GFX8-NEXT: s_cmp_gt_i32 s11, -1
-; GFX8-NEXT: s_cselect_b32 s16, s11, -1
+; GFX8-NEXT: s_max_i32 s16, s11, -1
; GFX8-NEXT: s_sub_i32 s16, s16, s32
-; GFX8-NEXT: s_cmp_lt_i32 s11, -1
-; GFX8-NEXT: s_cselect_b32 s17, s11, -1
+; GFX8-NEXT: s_min_i32 s17, s11, -1
; GFX8-NEXT: s_sub_i32 s17, s17, s33
-; GFX8-NEXT: s_cmp_gt_i32 s16, s27
-; GFX8-NEXT: s_cselect_b32 s16, s16, s27
-; GFX8-NEXT: s_cmp_lt_i32 s16, s17
-; GFX8-NEXT: s_cselect_b32 s16, s16, s17
+; GFX8-NEXT: s_max_i32 s16, s16, s27
+; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s11, s11, s16
-; GFX8-NEXT: s_cmp_gt_i32 s12, -1
-; GFX8-NEXT: s_cselect_b32 s16, s12, -1
+; GFX8-NEXT: s_max_i32 s16, s12, -1
; GFX8-NEXT: s_sub_i32 s16, s16, s32
-; GFX8-NEXT: s_cmp_lt_i32 s12, -1
-; GFX8-NEXT: s_cselect_b32 s17, s12, -1
+; GFX8-NEXT: s_min_i32 s17, s12, -1
; GFX8-NEXT: s_sub_i32 s17, s17, s33
-; GFX8-NEXT: s_cmp_gt_i32 s16, s28
-; GFX8-NEXT: s_cselect_b32 s16, s16, s28
-; GFX8-NEXT: s_cmp_lt_i32 s16, s17
-; GFX8-NEXT: s_cselect_b32 s16, s16, s17
+; GFX8-NEXT: s_max_i32 s16, s16, s28
+; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s12, s12, s16
-; GFX8-NEXT: s_cmp_gt_i32 s13, -1
-; GFX8-NEXT: s_cselect_b32 s16, s13, -1
+; GFX8-NEXT: s_max_i32 s16, s13, -1
; GFX8-NEXT: s_sub_i32 s16, s16, s32
-; GFX8-NEXT: s_cmp_lt_i32 s13, -1
-; GFX8-NEXT: s_cselect_b32 s17, s13, -1
+; GFX8-NEXT: s_min_i32 s17, s13, -1
; GFX8-NEXT: s_sub_i32 s17, s17, s33
-; GFX8-NEXT: s_cmp_gt_i32 s16, s29
-; GFX8-NEXT: s_cselect_b32 s16, s16, s29
-; GFX8-NEXT: s_cmp_lt_i32 s16, s17
-; GFX8-NEXT: s_cselect_b32 s16, s16, s17
+; GFX8-NEXT: s_max_i32 s16, s16, s29
+; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s13, s13, s16
-; GFX8-NEXT: s_cmp_gt_i32 s14, -1
-; GFX8-NEXT: s_cselect_b32 s16, s14, -1
+; GFX8-NEXT: s_max_i32 s16, s14, -1
; GFX8-NEXT: s_sub_i32 s16, s16, s32
-; GFX8-NEXT: s_cmp_lt_i32 s14, -1
-; GFX8-NEXT: s_cselect_b32 s17, s14, -1
+; GFX8-NEXT: s_min_i32 s17, s14, -1
; GFX8-NEXT: s_sub_i32 s17, s17, s33
-; GFX8-NEXT: s_cmp_gt_i32 s16, s30
-; GFX8-NEXT: s_cselect_b32 s16, s16, s30
-; GFX8-NEXT: s_cmp_lt_i32 s16, s17
-; GFX8-NEXT: s_cselect_b32 s16, s16, s17
+; GFX8-NEXT: s_max_i32 s16, s16, s30
+; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s14, s14, s16
-; GFX8-NEXT: s_cmp_gt_i32 s15, -1
-; GFX8-NEXT: s_cselect_b32 s16, s15, -1
+; GFX8-NEXT: s_max_i32 s16, s15, -1
; GFX8-NEXT: s_sub_i32 s16, s16, s32
-; GFX8-NEXT: s_cmp_lt_i32 s15, -1
-; GFX8-NEXT: s_cselect_b32 s17, s15, -1
+; GFX8-NEXT: s_min_i32 s17, s15, -1
; GFX8-NEXT: s_sub_i32 s17, s17, s33
-; GFX8-NEXT: s_cmp_gt_i32 s16, s31
-; GFX8-NEXT: s_cselect_b32 s16, s16, s31
-; GFX8-NEXT: s_cmp_lt_i32 s16, s17
-; GFX8-NEXT: s_cselect_b32 s16, s16, s17
+; GFX8-NEXT: s_max_i32 s16, s16, s31
+; GFX8-NEXT: s_min_i32 s16, s16, s17
; GFX8-NEXT: s_sub_i32 s15, s15, s16
; GFX8-NEXT: ; return to shader part epilog
;
@@ -2878,17 +2558,13 @@ define amdgpu_ps i16 @s_ssubsat_i16(i16 inreg %lhs, i16 inreg %rhs) {
; GFX6-LABEL: s_ssubsat_i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
+; GFX6-NEXT: s_max_i32 s2, s0, -1
+; GFX6-NEXT: s_min_i32 s3, s0, -1
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
-; GFX6-NEXT: s_cmp_gt_i32 s0, -1
-; GFX6-NEXT: s_cselect_b32 s2, s0, -1
; GFX6-NEXT: s_sub_i32 s2, s2, 0x7fffffff
-; GFX6-NEXT: s_cmp_lt_i32 s0, -1
-; GFX6-NEXT: s_cselect_b32 s3, s0, -1
; GFX6-NEXT: s_sub_i32 s3, s3, 0x80000000
-; GFX6-NEXT: s_cmp_gt_i32 s2, s1
-; GFX6-NEXT: s_cselect_b32 s1, s2, s1
-; GFX6-NEXT: s_cmp_lt_i32 s1, s3
-; GFX6-NEXT: s_cselect_b32 s1, s1, s3
+; GFX6-NEXT: s_max_i32 s1, s2, s1
+; GFX6-NEXT: s_min_i32 s1, s1, s3
; GFX6-NEXT: s_sub_i32 s0, s0, s1
; GFX6-NEXT: s_ashr_i32 s0, s0, 16
; GFX6-NEXT: ; return to shader part epilog
@@ -2897,20 +2573,16 @@ define amdgpu_ps i16 @s_ssubsat_i16(i16 inreg %lhs, i16 inreg %rhs) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_sext_i32_i16 s2, s0
; GFX8-NEXT: s_sext_i32_i16 s3, -1
-; GFX8-NEXT: s_cmp_gt_i32 s2, s3
-; GFX8-NEXT: s_cselect_b32 s4, s2, s3
+; GFX8-NEXT: s_max_i32 s4, s2, s3
; GFX8-NEXT: s_sub_i32 s4, s4, 0x7fff
-; GFX8-NEXT: s_cmp_lt_i32 s2, s3
-; GFX8-NEXT: s_cselect_b32 s2, s2, s3
-; GFX8-NEXT: s_sub_i32 s2, s2, 0xffff8000
+; GFX8-NEXT: s_min_i32 s2, s2, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s4
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_cmp_gt_i32 s3, s1
-; GFX8-NEXT: s_cselect_b32 s1, s3, s1
+; GFX8-NEXT: s_sub_i32 s2, s2, 0xffff8000
+; GFX8-NEXT: s_max_i32 s1, s3, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s2, s2
-; GFX8-NEXT: s_cmp_lt_i32 s1, s2
-; GFX8-NEXT: s_cselect_b32 s1, s1, s2
+; GFX8-NEXT: s_min_i32 s1, s1, s2
; GFX8-NEXT: s_sub_i32 s0, s0, s1
; GFX8-NEXT: ; return to shader part epilog
;
@@ -2934,12 +2606,10 @@ define amdgpu_ps half @ssubsat_i16_sv(i16 inreg %lhs, i16 %rhs) {
; GFX6-LABEL: ssubsat_i16_sv:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
-; GFX6-NEXT: s_cmp_gt_i32 s0, -1
-; GFX6-NEXT: s_cselect_b32 s1, s0, -1
-; GFX6-NEXT: s_sub_i32 s1, s1, 0x7fffffff
-; GFX6-NEXT: s_cmp_lt_i32 s0, -1
-; GFX6-NEXT: s_cselect_b32 s2, s0, -1
+; GFX6-NEXT: s_max_i32 s1, s0, -1
+; GFX6-NEXT: s_min_i32 s2, s0, -1
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX6-NEXT: s_sub_i32 s1, s1, 0x7fffffff
; GFX6-NEXT: s_sub_i32 s2, s2, 0x80000000
; GFX6-NEXT: v_max_i32_e32 v0, s1, v0
; GFX6-NEXT: v_min_i32_e32 v0, s2, v0
@@ -2951,11 +2621,9 @@ define amdgpu_ps half @ssubsat_i16_sv(i16 inreg %lhs, i16 %rhs) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_sext_i32_i16 s1, s0
; GFX8-NEXT: s_sext_i32_i16 s2, -1
-; GFX8-NEXT: s_cmp_gt_i32 s1, s2
-; GFX8-NEXT: s_cselect_b32 s3, s1, s2
+; GFX8-NEXT: s_max_i32 s3, s1, s2
+; GFX8-NEXT: s_min_i32 s1, s1, s2
; GFX8-NEXT: s_sub_i32 s3, s3, 0x7fff
-; GFX8-NEXT: s_cmp_lt_i32 s1, s2
-; GFX8-NEXT: s_cselect_b32 s1, s1, s2
; GFX8-NEXT: s_sub_i32 s1, s1, 0xffff8000
; GFX8-NEXT: v_max_i16_e32 v0, s3, v0
; GFX8-NEXT: v_min_i16_e32 v0, s1, v0
@@ -3087,36 +2755,28 @@ define amdgpu_ps i32 @s_ssubsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
; GFX6-LABEL: s_ssubsat_v2i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
-; GFX6-NEXT: s_lshl_b32 s2, s2, 16
-; GFX6-NEXT: s_cmp_gt_i32 s0, -1
; GFX6-NEXT: s_brev_b32 s4, -2
-; GFX6-NEXT: s_cselect_b32 s6, s0, -1
+; GFX6-NEXT: s_max_i32 s6, s0, -1
+; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_sub_i32 s6, s6, s4
-; GFX6-NEXT: s_cmp_lt_i32 s0, -1
; GFX6-NEXT: s_brev_b32 s5, 1
-; GFX6-NEXT: s_cselect_b32 s7, s0, -1
+; GFX6-NEXT: s_min_i32 s7, s0, -1
; GFX6-NEXT: s_sub_i32 s7, s7, s5
-; GFX6-NEXT: s_cmp_gt_i32 s6, s2
-; GFX6-NEXT: s_cselect_b32 s2, s6, s2
-; GFX6-NEXT: s_cmp_lt_i32 s2, s7
-; GFX6-NEXT: s_cselect_b32 s2, s2, s7
-; GFX6-NEXT: s_sub_i32 s0, s0, s2
-; GFX6-NEXT: s_ashr_i32 s0, s0, 16
+; GFX6-NEXT: s_max_i32 s2, s6, s2
+; GFX6-NEXT: s_min_i32 s2, s2, s7
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
+; GFX6-NEXT: s_sub_i32 s0, s0, s2
; GFX6-NEXT: s_lshl_b32 s2, s3, 16
-; GFX6-NEXT: s_cmp_gt_i32 s1, -1
-; GFX6-NEXT: s_cselect_b32 s3, s1, -1
+; GFX6-NEXT: s_max_i32 s3, s1, -1
; GFX6-NEXT: s_sub_i32 s3, s3, s4
-; GFX6-NEXT: s_cmp_lt_i32 s1, -1
-; GFX6-NEXT: s_cselect_b32 s4, s1, -1
+; GFX6-NEXT: s_min_i32 s4, s1, -1
; GFX6-NEXT: s_sub_i32 s4, s4, s5
-; GFX6-NEXT: s_cmp_gt_i32 s3, s2
-; GFX6-NEXT: s_cselect_b32 s2, s3, s2
-; GFX6-NEXT: s_cmp_lt_i32 s2, s4
-; GFX6-NEXT: s_cselect_b32 s2, s2, s4
+; GFX6-NEXT: s_max_i32 s2, s3, s2
+; GFX6-NEXT: s_min_i32 s2, s2, s4
; GFX6-NEXT: s_sub_i32 s1, s1, s2
-; GFX6-NEXT: s_mov_b32 s2, 0xffff
; GFX6-NEXT: s_ashr_i32 s1, s1, 16
+; GFX6-NEXT: s_mov_b32 s2, 0xffff
+; GFX6-NEXT: s_ashr_i32 s0, s0, 16
; GFX6-NEXT: s_and_b32 s1, s1, s2
; GFX6-NEXT: s_and_b32 s0, s0, s2
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
@@ -3125,42 +2785,34 @@ define amdgpu_ps i32 @s_ssubsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
;
; GFX8-LABEL: s_ssubsat_v2i16:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_lshr_b32 s3, s1, 16
-; GFX8-NEXT: s_lshr_b32 s2, s0, 16
; GFX8-NEXT: s_sext_i32_i16 s6, s0
; GFX8-NEXT: s_sext_i32_i16 s7, -1
-; GFX8-NEXT: s_cmp_gt_i32 s6, s7
+; GFX8-NEXT: s_max_i32 s8, s6, s7
; GFX8-NEXT: s_movk_i32 s4, 0x7fff
-; GFX8-NEXT: s_cselect_b32 s8, s6, s7
; GFX8-NEXT: s_sub_i32 s8, s8, s4
-; GFX8-NEXT: s_cmp_lt_i32 s6, s7
+; GFX8-NEXT: s_lshr_b32 s3, s1, 16
; GFX8-NEXT: s_movk_i32 s5, 0x8000
-; GFX8-NEXT: s_cselect_b32 s6, s6, s7
-; GFX8-NEXT: s_sub_i32 s6, s6, s5
+; GFX8-NEXT: s_min_i32 s6, s6, s7
; GFX8-NEXT: s_sext_i32_i16 s8, s8
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_cmp_gt_i32 s8, s1
-; GFX8-NEXT: s_cselect_b32 s1, s8, s1
+; GFX8-NEXT: s_sub_i32 s6, s6, s5
+; GFX8-NEXT: s_max_i32 s1, s8, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s6, s6
-; GFX8-NEXT: s_cmp_lt_i32 s1, s6
-; GFX8-NEXT: s_cselect_b32 s1, s1, s6
+; GFX8-NEXT: s_lshr_b32 s2, s0, 16
+; GFX8-NEXT: s_min_i32 s1, s1, s6
; GFX8-NEXT: s_sub_i32 s0, s0, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s2
-; GFX8-NEXT: s_cmp_gt_i32 s1, s7
-; GFX8-NEXT: s_cselect_b32 s6, s1, s7
+; GFX8-NEXT: s_max_i32 s6, s1, s7
; GFX8-NEXT: s_sub_i32 s4, s6, s4
-; GFX8-NEXT: s_cmp_lt_i32 s1, s7
-; GFX8-NEXT: s_cselect_b32 s1, s1, s7
-; GFX8-NEXT: s_sub_i32 s1, s1, s5
+; GFX8-NEXT: s_min_i32 s1, s1, s7
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s3, s3
-; GFX8-NEXT: s_cmp_gt_i32 s4, s3
-; GFX8-NEXT: s_cselect_b32 s3, s4, s3
+; GFX8-NEXT: s_sub_i32 s1, s1, s5
+; GFX8-NEXT: s_max_i32 s3, s4, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_cmp_lt_i32 s3, s1
-; GFX8-NEXT: s_cselect_b32 s1, s3, s1
+; GFX8-NEXT: s_min_i32 s1, s3, s1
; GFX8-NEXT: s_sub_i32 s1, s2, s1
; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
@@ -3189,25 +2841,21 @@ define amdgpu_ps float @ssubsat_v2i16_sv(<2 x i16> inreg %lhs, <2 x i16> %rhs) {
; GFX6-LABEL: ssubsat_v2i16_sv:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
-; GFX6-NEXT: s_cmp_gt_i32 s0, -1
; GFX6-NEXT: s_brev_b32 s2, -2
-; GFX6-NEXT: s_cselect_b32 s4, s0, -1
-; GFX6-NEXT: s_sub_i32 s4, s4, s2
-; GFX6-NEXT: s_cmp_lt_i32 s0, -1
+; GFX6-NEXT: s_max_i32 s4, s0, -1
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX6-NEXT: s_sub_i32 s4, s4, s2
; GFX6-NEXT: s_brev_b32 s3, 1
-; GFX6-NEXT: s_cselect_b32 s5, s0, -1
+; GFX6-NEXT: s_min_i32 s5, s0, -1
; GFX6-NEXT: s_sub_i32 s5, s5, s3
; GFX6-NEXT: v_max_i32_e32 v0, s4, v0
; GFX6-NEXT: v_min_i32_e32 v0, s5, v0
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s0, v0
; GFX6-NEXT: s_lshl_b32 s0, s1, 16
-; GFX6-NEXT: s_cmp_gt_i32 s0, -1
-; GFX6-NEXT: s_cselect_b32 s1, s0, -1
+; GFX6-NEXT: s_max_i32 s1, s0, -1
; GFX6-NEXT: s_sub_i32 s1, s1, s2
-; GFX6-NEXT: s_cmp_lt_i32 s0, -1
-; GFX6-NEXT: s_cselect_b32 s2, s0, -1
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX6-NEXT: s_min_i32 s2, s0, -1
; GFX6-NEXT: s_sub_i32 s2, s2, s3
; GFX6-NEXT: v_max_i32_e32 v1, s1, v1
; GFX6-NEXT: v_min_i32_e32 v1, s2, v1
@@ -3223,25 +2871,21 @@ define amdgpu_ps float @ssubsat_v2i16_sv(<2 x i16> inreg %lhs, <2 x i16> %rhs) {
;
; GFX8-LABEL: ssubsat_v2i16_sv:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_lshr_b32 s1, s0, 16
; GFX8-NEXT: s_sext_i32_i16 s4, s0
; GFX8-NEXT: s_sext_i32_i16 s5, -1
-; GFX8-NEXT: s_cmp_gt_i32 s4, s5
+; GFX8-NEXT: s_max_i32 s6, s4, s5
; GFX8-NEXT: s_movk_i32 s2, 0x7fff
-; GFX8-NEXT: s_cselect_b32 s6, s4, s5
; GFX8-NEXT: s_sub_i32 s6, s6, s2
-; GFX8-NEXT: s_cmp_lt_i32 s4, s5
; GFX8-NEXT: s_movk_i32 s3, 0x8000
-; GFX8-NEXT: s_cselect_b32 s4, s4, s5
+; GFX8-NEXT: s_min_i32 s4, s4, s5
+; GFX8-NEXT: s_lshr_b32 s1, s0, 16
; GFX8-NEXT: s_sub_i32 s4, s4, s3
; GFX8-NEXT: v_max_i16_e32 v1, s6, v0
; GFX8-NEXT: v_min_i16_e32 v1, s4, v1
; GFX8-NEXT: s_sext_i32_i16 s4, s1
-; GFX8-NEXT: s_cmp_gt_i32 s4, s5
-; GFX8-NEXT: s_cselect_b32 s6, s4, s5
+; GFX8-NEXT: s_max_i32 s6, s4, s5
; GFX8-NEXT: s_sub_i32 s2, s6, s2
-; GFX8-NEXT: s_cmp_lt_i32 s4, s5
-; GFX8-NEXT: s_cselect_b32 s4, s4, s5
+; GFX8-NEXT: s_min_i32 s4, s4, s5
; GFX8-NEXT: v_mov_b32_e32 v2, s2
; GFX8-NEXT: v_max_i16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: s_sub_i32 s3, s4, s3
@@ -3467,64 +3111,48 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
; GFX6-LABEL: s_ssubsat_v4i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
-; GFX6-NEXT: s_lshl_b32 s4, s4, 16
-; GFX6-NEXT: s_cmp_gt_i32 s0, -1
; GFX6-NEXT: s_brev_b32 s8, -2
-; GFX6-NEXT: s_cselect_b32 s10, s0, -1
+; GFX6-NEXT: s_max_i32 s10, s0, -1
+; GFX6-NEXT: s_lshl_b32 s4, s4, 16
; GFX6-NEXT: s_sub_i32 s10, s10, s8
-; GFX6-NEXT: s_cmp_lt_i32 s0, -1
; GFX6-NEXT: s_brev_b32 s9, 1
-; GFX6-NEXT: s_cselect_b32 s11, s0, -1
+; GFX6-NEXT: s_min_i32 s11, s0, -1
; GFX6-NEXT: s_sub_i32 s11, s11, s9
-; GFX6-NEXT: s_cmp_gt_i32 s10, s4
-; GFX6-NEXT: s_cselect_b32 s4, s10, s4
-; GFX6-NEXT: s_cmp_lt_i32 s4, s11
-; GFX6-NEXT: s_cselect_b32 s4, s4, s11
-; GFX6-NEXT: s_sub_i32 s0, s0, s4
-; GFX6-NEXT: s_ashr_i32 s0, s0, 16
+; GFX6-NEXT: s_max_i32 s4, s10, s4
+; GFX6-NEXT: s_min_i32 s4, s4, s11
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
+; GFX6-NEXT: s_sub_i32 s0, s0, s4
; GFX6-NEXT: s_lshl_b32 s4, s5, 16
-; GFX6-NEXT: s_cmp_gt_i32 s1, -1
-; GFX6-NEXT: s_cselect_b32 s5, s1, -1
+; GFX6-NEXT: s_max_i32 s5, s1, -1
+; GFX6-NEXT: s_min_i32 s10, s1, -1
; GFX6-NEXT: s_sub_i32 s5, s5, s8
-; GFX6-NEXT: s_cmp_lt_i32 s1, -1
-; GFX6-NEXT: s_cselect_b32 s10, s1, -1
; GFX6-NEXT: s_sub_i32 s10, s10, s9
-; GFX6-NEXT: s_cmp_gt_i32 s5, s4
-; GFX6-NEXT: s_cselect_b32 s4, s5, s4
-; GFX6-NEXT: s_cmp_lt_i32 s4, s10
-; GFX6-NEXT: s_cselect_b32 s4, s4, s10
-; GFX6-NEXT: s_sub_i32 s1, s1, s4
-; GFX6-NEXT: s_ashr_i32 s1, s1, 16
+; GFX6-NEXT: s_max_i32 s4, s5, s4
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
+; GFX6-NEXT: s_max_i32 s5, s2, -1
+; GFX6-NEXT: s_min_i32 s4, s4, s10
+; GFX6-NEXT: s_sub_i32 s1, s1, s4
; GFX6-NEXT: s_lshl_b32 s4, s6, 16
-; GFX6-NEXT: s_cmp_gt_i32 s2, -1
-; GFX6-NEXT: s_cselect_b32 s5, s2, -1
+; GFX6-NEXT: s_min_i32 s6, s2, -1
; GFX6-NEXT: s_sub_i32 s5, s5, s8
-; GFX6-NEXT: s_cmp_lt_i32 s2, -1
-; GFX6-NEXT: s_cselect_b32 s6, s2, -1
; GFX6-NEXT: s_sub_i32 s6, s6, s9
-; GFX6-NEXT: s_cmp_gt_i32 s5, s4
-; GFX6-NEXT: s_cselect_b32 s4, s5, s4
-; GFX6-NEXT: s_cmp_lt_i32 s4, s6
-; GFX6-NEXT: s_cselect_b32 s4, s4, s6
-; GFX6-NEXT: s_sub_i32 s2, s2, s4
-; GFX6-NEXT: s_ashr_i32 s2, s2, 16
+; GFX6-NEXT: s_max_i32 s4, s5, s4
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
+; GFX6-NEXT: s_min_i32 s4, s4, s6
+; GFX6-NEXT: s_max_i32 s5, s3, -1
+; GFX6-NEXT: s_sub_i32 s2, s2, s4
+; GFX6-NEXT: s_min_i32 s6, s3, -1
; GFX6-NEXT: s_lshl_b32 s4, s7, 16
-; GFX6-NEXT: s_cmp_gt_i32 s3, -1
-; GFX6-NEXT: s_cselect_b32 s5, s3, -1
; GFX6-NEXT: s_sub_i32 s5, s5, s8
-; GFX6-NEXT: s_cmp_lt_i32 s3, -1
-; GFX6-NEXT: s_cselect_b32 s6, s3, -1
; GFX6-NEXT: s_sub_i32 s6, s6, s9
-; GFX6-NEXT: s_cmp_gt_i32 s5, s4
-; GFX6-NEXT: s_cselect_b32 s4, s5, s4
-; GFX6-NEXT: s_cmp_lt_i32 s4, s6
-; GFX6-NEXT: s_cselect_b32 s4, s4, s6
+; GFX6-NEXT: s_max_i32 s4, s5, s4
+; GFX6-NEXT: s_min_i32 s4, s4, s6
; GFX6-NEXT: s_sub_i32 s3, s3, s4
+; GFX6-NEXT: s_ashr_i32 s1, s1, 16
; GFX6-NEXT: s_mov_b32 s4, 0xffff
+; GFX6-NEXT: s_ashr_i32 s0, s0, 16
; GFX6-NEXT: s_and_b32 s1, s1, s4
+; GFX6-NEXT: s_ashr_i32 s2, s2, 16
; GFX6-NEXT: s_ashr_i32 s3, s3, 16
; GFX6-NEXT: s_and_b32 s0, s0, s4
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
@@ -3537,76 +3165,60 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
;
; GFX8-LABEL: s_ssubsat_v4i16:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_lshr_b32 s6, s2, 16
-; GFX8-NEXT: s_lshr_b32 s7, s3, 16
-; GFX8-NEXT: s_lshr_b32 s4, s0, 16
-; GFX8-NEXT: s_lshr_b32 s5, s1, 16
; GFX8-NEXT: s_sext_i32_i16 s10, s0
; GFX8-NEXT: s_sext_i32_i16 s11, -1
-; GFX8-NEXT: s_cmp_gt_i32 s10, s11
+; GFX8-NEXT: s_max_i32 s12, s10, s11
; GFX8-NEXT: s_movk_i32 s8, 0x7fff
-; GFX8-NEXT: s_cselect_b32 s12, s10, s11
; GFX8-NEXT: s_sub_i32 s12, s12, s8
-; GFX8-NEXT: s_cmp_lt_i32 s10, s11
+; GFX8-NEXT: s_lshr_b32 s6, s2, 16
; GFX8-NEXT: s_movk_i32 s9, 0x8000
-; GFX8-NEXT: s_cselect_b32 s10, s10, s11
-; GFX8-NEXT: s_sub_i32 s10, s10, s9
+; GFX8-NEXT: s_min_i32 s10, s10, s11
; GFX8-NEXT: s_sext_i32_i16 s12, s12
; GFX8-NEXT: s_sext_i32_i16 s2, s2
-; GFX8-NEXT: s_cmp_gt_i32 s12, s2
-; GFX8-NEXT: s_cselect_b32 s2, s12, s2
+; GFX8-NEXT: s_sub_i32 s10, s10, s9
+; GFX8-NEXT: s_max_i32 s2, s12, s2
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s10, s10
-; GFX8-NEXT: s_cmp_lt_i32 s2, s10
-; GFX8-NEXT: s_cselect_b32 s2, s2, s10
+; GFX8-NEXT: s_lshr_b32 s4, s0, 16
+; GFX8-NEXT: s_min_i32 s2, s2, s10
; GFX8-NEXT: s_sub_i32 s0, s0, s2
; GFX8-NEXT: s_sext_i32_i16 s2, s4
-; GFX8-NEXT: s_cmp_gt_i32 s2, s11
-; GFX8-NEXT: s_cselect_b32 s10, s2, s11
+; GFX8-NEXT: s_max_i32 s10, s2, s11
; GFX8-NEXT: s_sub_i32 s10, s10, s8
-; GFX8-NEXT: s_cmp_lt_i32 s2, s11
-; GFX8-NEXT: s_cselect_b32 s2, s2, s11
-; GFX8-NEXT: s_sub_i32 s2, s2, s9
+; GFX8-NEXT: s_min_i32 s2, s2, s11
; GFX8-NEXT: s_sext_i32_i16 s10, s10
; GFX8-NEXT: s_sext_i32_i16 s6, s6
-; GFX8-NEXT: s_cmp_gt_i32 s10, s6
-; GFX8-NEXT: s_cselect_b32 s6, s10, s6
+; GFX8-NEXT: s_sub_i32 s2, s2, s9
+; GFX8-NEXT: s_max_i32 s6, s10, s6
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s2, s2
-; GFX8-NEXT: s_cmp_lt_i32 s6, s2
-; GFX8-NEXT: s_cselect_b32 s2, s6, s2
+; GFX8-NEXT: s_min_i32 s2, s6, s2
; GFX8-NEXT: s_sub_i32 s2, s4, s2
; GFX8-NEXT: s_sext_i32_i16 s4, s1
-; GFX8-NEXT: s_cmp_gt_i32 s4, s11
-; GFX8-NEXT: s_cselect_b32 s6, s4, s11
+; GFX8-NEXT: s_max_i32 s6, s4, s11
; GFX8-NEXT: s_sub_i32 s6, s6, s8
-; GFX8-NEXT: s_cmp_lt_i32 s4, s11
-; GFX8-NEXT: s_cselect_b32 s4, s4, s11
-; GFX8-NEXT: s_sub_i32 s4, s4, s9
+; GFX8-NEXT: s_min_i32 s4, s4, s11
+; GFX8-NEXT: s_lshr_b32 s7, s3, 16
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s3, s3
-; GFX8-NEXT: s_cmp_gt_i32 s6, s3
-; GFX8-NEXT: s_cselect_b32 s3, s6, s3
+; GFX8-NEXT: s_max_i32 s3, s6, s3
+; GFX8-NEXT: s_sub_i32 s4, s4, s9
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s4, s4
-; GFX8-NEXT: s_cmp_lt_i32 s3, s4
-; GFX8-NEXT: s_cselect_b32 s3, s3, s4
+; GFX8-NEXT: s_lshr_b32 s5, s1, 16
+; GFX8-NEXT: s_min_i32 s3, s3, s4
; GFX8-NEXT: s_sub_i32 s1, s1, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s5
-; GFX8-NEXT: s_cmp_gt_i32 s3, s11
-; GFX8-NEXT: s_cselect_b32 s4, s3, s11
+; GFX8-NEXT: s_max_i32 s4, s3, s11
; GFX8-NEXT: s_sub_i32 s4, s4, s8
-; GFX8-NEXT: s_cmp_lt_i32 s3, s11
-; GFX8-NEXT: s_cselect_b32 s3, s3, s11
-; GFX8-NEXT: s_sub_i32 s3, s3, s9
+; GFX8-NEXT: s_min_i32 s3, s3, s11
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s6, s7
-; GFX8-NEXT: s_cmp_gt_i32 s4, s6
-; GFX8-NEXT: s_cselect_b32 s4, s4, s6
+; GFX8-NEXT: s_sub_i32 s3, s3, s9
+; GFX8-NEXT: s_max_i32 s4, s4, s6
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s3, s3
-; GFX8-NEXT: s_cmp_lt_i32 s4, s3
-; GFX8-NEXT: s_cselect_b32 s3, s4, s3
+; GFX8-NEXT: s_min_i32 s3, s4, s3
; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
; GFX8-NEXT: s_sub_i32 s3, s5, s3
; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
@@ -3816,92 +3428,67 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
; GFX6-LABEL: s_ssubsat_v6i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
-; GFX6-NEXT: s_lshl_b32 s6, s6, 16
-; GFX6-NEXT: s_cmp_gt_i32 s0, -1
; GFX6-NEXT: s_brev_b32 s12, -2
-; GFX6-NEXT: s_cselect_b32 s14, s0, -1
+; GFX6-NEXT: s_max_i32 s14, s0, -1
+; GFX6-NEXT: s_lshl_b32 s6, s6, 16
; GFX6-NEXT: s_sub_i32 s14, s14, s12
-; GFX6-NEXT: s_cmp_lt_i32 s0, -1
; GFX6-NEXT: s_brev_b32 s13, 1
-; GFX6-NEXT: s_cselect_b32 s15, s0, -1
+; GFX6-NEXT: s_min_i32 s15, s0, -1
; GFX6-NEXT: s_sub_i32 s15, s15, s13
-; GFX6-NEXT: s_cmp_gt_i32 s14, s6
-; GFX6-NEXT: s_cselect_b32 s6, s14, s6
-; GFX6-NEXT: s_cmp_lt_i32 s6, s15
-; GFX6-NEXT: s_cselect_b32 s6, s6, s15
-; GFX6-NEXT: s_sub_i32 s0, s0, s6
-; GFX6-NEXT: s_ashr_i32 s0, s0, 16
+; GFX6-NEXT: s_max_i32 s6, s14, s6
+; GFX6-NEXT: s_min_i32 s6, s6, s15
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
+; GFX6-NEXT: s_sub_i32 s0, s0, s6
; GFX6-NEXT: s_lshl_b32 s6, s7, 16
-; GFX6-NEXT: s_cmp_gt_i32 s1, -1
-; GFX6-NEXT: s_cselect_b32 s7, s1, -1
+; GFX6-NEXT: s_max_i32 s7, s1, -1
+; GFX6-NEXT: s_min_i32 s14, s1, -1
; GFX6-NEXT: s_sub_i32 s7, s7, s12
-; GFX6-NEXT: s_cmp_lt_i32 s1, -1
-; GFX6-NEXT: s_cselect_b32 s14, s1, -1
; GFX6-NEXT: s_sub_i32 s14, s14, s13
-; GFX6-NEXT: s_cmp_gt_i32 s7, s6
-; GFX6-NEXT: s_cselect_b32 s6, s7, s6
-; GFX6-NEXT: s_cmp_lt_i32 s6, s14
-; GFX6-NEXT: s_cselect_b32 s6, s6, s14
-; GFX6-NEXT: s_sub_i32 s1, s1, s6
-; GFX6-NEXT: s_ashr_i32 s1, s1, 16
+; GFX6-NEXT: s_max_i32 s6, s7, s6
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
+; GFX6-NEXT: s_max_i32 s7, s2, -1
+; GFX6-NEXT: s_min_i32 s6, s6, s14
+; GFX6-NEXT: s_sub_i32 s1, s1, s6
; GFX6-NEXT: s_lshl_b32 s6, s8, 16
-; GFX6-NEXT: s_cmp_gt_i32 s2, -1
-; GFX6-NEXT: s_cselect_b32 s7, s2, -1
+; GFX6-NEXT: s_min_i32 s8, s2, -1
; GFX6-NEXT: s_sub_i32 s7, s7, s12
-; GFX6-NEXT: s_cmp_lt_i32 s2, -1
-; GFX6-NEXT: s_cselect_b32 s8, s2, -1
; GFX6-NEXT: s_sub_i32 s8, s8, s13
-; GFX6-NEXT: s_cmp_gt_i32 s7, s6
-; GFX6-NEXT: s_cselect_b32 s6, s7, s6
-; GFX6-NEXT: s_cmp_lt_i32 s6, s8
-; GFX6-NEXT: s_cselect_b32 s6, s6, s8
-; GFX6-NEXT: s_sub_i32 s2, s2, s6
-; GFX6-NEXT: s_ashr_i32 s2, s2, 16
+; GFX6-NEXT: s_max_i32 s6, s7, s6
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
+; GFX6-NEXT: s_min_i32 s6, s6, s8
+; GFX6-NEXT: s_max_i32 s7, s3, -1
+; GFX6-NEXT: s_sub_i32 s2, s2, s6
+; GFX6-NEXT: s_min_i32 s8, s3, -1
; GFX6-NEXT: s_lshl_b32 s6, s9, 16
-; GFX6-NEXT: s_cmp_gt_i32 s3, -1
-; GFX6-NEXT: s_cselect_b32 s7, s3, -1
; GFX6-NEXT: s_sub_i32 s7, s7, s12
-; GFX6-NEXT: s_cmp_lt_i32 s3, -1
-; GFX6-NEXT: s_cselect_b32 s8, s3, -1
; GFX6-NEXT: s_sub_i32 s8, s8, s13
-; GFX6-NEXT: s_cmp_gt_i32 s7, s6
-; GFX6-NEXT: s_cselect_b32 s6, s7, s6
-; GFX6-NEXT: s_cmp_lt_i32 s6, s8
-; GFX6-NEXT: s_cselect_b32 s6, s6, s8
-; GFX6-NEXT: s_sub_i32 s3, s3, s6
-; GFX6-NEXT: s_ashr_i32 s3, s3, 16
+; GFX6-NEXT: s_max_i32 s6, s7, s6
; GFX6-NEXT: s_lshl_b32 s4, s4, 16
+; GFX6-NEXT: s_min_i32 s6, s6, s8
+; GFX6-NEXT: s_max_i32 s7, s4, -1
+; GFX6-NEXT: s_sub_i32 s3, s3, s6
+; GFX6-NEXT: s_min_i32 s8, s4, -1
; GFX6-NEXT: s_lshl_b32 s6, s10, 16
-; GFX6-NEXT: s_cmp_gt_i32 s4, -1
-; GFX6-NEXT: s_cselect_b32 s7, s4, -1
; GFX6-NEXT: s_sub_i32 s7, s7, s12
-; GFX6-NEXT: s_cmp_lt_i32 s4, -1
-; GFX6-NEXT: s_cselect_b32 s8, s4, -1
; GFX6-NEXT: s_sub_i32 s8, s8, s13
-; GFX6-NEXT: s_cmp_gt_i32 s7, s6
-; GFX6-NEXT: s_cselect_b32 s6, s7, s6
-; GFX6-NEXT: s_cmp_lt_i32 s6, s8
-; GFX6-NEXT: s_cselect_b32 s6, s6, s8
-; GFX6-NEXT: s_sub_i32 s4, s4, s6
-; GFX6-NEXT: s_ashr_i32 s4, s4, 16
+; GFX6-NEXT: s_max_i32 s6, s7, s6
; GFX6-NEXT: s_lshl_b32 s5, s5, 16
+; GFX6-NEXT: s_min_i32 s6, s6, s8
+; GFX6-NEXT: s_max_i32 s7, s5, -1
+; GFX6-NEXT: s_sub_i32 s4, s4, s6
+; GFX6-NEXT: s_min_i32 s8, s5, -1
; GFX6-NEXT: s_lshl_b32 s6, s11, 16
-; GFX6-NEXT: s_cmp_gt_i32 s5, -1
-; GFX6-NEXT: s_cselect_b32 s7, s5, -1
; GFX6-NEXT: s_sub_i32 s7, s7, s12
-; GFX6-NEXT: s_cmp_lt_i32 s5, -1
-; GFX6-NEXT: s_cselect_b32 s8, s5, -1
; GFX6-NEXT: s_sub_i32 s8, s8, s13
-; GFX6-NEXT: s_cmp_gt_i32 s7, s6
-; GFX6-NEXT: s_cselect_b32 s6, s7, s6
-; GFX6-NEXT: s_cmp_lt_i32 s6, s8
-; GFX6-NEXT: s_cselect_b32 s6, s6, s8
+; GFX6-NEXT: s_max_i32 s6, s7, s6
+; GFX6-NEXT: s_min_i32 s6, s6, s8
; GFX6-NEXT: s_sub_i32 s5, s5, s6
+; GFX6-NEXT: s_ashr_i32 s1, s1, 16
; GFX6-NEXT: s_mov_b32 s6, 0xffff
+; GFX6-NEXT: s_ashr_i32 s0, s0, 16
; GFX6-NEXT: s_and_b32 s1, s1, s6
+; GFX6-NEXT: s_ashr_i32 s2, s2, 16
+; GFX6-NEXT: s_ashr_i32 s3, s3, 16
; GFX6-NEXT: s_and_b32 s0, s0, s6
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
@@ -3910,6 +3497,7 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
; GFX6-NEXT: s_ashr_i32 s5, s5, 16
; GFX6-NEXT: s_and_b32 s3, s5, s6
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
+; GFX6-NEXT: s_ashr_i32 s4, s4, 16
; GFX6-NEXT: s_or_b32 s1, s1, s2
; GFX6-NEXT: s_and_b32 s2, s4, s6
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
@@ -3918,113 +3506,89 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
;
; GFX8-LABEL: s_ssubsat_v6i16:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_lshr_b32 s9, s3, 16
-; GFX8-NEXT: s_lshr_b32 s10, s4, 16
-; GFX8-NEXT: s_lshr_b32 s11, s5, 16
-; GFX8-NEXT: s_lshr_b32 s6, s0, 16
-; GFX8-NEXT: s_lshr_b32 s7, s1, 16
-; GFX8-NEXT: s_lshr_b32 s8, s2, 16
; GFX8-NEXT: s_sext_i32_i16 s14, s0
; GFX8-NEXT: s_sext_i32_i16 s15, -1
-; GFX8-NEXT: s_cmp_gt_i32 s14, s15
+; GFX8-NEXT: s_max_i32 s16, s14, s15
; GFX8-NEXT: s_movk_i32 s12, 0x7fff
-; GFX8-NEXT: s_cselect_b32 s16, s14, s15
; GFX8-NEXT: s_sub_i32 s16, s16, s12
-; GFX8-NEXT: s_cmp_lt_i32 s14, s15
+; GFX8-NEXT: s_lshr_b32 s9, s3, 16
; GFX8-NEXT: s_movk_i32 s13, 0x8000
-; GFX8-NEXT: s_cselect_b32 s14, s14, s15
-; GFX8-NEXT: s_sub_i32 s14, s14, s13
+; GFX8-NEXT: s_min_i32 s14, s14, s15
; GFX8-NEXT: s_sext_i32_i16 s16, s16
; GFX8-NEXT: s_sext_i32_i16 s3, s3
-; GFX8-NEXT: s_cmp_gt_i32 s16, s3
-; GFX8-NEXT: s_cselect_b32 s3, s16, s3
+; GFX8-NEXT: s_sub_i32 s14, s14, s13
+; GFX8-NEXT: s_max_i32 s3, s16, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s14, s14
-; GFX8-NEXT: s_cmp_lt_i32 s3, s14
-; GFX8-NEXT: s_cselect_b32 s3, s3, s14
+; GFX8-NEXT: s_lshr_b32 s6, s0, 16
+; GFX8-NEXT: s_min_i32 s3, s3, s14
; GFX8-NEXT: s_sub_i32 s0, s0, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s6
-; GFX8-NEXT: s_cmp_gt_i32 s3, s15
-; GFX8-NEXT: s_cselect_b32 s14, s3, s15
+; GFX8-NEXT: s_max_i32 s14, s3, s15
; GFX8-NEXT: s_sub_i32 s14, s14, s12
-; GFX8-NEXT: s_cmp_lt_i32 s3, s15
-; GFX8-NEXT: s_cselect_b32 s3, s3, s15
-; GFX8-NEXT: s_sub_i32 s3, s3, s13
+; GFX8-NEXT: s_min_i32 s3, s3, s15
; GFX8-NEXT: s_sext_i32_i16 s14, s14
; GFX8-NEXT: s_sext_i32_i16 s9, s9
-; GFX8-NEXT: s_cmp_gt_i32 s14, s9
-; GFX8-NEXT: s_cselect_b32 s9, s14, s9
+; GFX8-NEXT: s_sub_i32 s3, s3, s13
+; GFX8-NEXT: s_max_i32 s9, s14, s9
; GFX8-NEXT: s_sext_i32_i16 s9, s9
; GFX8-NEXT: s_sext_i32_i16 s3, s3
-; GFX8-NEXT: s_cmp_lt_i32 s9, s3
-; GFX8-NEXT: s_cselect_b32 s3, s9, s3
+; GFX8-NEXT: s_min_i32 s3, s9, s3
; GFX8-NEXT: s_sub_i32 s3, s6, s3
; GFX8-NEXT: s_sext_i32_i16 s6, s1
-; GFX8-NEXT: s_cmp_gt_i32 s6, s15
-; GFX8-NEXT: s_cselect_b32 s9, s6, s15
+; GFX8-NEXT: s_max_i32 s9, s6, s15
; GFX8-NEXT: s_sub_i32 s9, s9, s12
-; GFX8-NEXT: s_cmp_lt_i32 s6, s15
-; GFX8-NEXT: s_cselect_b32 s6, s6, s15
-; GFX8-NEXT: s_sub_i32 s6, s6, s13
+; GFX8-NEXT: s_min_i32 s6, s6, s15
+; GFX8-NEXT: s_lshr_b32 s10, s4, 16
; GFX8-NEXT: s_sext_i32_i16 s9, s9
; GFX8-NEXT: s_sext_i32_i16 s4, s4
-; GFX8-NEXT: s_cmp_gt_i32 s9, s4
-; GFX8-NEXT: s_cselect_b32 s4, s9, s4
+; GFX8-NEXT: s_max_i32 s4, s9, s4
+; GFX8-NEXT: s_sub_i32 s6, s6, s13
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s6, s6
-; GFX8-NEXT: s_cmp_lt_i32 s4, s6
-; GFX8-NEXT: s_cselect_b32 s4, s4, s6
+; GFX8-NEXT: s_lshr_b32 s7, s1, 16
+; GFX8-NEXT: s_min_i32 s4, s4, s6
; GFX8-NEXT: s_sub_i32 s1, s1, s4
; GFX8-NEXT: s_sext_i32_i16 s4, s7
-; GFX8-NEXT: s_cmp_gt_i32 s4, s15
-; GFX8-NEXT: s_cselect_b32 s6, s4, s15
+; GFX8-NEXT: s_max_i32 s6, s4, s15
; GFX8-NEXT: s_sub_i32 s6, s6, s12
-; GFX8-NEXT: s_cmp_lt_i32 s4, s15
-; GFX8-NEXT: s_cselect_b32 s4, s4, s15
-; GFX8-NEXT: s_sub_i32 s4, s4, s13
+; GFX8-NEXT: s_min_i32 s4, s4, s15
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s9, s10
-; GFX8-NEXT: s_cmp_gt_i32 s6, s9
-; GFX8-NEXT: s_cselect_b32 s6, s6, s9
+; GFX8-NEXT: s_sub_i32 s4, s4, s13
+; GFX8-NEXT: s_max_i32 s6, s6, s9
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s4, s4
-; GFX8-NEXT: s_cmp_lt_i32 s6, s4
-; GFX8-NEXT: s_cselect_b32 s4, s6, s4
-; GFX8-NEXT: s_sub_i32 s4, s7, s4
+; GFX8-NEXT: s_min_i32 s4, s6, s4
; GFX8-NEXT: s_sext_i32_i16 s6, s2
-; GFX8-NEXT: s_cmp_gt_i32 s6, s15
-; GFX8-NEXT: s_cselect_b32 s7, s6, s15
+; GFX8-NEXT: s_sub_i32 s4, s7, s4
+; GFX8-NEXT: s_max_i32 s7, s6, s15
; GFX8-NEXT: s_sub_i32 s7, s7, s12
-; GFX8-NEXT: s_cmp_lt_i32 s6, s15
-; GFX8-NEXT: s_cselect_b32 s6, s6, s15
-; GFX8-NEXT: s_sub_i32 s6, s6, s13
+; GFX8-NEXT: s_min_i32 s6, s6, s15
+; GFX8-NEXT: s_lshr_b32 s11, s5, 16
; GFX8-NEXT: s_sext_i32_i16 s7, s7
; GFX8-NEXT: s_sext_i32_i16 s5, s5
-; GFX8-NEXT: s_cmp_gt_i32 s7, s5
-; GFX8-NEXT: s_cselect_b32 s5, s7, s5
+; GFX8-NEXT: s_max_i32 s5, s7, s5
+; GFX8-NEXT: s_sub_i32 s6, s6, s13
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_sext_i32_i16 s6, s6
-; GFX8-NEXT: s_cmp_lt_i32 s5, s6
-; GFX8-NEXT: s_cselect_b32 s5, s5, s6
+; GFX8-NEXT: s_lshr_b32 s8, s2, 16
+; GFX8-NEXT: s_min_i32 s5, s5, s6
; GFX8-NEXT: s_sub_i32 s2, s2, s5
; GFX8-NEXT: s_sext_i32_i16 s5, s8
-; GFX8-NEXT: s_cmp_gt_i32 s5, s15
-; GFX8-NEXT: s_cselect_b32 s6, s5, s15
+; GFX8-NEXT: s_max_i32 s6, s5, s15
; GFX8-NEXT: s_sub_i32 s6, s6, s12
-; GFX8-NEXT: s_cmp_lt_i32 s5, s15
-; GFX8-NEXT: s_cselect_b32 s5, s5, s15
-; GFX8-NEXT: s_sub_i32 s5, s5, s13
+; GFX8-NEXT: s_min_i32 s5, s5, s15
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s7, s11
-; GFX8-NEXT: s_cmp_gt_i32 s6, s7
-; GFX8-NEXT: s_cselect_b32 s6, s6, s7
+; GFX8-NEXT: s_sub_i32 s5, s5, s13
+; GFX8-NEXT: s_max_i32 s6, s6, s7
+; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s5, s5
-; GFX8-NEXT: s_cmp_lt_i32 s6, s5
-; GFX8-NEXT: s_cselect_b32 s5, s6, s5
-; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000
; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
; GFX8-NEXT: s_lshl_b32 s3, s3, 16
+; GFX8-NEXT: s_min_i32 s5, s6, s5
; GFX8-NEXT: s_or_b32 s0, s0, s3
; GFX8-NEXT: s_bfe_u32 s3, s4, 0x100000
; GFX8-NEXT: s_sub_i32 s5, s8, s5
@@ -4271,132 +3835,100 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX6-LABEL: s_ssubsat_v8i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
-; GFX6-NEXT: s_lshl_b32 s8, s8, 16
-; GFX6-NEXT: s_cmp_gt_i32 s0, -1
; GFX6-NEXT: s_brev_b32 s16, -2
-; GFX6-NEXT: s_cselect_b32 s18, s0, -1
+; GFX6-NEXT: s_max_i32 s18, s0, -1
+; GFX6-NEXT: s_lshl_b32 s8, s8, 16
; GFX6-NEXT: s_sub_i32 s18, s18, s16
-; GFX6-NEXT: s_cmp_lt_i32 s0, -1
; GFX6-NEXT: s_brev_b32 s17, 1
-; GFX6-NEXT: s_cselect_b32 s19, s0, -1
+; GFX6-NEXT: s_min_i32 s19, s0, -1
; GFX6-NEXT: s_sub_i32 s19, s19, s17
-; GFX6-NEXT: s_cmp_gt_i32 s18, s8
-; GFX6-NEXT: s_cselect_b32 s8, s18, s8
-; GFX6-NEXT: s_cmp_lt_i32 s8, s19
-; GFX6-NEXT: s_cselect_b32 s8, s8, s19
-; GFX6-NEXT: s_sub_i32 s0, s0, s8
-; GFX6-NEXT: s_ashr_i32 s0, s0, 16
+; GFX6-NEXT: s_max_i32 s8, s18, s8
+; GFX6-NEXT: s_min_i32 s8, s8, s19
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
+; GFX6-NEXT: s_sub_i32 s0, s0, s8
; GFX6-NEXT: s_lshl_b32 s8, s9, 16
-; GFX6-NEXT: s_cmp_gt_i32 s1, -1
-; GFX6-NEXT: s_cselect_b32 s9, s1, -1
+; GFX6-NEXT: s_max_i32 s9, s1, -1
+; GFX6-NEXT: s_min_i32 s18, s1, -1
; GFX6-NEXT: s_sub_i32 s9, s9, s16
-; GFX6-NEXT: s_cmp_lt_i32 s1, -1
-; GFX6-NEXT: s_cselect_b32 s18, s1, -1
; GFX6-NEXT: s_sub_i32 s18, s18, s17
-; GFX6-NEXT: s_cmp_gt_i32 s9, s8
-; GFX6-NEXT: s_cselect_b32 s8, s9, s8
-; GFX6-NEXT: s_cmp_lt_i32 s8, s18
-; GFX6-NEXT: s_cselect_b32 s8, s8, s18
-; GFX6-NEXT: s_sub_i32 s1, s1, s8
-; GFX6-NEXT: s_ashr_i32 s1, s1, 16
+; GFX6-NEXT: s_max_i32 s8, s9, s8
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
+; GFX6-NEXT: s_max_i32 s9, s2, -1
+; GFX6-NEXT: s_min_i32 s8, s8, s18
+; GFX6-NEXT: s_sub_i32 s1, s1, s8
; GFX6-NEXT: s_lshl_b32 s8, s10, 16
-; GFX6-NEXT: s_cmp_gt_i32 s2, -1
-; GFX6-NEXT: s_cselect_b32 s9, s2, -1
+; GFX6-NEXT: s_min_i32 s10, s2, -1
; GFX6-NEXT: s_sub_i32 s9, s9, s16
-; GFX6-NEXT: s_cmp_lt_i32 s2, -1
-; GFX6-NEXT: s_cselect_b32 s10, s2, -1
; GFX6-NEXT: s_sub_i32 s10, s10, s17
-; GFX6-NEXT: s_cmp_gt_i32 s9, s8
-; GFX6-NEXT: s_cselect_b32 s8, s9, s8
-; GFX6-NEXT: s_cmp_lt_i32 s8, s10
-; GFX6-NEXT: s_cselect_b32 s8, s8, s10
-; GFX6-NEXT: s_sub_i32 s2, s2, s8
-; GFX6-NEXT: s_ashr_i32 s2, s2, 16
+; GFX6-NEXT: s_max_i32 s8, s9, s8
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
+; GFX6-NEXT: s_min_i32 s8, s8, s10
+; GFX6-NEXT: s_max_i32 s9, s3, -1
+; GFX6-NEXT: s_sub_i32 s2, s2, s8
+; GFX6-NEXT: s_min_i32 s10, s3, -1
; GFX6-NEXT: s_lshl_b32 s8, s11, 16
-; GFX6-NEXT: s_cmp_gt_i32 s3, -1
-; GFX6-NEXT: s_cselect_b32 s9, s3, -1
; GFX6-NEXT: s_sub_i32 s9, s9, s16
-; GFX6-NEXT: s_cmp_lt_i32 s3, -1
-; GFX6-NEXT: s_cselect_b32 s10, s3, -1
; GFX6-NEXT: s_sub_i32 s10, s10, s17
-; GFX6-NEXT: s_cmp_gt_i32 s9, s8
-; GFX6-NEXT: s_cselect_b32 s8, s9, s8
-; GFX6-NEXT: s_cmp_lt_i32 s8, s10
-; GFX6-NEXT: s_cselect_b32 s8, s8, s10
-; GFX6-NEXT: s_sub_i32 s3, s3, s8
-; GFX6-NEXT: s_ashr_i32 s3, s3, 16
+; GFX6-NEXT: s_max_i32 s8, s9, s8
; GFX6-NEXT: s_lshl_b32 s4, s4, 16
+; GFX6-NEXT: s_min_i32 s8, s8, s10
+; GFX6-NEXT: s_max_i32 s9, s4, -1
+; GFX6-NEXT: s_sub_i32 s3, s3, s8
+; GFX6-NEXT: s_min_i32 s10, s4, -1
; GFX6-NEXT: s_lshl_b32 s8, s12, 16
-; GFX6-NEXT: s_cmp_gt_i32 s4, -1
-; GFX6-NEXT: s_cselect_b32 s9, s4, -1
; GFX6-NEXT: s_sub_i32 s9, s9, s16
-; GFX6-NEXT: s_cmp_lt_i32 s4, -1
-; GFX6-NEXT: s_cselect_b32 s10, s4, -1
; GFX6-NEXT: s_sub_i32 s10, s10, s17
-; GFX6-NEXT: s_cmp_gt_i32 s9, s8
-; GFX6-NEXT: s_cselect_b32 s8, s9, s8
-; GFX6-NEXT: s_cmp_lt_i32 s8, s10
-; GFX6-NEXT: s_cselect_b32 s8, s8, s10
-; GFX6-NEXT: s_sub_i32 s4, s4, s8
-; GFX6-NEXT: s_ashr_i32 s4, s4, 16
+; GFX6-NEXT: s_max_i32 s8, s9, s8
; GFX6-NEXT: s_lshl_b32 s5, s5, 16
+; GFX6-NEXT: s_min_i32 s8, s8, s10
+; GFX6-NEXT: s_max_i32 s9, s5, -1
+; GFX6-NEXT: s_sub_i32 s4, s4, s8
+; GFX6-NEXT: s_min_i32 s10, s5, -1
; GFX6-NEXT: s_lshl_b32 s8, s13, 16
-; GFX6-NEXT: s_cmp_gt_i32 s5, -1
-; GFX6-NEXT: s_cselect_b32 s9, s5, -1
; GFX6-NEXT: s_sub_i32 s9, s9, s16
-; GFX6-NEXT: s_cmp_lt_i32 s5, -1
-; GFX6-NEXT: s_cselect_b32 s10, s5, -1
; GFX6-NEXT: s_sub_i32 s10, s10, s17
-; GFX6-NEXT: s_cmp_gt_i32 s9, s8
-; GFX6-NEXT: s_cselect_b32 s8, s9, s8
-; GFX6-NEXT: s_cmp_lt_i32 s8, s10
-; GFX6-NEXT: s_cselect_b32 s8, s8, s10
-; GFX6-NEXT: s_sub_i32 s5, s5, s8
-; GFX6-NEXT: s_ashr_i32 s5, s5, 16
+; GFX6-NEXT: s_max_i32 s8, s9, s8
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
+; GFX6-NEXT: s_min_i32 s8, s8, s10
+; GFX6-NEXT: s_max_i32 s9, s6, -1
+; GFX6-NEXT: s_sub_i32 s5, s5, s8
+; GFX6-NEXT: s_min_i32 s10, s6, -1
; GFX6-NEXT: s_lshl_b32 s8, s14, 16
-; GFX6-NEXT: s_cmp_gt_i32 s6, -1
-; GFX6-NEXT: s_cselect_b32 s9, s6, -1
; GFX6-NEXT: s_sub_i32 s9, s9, s16
-; GFX6-NEXT: s_cmp_lt_i32 s6, -1
-; GFX6-NEXT: s_cselect_b32 s10, s6, -1
; GFX6-NEXT: s_sub_i32 s10, s10, s17
-; GFX6-NEXT: s_cmp_gt_i32 s9, s8
-; GFX6-NEXT: s_cselect_b32 s8, s9, s8
-; GFX6-NEXT: s_cmp_lt_i32 s8, s10
-; GFX6-NEXT: s_cselect_b32 s8, s8, s10
-; GFX6-NEXT: s_sub_i32 s6, s6, s8
-; GFX6-NEXT: s_ashr_i32 s6, s6, 16
+; GFX6-NEXT: s_max_i32 s8, s9, s8
; GFX6-NEXT: s_lshl_b32 s7, s7, 16
+; GFX6-NEXT: s_min_i32 s8, s8, s10
+; GFX6-NEXT: s_max_i32 s9, s7, -1
+; GFX6-NEXT: s_sub_i32 s6, s6, s8
+; GFX6-NEXT: s_min_i32 s10, s7, -1
; GFX6-NEXT: s_lshl_b32 s8, s15, 16
-; GFX6-NEXT: s_cmp_gt_i32 s7, -1
-; GFX6-NEXT: s_cselect_b32 s9, s7, -1
; GFX6-NEXT: s_sub_i32 s9, s9, s16
-; GFX6-NEXT: s_cmp_lt_i32 s7, -1
-; GFX6-NEXT: s_cselect_b32 s10, s7, -1
; GFX6-NEXT: s_sub_i32 s10, s10, s17
-; GFX6-NEXT: s_cmp_gt_i32 s9, s8
-; GFX6-NEXT: s_cselect_b32 s8, s9, s8
-; GFX6-NEXT: s_cmp_lt_i32 s8, s10
-; GFX6-NEXT: s_cselect_b32 s8, s8, s10
+; GFX6-NEXT: s_max_i32 s8, s9, s8
+; GFX6-NEXT: s_min_i32 s8, s8, s10
; GFX6-NEXT: s_sub_i32 s7, s7, s8
+; GFX6-NEXT: s_ashr_i32 s1, s1, 16
; GFX6-NEXT: s_mov_b32 s8, 0xffff
+; GFX6-NEXT: s_ashr_i32 s0, s0, 16
; GFX6-NEXT: s_and_b32 s1, s1, s8
+; GFX6-NEXT: s_ashr_i32 s2, s2, 16
+; GFX6-NEXT: s_ashr_i32 s3, s3, 16
; GFX6-NEXT: s_and_b32 s0, s0, s8
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_and_b32 s1, s2, s8
; GFX6-NEXT: s_and_b32 s2, s3, s8
+; GFX6-NEXT: s_ashr_i32 s5, s5, 16
; GFX6-NEXT: s_and_b32 s3, s5, s8
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
+; GFX6-NEXT: s_ashr_i32 s4, s4, 16
; GFX6-NEXT: s_ashr_i32 s7, s7, 16
; GFX6-NEXT: s_or_b32 s1, s1, s2
; GFX6-NEXT: s_and_b32 s2, s4, s8
; GFX6-NEXT: s_and_b32 s4, s7, s8
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
+; GFX6-NEXT: s_ashr_i32 s6, s6, 16
; GFX6-NEXT: s_or_b32 s2, s2, s3
; GFX6-NEXT: s_and_b32 s3, s6, s8
; GFX6-NEXT: s_lshl_b32 s4, s4, 16
@@ -4405,151 +3937,119 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
;
; GFX8-LABEL: s_ssubsat_v8i16:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_lshr_b32 s12, s4, 16
-; GFX8-NEXT: s_lshr_b32 s13, s5, 16
-; GFX8-NEXT: s_lshr_b32 s14, s6, 16
-; GFX8-NEXT: s_lshr_b32 s15, s7, 16
-; GFX8-NEXT: s_lshr_b32 s8, s0, 16
-; GFX8-NEXT: s_lshr_b32 s9, s1, 16
-; GFX8-NEXT: s_lshr_b32 s10, s2, 16
-; GFX8-NEXT: s_lshr_b32 s11, s3, 16
; GFX8-NEXT: s_sext_i32_i16 s18, s0
; GFX8-NEXT: s_sext_i32_i16 s19, -1
-; GFX8-NEXT: s_cmp_gt_i32 s18, s19
+; GFX8-NEXT: s_max_i32 s20, s18, s19
; GFX8-NEXT: s_movk_i32 s16, 0x7fff
-; GFX8-NEXT: s_cselect_b32 s20, s18, s19
; GFX8-NEXT: s_sub_i32 s20, s20, s16
-; GFX8-NEXT: s_cmp_lt_i32 s18, s19
+; GFX8-NEXT: s_lshr_b32 s12, s4, 16
; GFX8-NEXT: s_movk_i32 s17, 0x8000
-; GFX8-NEXT: s_cselect_b32 s18, s18, s19
-; GFX8-NEXT: s_sub_i32 s18, s18, s17
+; GFX8-NEXT: s_min_i32 s18, s18, s19
; GFX8-NEXT: s_sext_i32_i16 s20, s20
; GFX8-NEXT: s_sext_i32_i16 s4, s4
-; GFX8-NEXT: s_cmp_gt_i32 s20, s4
-; GFX8-NEXT: s_cselect_b32 s4, s20, s4
+; GFX8-NEXT: s_sub_i32 s18, s18, s17
+; GFX8-NEXT: s_max_i32 s4, s20, s4
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s18, s18
-; GFX8-NEXT: s_cmp_lt_i32 s4, s18
-; GFX8-NEXT: s_cselect_b32 s4, s4, s18
+; GFX8-NEXT: s_lshr_b32 s8, s0, 16
+; GFX8-NEXT: s_min_i32 s4, s4, s18
; GFX8-NEXT: s_sub_i32 s0, s0, s4
; GFX8-NEXT: s_sext_i32_i16 s4, s8
-; GFX8-NEXT: s_cmp_gt_i32 s4, s19
-; GFX8-NEXT: s_cselect_b32 s18, s4, s19
+; GFX8-NEXT: s_max_i32 s18, s4, s19
; GFX8-NEXT: s_sub_i32 s18, s18, s16
-; GFX8-NEXT: s_cmp_lt_i32 s4, s19
-; GFX8-NEXT: s_cselect_b32 s4, s4, s19
-; GFX8-NEXT: s_sub_i32 s4, s4, s17
+; GFX8-NEXT: s_min_i32 s4, s4, s19
; GFX8-NEXT: s_sext_i32_i16 s18, s18
; GFX8-NEXT: s_sext_i32_i16 s12, s12
-; GFX8-NEXT: s_cmp_gt_i32 s18, s12
-; GFX8-NEXT: s_cselect_b32 s12, s18, s12
+; GFX8-NEXT: s_sub_i32 s4, s4, s17
+; GFX8-NEXT: s_max_i32 s12, s18, s12
; GFX8-NEXT: s_sext_i32_i16 s12, s12
; GFX8-NEXT: s_sext_i32_i16 s4, s4
-; GFX8-NEXT: s_cmp_lt_i32 s12, s4
-; GFX8-NEXT: s_cselect_b32 s4, s12, s4
+; GFX8-NEXT: s_min_i32 s4, s12, s4
; GFX8-NEXT: s_sub_i32 s4, s8, s4
; GFX8-NEXT: s_sext_i32_i16 s8, s1
-; GFX8-NEXT: s_cmp_gt_i32 s8, s19
-; GFX8-NEXT: s_cselect_b32 s12, s8, s19
+; GFX8-NEXT: s_max_i32 s12, s8, s19
; GFX8-NEXT: s_sub_i32 s12, s12, s16
-; GFX8-NEXT: s_cmp_lt_i32 s8, s19
-; GFX8-NEXT: s_cselect_b32 s8, s8, s19
-; GFX8-NEXT: s_sub_i32 s8, s8, s17
+; GFX8-NEXT: s_min_i32 s8, s8, s19
+; GFX8-NEXT: s_lshr_b32 s13, s5, 16
; GFX8-NEXT: s_sext_i32_i16 s12, s12
; GFX8-NEXT: s_sext_i32_i16 s5, s5
-; GFX8-NEXT: s_cmp_gt_i32 s12, s5
-; GFX8-NEXT: s_cselect_b32 s5, s12, s5
+; GFX8-NEXT: s_max_i32 s5, s12, s5
+; GFX8-NEXT: s_sub_i32 s8, s8, s17
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_sext_i32_i16 s8, s8
-; GFX8-NEXT: s_cmp_lt_i32 s5, s8
-; GFX8-NEXT: s_cselect_b32 s5, s5, s8
+; GFX8-NEXT: s_lshr_b32 s9, s1, 16
+; GFX8-NEXT: s_min_i32 s5, s5, s8
; GFX8-NEXT: s_sub_i32 s1, s1, s5
; GFX8-NEXT: s_sext_i32_i16 s5, s9
-; GFX8-NEXT: s_cmp_gt_i32 s5, s19
-; GFX8-NEXT: s_cselect_b32 s8, s5, s19
+; GFX8-NEXT: s_max_i32 s8, s5, s19
; GFX8-NEXT: s_sub_i32 s8, s8, s16
-; GFX8-NEXT: s_cmp_lt_i32 s5, s19
-; GFX8-NEXT: s_cselect_b32 s5, s5, s19
-; GFX8-NEXT: s_sub_i32 s5, s5, s17
+; GFX8-NEXT: s_min_i32 s5, s5, s19
; GFX8-NEXT: s_sext_i32_i16 s8, s8
; GFX8-NEXT: s_sext_i32_i16 s12, s13
-; GFX8-NEXT: s_cmp_gt_i32 s8, s12
-; GFX8-NEXT: s_cselect_b32 s8, s8, s12
+; GFX8-NEXT: s_sub_i32 s5, s5, s17
+; GFX8-NEXT: s_max_i32 s8, s8, s12
; GFX8-NEXT: s_sext_i32_i16 s8, s8
; GFX8-NEXT: s_sext_i32_i16 s5, s5
-; GFX8-NEXT: s_cmp_lt_i32 s8, s5
-; GFX8-NEXT: s_cselect_b32 s5, s8, s5
-; GFX8-NEXT: s_sub_i32 s5, s9, s5
+; GFX8-NEXT: s_min_i32 s5, s8, s5
; GFX8-NEXT: s_sext_i32_i16 s8, s2
-; GFX8-NEXT: s_cmp_gt_i32 s8, s19
-; GFX8-NEXT: s_cselect_b32 s9, s8, s19
+; GFX8-NEXT: s_sub_i32 s5, s9, s5
+; GFX8-NEXT: s_max_i32 s9, s8, s19
; GFX8-NEXT: s_sub_i32 s9, s9, s16
-; GFX8-NEXT: s_cmp_lt_i32 s8, s19
-; GFX8-NEXT: s_cselect_b32 s8, s8, s19
-; GFX8-NEXT: s_sub_i32 s8, s8, s17
+; GFX8-NEXT: s_min_i32 s8, s8, s19
+; GFX8-NEXT: s_lshr_b32 s14, s6, 16
; GFX8-NEXT: s_sext_i32_i16 s9, s9
; GFX8-NEXT: s_sext_i32_i16 s6, s6
-; GFX8-NEXT: s_cmp_gt_i32 s9, s6
-; GFX8-NEXT: s_cselect_b32 s6, s9, s6
+; GFX8-NEXT: s_max_i32 s6, s9, s6
+; GFX8-NEXT: s_sub_i32 s8, s8, s17
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s8, s8
-; GFX8-NEXT: s_cmp_lt_i32 s6, s8
-; GFX8-NEXT: s_cselect_b32 s6, s6, s8
+; GFX8-NEXT: s_lshr_b32 s10, s2, 16
+; GFX8-NEXT: s_min_i32 s6, s6, s8
; GFX8-NEXT: s_sub_i32 s2, s2, s6
; GFX8-NEXT: s_sext_i32_i16 s6, s10
-; GFX8-NEXT: s_cmp_gt_i32 s6, s19
-; GFX8-NEXT: s_cselect_b32 s8, s6, s19
+; GFX8-NEXT: s_max_i32 s8, s6, s19
; GFX8-NEXT: s_sub_i32 s8, s8, s16
-; GFX8-NEXT: s_cmp_lt_i32 s6, s19
-; GFX8-NEXT: s_cselect_b32 s6, s6, s19
-; GFX8-NEXT: s_sub_i32 s6, s6, s17
+; GFX8-NEXT: s_min_i32 s6, s6, s19
; GFX8-NEXT: s_sext_i32_i16 s8, s8
; GFX8-NEXT: s_sext_i32_i16 s9, s14
-; GFX8-NEXT: s_cmp_gt_i32 s8, s9
-; GFX8-NEXT: s_cselect_b32 s8, s8, s9
+; GFX8-NEXT: s_sub_i32 s6, s6, s17
+; GFX8-NEXT: s_max_i32 s8, s8, s9
; GFX8-NEXT: s_sext_i32_i16 s8, s8
; GFX8-NEXT: s_sext_i32_i16 s6, s6
-; GFX8-NEXT: s_cmp_lt_i32 s8, s6
-; GFX8-NEXT: s_cselect_b32 s6, s8, s6
-; GFX8-NEXT: s_sub_i32 s6, s10, s6
+; GFX8-NEXT: s_min_i32 s6, s8, s6
; GFX8-NEXT: s_sext_i32_i16 s8, s3
-; GFX8-NEXT: s_cmp_gt_i32 s8, s19
-; GFX8-NEXT: s_cselect_b32 s9, s8, s19
+; GFX8-NEXT: s_max_i32 s9, s8, s19
; GFX8-NEXT: s_sub_i32 s9, s9, s16
-; GFX8-NEXT: s_cmp_lt_i32 s8, s19
-; GFX8-NEXT: s_cselect_b32 s8, s8, s19
-; GFX8-NEXT: s_sub_i32 s8, s8, s17
+; GFX8-NEXT: s_min_i32 s8, s8, s19
+; GFX8-NEXT: s_lshr_b32 s15, s7, 16
; GFX8-NEXT: s_sext_i32_i16 s9, s9
; GFX8-NEXT: s_sext_i32_i16 s7, s7
-; GFX8-NEXT: s_cmp_gt_i32 s9, s7
-; GFX8-NEXT: s_cselect_b32 s7, s9, s7
+; GFX8-NEXT: s_max_i32 s7, s9, s7
+; GFX8-NEXT: s_sub_i32 s8, s8, s17
; GFX8-NEXT: s_sext_i32_i16 s7, s7
; GFX8-NEXT: s_sext_i32_i16 s8, s8
-; GFX8-NEXT: s_cmp_lt_i32 s7, s8
-; GFX8-NEXT: s_cselect_b32 s7, s7, s8
+; GFX8-NEXT: s_lshr_b32 s11, s3, 16
+; GFX8-NEXT: s_min_i32 s7, s7, s8
; GFX8-NEXT: s_sub_i32 s3, s3, s7
; GFX8-NEXT: s_sext_i32_i16 s7, s11
-; GFX8-NEXT: s_cmp_gt_i32 s7, s19
-; GFX8-NEXT: s_cselect_b32 s8, s7, s19
+; GFX8-NEXT: s_max_i32 s8, s7, s19
; GFX8-NEXT: s_sub_i32 s8, s8, s16
-; GFX8-NEXT: s_cmp_lt_i32 s7, s19
-; GFX8-NEXT: s_cselect_b32 s7, s7, s19
-; GFX8-NEXT: s_sub_i32 s7, s7, s17
+; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX8-NEXT: s_min_i32 s7, s7, s19
; GFX8-NEXT: s_sext_i32_i16 s8, s8
; GFX8-NEXT: s_sext_i32_i16 s9, s15
-; GFX8-NEXT: s_cmp_gt_i32 s8, s9
-; GFX8-NEXT: s_cselect_b32 s8, s8, s9
-; GFX8-NEXT: s_sext_i32_i16 s8, s8
-; GFX8-NEXT: s_sext_i32_i16 s7, s7
-; GFX8-NEXT: s_cmp_lt_i32 s8, s7
-; GFX8-NEXT: s_cselect_b32 s7, s8, s7
-; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000
; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
+; GFX8-NEXT: s_sub_i32 s7, s7, s17
+; GFX8-NEXT: s_max_i32 s8, s8, s9
; GFX8-NEXT: s_or_b32 s0, s0, s4
; GFX8-NEXT: s_bfe_u32 s4, s5, 0x100000
+; GFX8-NEXT: s_sub_i32 s6, s10, s6
+; GFX8-NEXT: s_sext_i32_i16 s8, s8
+; GFX8-NEXT: s_sext_i32_i16 s7, s7
; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
+; GFX8-NEXT: s_min_i32 s7, s8, s7
; GFX8-NEXT: s_or_b32 s1, s1, s4
; GFX8-NEXT: s_bfe_u32 s4, s6, 0x100000
; GFX8-NEXT: s_sub_i32 s7, s11, s7
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
index 5234d021259b..06232cec3309 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
@@ -53,8 +53,7 @@ define amdgpu_ps i7 @s_uaddsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
; GFX6-NEXT: s_lshl_b32 s0, s0, 25
; GFX6-NEXT: s_lshl_b32 s1, s1, 25
; GFX6-NEXT: s_not_b32 s2, s0
-; GFX6-NEXT: s_cmp_lt_u32 s2, s1
-; GFX6-NEXT: s_cselect_b32 s1, s2, s1
+; GFX6-NEXT: s_min_u32 s1, s2, s1
; GFX6-NEXT: s_add_i32 s0, s0, s1
; GFX6-NEXT: s_lshr_b32 s0, s0, 25
; GFX6-NEXT: ; return to shader part epilog
@@ -143,8 +142,7 @@ define amdgpu_ps i8 @s_uaddsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
; GFX6-NEXT: s_lshl_b32 s0, s0, 24
; GFX6-NEXT: s_lshl_b32 s1, s1, 24
; GFX6-NEXT: s_not_b32 s2, s0
-; GFX6-NEXT: s_cmp_lt_u32 s2, s1
-; GFX6-NEXT: s_cselect_b32 s1, s2, s1
+; GFX6-NEXT: s_min_u32 s1, s2, s1
; GFX6-NEXT: s_add_i32 s0, s0, s1
; GFX6-NEXT: s_lshr_b32 s0, s0, 24
; GFX6-NEXT: ; return to shader part epilog
@@ -272,17 +270,15 @@ define amdgpu_ps i16 @s_uaddsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
; GFX6-NEXT: s_lshr_b32 s3, s1, 8
; GFX6-NEXT: s_lshl_b32 s1, s1, 24
; GFX6-NEXT: s_not_b32 s4, s0
-; GFX6-NEXT: s_cmp_lt_u32 s4, s1
-; GFX6-NEXT: s_cselect_b32 s1, s4, s1
+; GFX6-NEXT: s_min_u32 s1, s4, s1
; GFX6-NEXT: s_add_i32 s0, s0, s1
; GFX6-NEXT: s_lshl_b32 s1, s2, 24
; GFX6-NEXT: s_lshl_b32 s2, s3, 24
-; GFX6-NEXT: s_lshr_b32 s0, s0, 24
; GFX6-NEXT: s_not_b32 s3, s1
-; GFX6-NEXT: s_cmp_lt_u32 s3, s2
-; GFX6-NEXT: s_cselect_b32 s2, s3, s2
+; GFX6-NEXT: s_min_u32 s2, s3, s2
; GFX6-NEXT: s_add_i32 s1, s1, s2
; GFX6-NEXT: s_lshr_b32 s1, s1, 24
+; GFX6-NEXT: s_lshr_b32 s0, s0, 24
; GFX6-NEXT: s_lshl_b32 s1, s1, 8
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: ; return to shader part epilog
@@ -521,31 +517,27 @@ define amdgpu_ps i32 @s_uaddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX6-NEXT: s_lshr_b32 s7, s1, 24
; GFX6-NEXT: s_lshl_b32 s1, s1, 24
; GFX6-NEXT: s_not_b32 s8, s0
-; GFX6-NEXT: s_cmp_lt_u32 s8, s1
-; GFX6-NEXT: s_cselect_b32 s1, s8, s1
+; GFX6-NEXT: s_min_u32 s1, s8, s1
; GFX6-NEXT: s_add_i32 s0, s0, s1
; GFX6-NEXT: s_lshl_b32 s1, s2, 24
; GFX6-NEXT: s_lshl_b32 s2, s5, 24
-; GFX6-NEXT: s_lshr_b32 s0, s0, 24
; GFX6-NEXT: s_not_b32 s5, s1
-; GFX6-NEXT: s_cmp_lt_u32 s5, s2
-; GFX6-NEXT: s_cselect_b32 s2, s5, s2
+; GFX6-NEXT: s_min_u32 s2, s5, s2
; GFX6-NEXT: s_add_i32 s1, s1, s2
; GFX6-NEXT: s_lshl_b32 s2, s3, 24
-; GFX6-NEXT: s_lshr_b32 s1, s1, 24
; GFX6-NEXT: s_lshl_b32 s3, s6, 24
; GFX6-NEXT: s_not_b32 s5, s2
-; GFX6-NEXT: s_cmp_lt_u32 s5, s3
-; GFX6-NEXT: s_cselect_b32 s3, s5, s3
+; GFX6-NEXT: s_min_u32 s3, s5, s3
; GFX6-NEXT: s_add_i32 s2, s2, s3
; GFX6-NEXT: s_lshl_b32 s3, s4, 24
-; GFX6-NEXT: s_lshr_b32 s2, s2, 24
; GFX6-NEXT: s_lshl_b32 s4, s7, 24
; GFX6-NEXT: s_not_b32 s5, s3
-; GFX6-NEXT: s_cmp_lt_u32 s5, s4
-; GFX6-NEXT: s_cselect_b32 s4, s5, s4
+; GFX6-NEXT: s_lshr_b32 s1, s1, 24
+; GFX6-NEXT: s_min_u32 s4, s5, s4
; GFX6-NEXT: s_add_i32 s3, s3, s4
+; GFX6-NEXT: s_lshr_b32 s0, s0, 24
; GFX6-NEXT: s_lshl_b32 s1, s1, 8
+; GFX6-NEXT: s_lshr_b32 s2, s2, 24
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_lshl_b32 s1, s2, 16
; GFX6-NEXT: s_lshr_b32 s3, s3, 24
@@ -736,8 +728,7 @@ define amdgpu_ps i24 @s_uaddsat_i24(i24 inreg %lhs, i24 inreg %rhs) {
; GFX6-NEXT: s_lshl_b32 s0, s0, 8
; GFX6-NEXT: s_lshl_b32 s1, s1, 8
; GFX6-NEXT: s_not_b32 s2, s0
-; GFX6-NEXT: s_cmp_lt_u32 s2, s1
-; GFX6-NEXT: s_cselect_b32 s1, s2, s1
+; GFX6-NEXT: s_min_u32 s1, s2, s1
; GFX6-NEXT: s_add_i32 s0, s0, s1
; GFX6-NEXT: s_lshr_b32 s0, s0, 8
; GFX6-NEXT: ; return to shader part epilog
@@ -809,8 +800,7 @@ define amdgpu_ps i32 @s_uaddsat_i32(i32 inreg %lhs, i32 inreg %rhs) {
; GFX6-LABEL: s_uaddsat_i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_not_b32 s2, s0
-; GFX6-NEXT: s_cmp_lt_u32 s2, s1
-; GFX6-NEXT: s_cselect_b32 s1, s2, s1
+; GFX6-NEXT: s_min_u32 s1, s2, s1
; GFX6-NEXT: s_add_i32 s0, s0, s1
; GFX6-NEXT: ; return to shader part epilog
;
@@ -932,12 +922,10 @@ define amdgpu_ps <2 x i32> @s_uaddsat_v2i32(<2 x i32> inreg %lhs, <2 x i32> inre
; GFX6-LABEL: s_uaddsat_v2i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_not_b32 s4, s0
-; GFX6-NEXT: s_cmp_lt_u32 s4, s2
-; GFX6-NEXT: s_cselect_b32 s2, s4, s2
+; GFX6-NEXT: s_min_u32 s2, s4, s2
; GFX6-NEXT: s_add_i32 s0, s0, s2
; GFX6-NEXT: s_not_b32 s2, s1
-; GFX6-NEXT: s_cmp_lt_u32 s2, s3
-; GFX6-NEXT: s_cselect_b32 s2, s2, s3
+; GFX6-NEXT: s_min_u32 s2, s2, s3
; GFX6-NEXT: s_add_i32 s1, s1, s2
; GFX6-NEXT: ; return to shader part epilog
;
@@ -1019,16 +1007,13 @@ define amdgpu_ps <3 x i32> @s_uaddsat_v3i32(<3 x i32> inreg %lhs, <3 x i32> inre
; GFX6-LABEL: s_uaddsat_v3i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_not_b32 s6, s0
-; GFX6-NEXT: s_cmp_lt_u32 s6, s3
-; GFX6-NEXT: s_cselect_b32 s3, s6, s3
+; GFX6-NEXT: s_min_u32 s3, s6, s3
; GFX6-NEXT: s_add_i32 s0, s0, s3
; GFX6-NEXT: s_not_b32 s3, s1
-; GFX6-NEXT: s_cmp_lt_u32 s3, s4
-; GFX6-NEXT: s_cselect_b32 s3, s3, s4
+; GFX6-NEXT: s_min_u32 s3, s3, s4
; GFX6-NEXT: s_add_i32 s1, s1, s3
; GFX6-NEXT: s_not_b32 s3, s2
-; GFX6-NEXT: s_cmp_lt_u32 s3, s5
-; GFX6-NEXT: s_cselect_b32 s3, s3, s5
+; GFX6-NEXT: s_min_u32 s3, s3, s5
; GFX6-NEXT: s_add_i32 s2, s2, s3
; GFX6-NEXT: ; return to shader part epilog
;
@@ -1124,20 +1109,16 @@ define amdgpu_ps <4 x i32> @s_uaddsat_v4i32(<4 x i32> inreg %lhs, <4 x i32> inre
; GFX6-LABEL: s_uaddsat_v4i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_not_b32 s8, s0
-; GFX6-NEXT: s_cmp_lt_u32 s8, s4
-; GFX6-NEXT: s_cselect_b32 s4, s8, s4
+; GFX6-NEXT: s_min_u32 s4, s8, s4
; GFX6-NEXT: s_add_i32 s0, s0, s4
; GFX6-NEXT: s_not_b32 s4, s1
-; GFX6-NEXT: s_cmp_lt_u32 s4, s5
-; GFX6-NEXT: s_cselect_b32 s4, s4, s5
+; GFX6-NEXT: s_min_u32 s4, s4, s5
; GFX6-NEXT: s_add_i32 s1, s1, s4
; GFX6-NEXT: s_not_b32 s4, s2
-; GFX6-NEXT: s_cmp_lt_u32 s4, s6
-; GFX6-NEXT: s_cselect_b32 s4, s4, s6
+; GFX6-NEXT: s_min_u32 s4, s4, s6
; GFX6-NEXT: s_add_i32 s2, s2, s4
; GFX6-NEXT: s_not_b32 s4, s3
-; GFX6-NEXT: s_cmp_lt_u32 s4, s7
-; GFX6-NEXT: s_cselect_b32 s4, s4, s7
+; GFX6-NEXT: s_min_u32 s4, s4, s7
; GFX6-NEXT: s_add_i32 s3, s3, s4
; GFX6-NEXT: ; return to shader part epilog
;
@@ -1247,24 +1228,19 @@ define amdgpu_ps <5 x i32> @s_uaddsat_v5i32(<5 x i32> inreg %lhs, <5 x i32> inre
; GFX6-LABEL: s_uaddsat_v5i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_not_b32 s10, s0
-; GFX6-NEXT: s_cmp_lt_u32 s10, s5
-; GFX6-NEXT: s_cselect_b32 s5, s10, s5
+; GFX6-NEXT: s_min_u32 s5, s10, s5
; GFX6-NEXT: s_add_i32 s0, s0, s5
; GFX6-NEXT: s_not_b32 s5, s1
-; GFX6-NEXT: s_cmp_lt_u32 s5, s6
-; GFX6-NEXT: s_cselect_b32 s5, s5, s6
+; GFX6-NEXT: s_min_u32 s5, s5, s6
; GFX6-NEXT: s_add_i32 s1, s1, s5
; GFX6-NEXT: s_not_b32 s5, s2
-; GFX6-NEXT: s_cmp_lt_u32 s5, s7
-; GFX6-NEXT: s_cselect_b32 s5, s5, s7
+; GFX6-NEXT: s_min_u32 s5, s5, s7
; GFX6-NEXT: s_add_i32 s2, s2, s5
; GFX6-NEXT: s_not_b32 s5, s3
-; GFX6-NEXT: s_cmp_lt_u32 s5, s8
-; GFX6-NEXT: s_cselect_b32 s5, s5, s8
+; GFX6-NEXT: s_min_u32 s5, s5, s8
; GFX6-NEXT: s_add_i32 s3, s3, s5
; GFX6-NEXT: s_not_b32 s5, s4
-; GFX6-NEXT: s_cmp_lt_u32 s5, s9
-; GFX6-NEXT: s_cselect_b32 s5, s5, s9
+; GFX6-NEXT: s_min_u32 s5, s5, s9
; GFX6-NEXT: s_add_i32 s4, s4, s5
; GFX6-NEXT: ; return to shader part epilog
;
@@ -1448,68 +1424,52 @@ define amdgpu_ps <16 x i32> @s_uaddsat_v16i32(<16 x i32> inreg %lhs, <16 x i32>
; GFX6-LABEL: s_uaddsat_v16i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_not_b32 s32, s0
-; GFX6-NEXT: s_cmp_lt_u32 s32, s16
-; GFX6-NEXT: s_cselect_b32 s16, s32, s16
+; GFX6-NEXT: s_min_u32 s16, s32, s16
; GFX6-NEXT: s_add_i32 s0, s0, s16
; GFX6-NEXT: s_not_b32 s16, s1
-; GFX6-NEXT: s_cmp_lt_u32 s16, s17
-; GFX6-NEXT: s_cselect_b32 s16, s16, s17
+; GFX6-NEXT: s_min_u32 s16, s16, s17
; GFX6-NEXT: s_add_i32 s1, s1, s16
; GFX6-NEXT: s_not_b32 s16, s2
-; GFX6-NEXT: s_cmp_lt_u32 s16, s18
-; GFX6-NEXT: s_cselect_b32 s16, s16, s18
+; GFX6-NEXT: s_min_u32 s16, s16, s18
; GFX6-NEXT: s_add_i32 s2, s2, s16
; GFX6-NEXT: s_not_b32 s16, s3
-; GFX6-NEXT: s_cmp_lt_u32 s16, s19
-; GFX6-NEXT: s_cselect_b32 s16, s16, s19
+; GFX6-NEXT: s_min_u32 s16, s16, s19
; GFX6-NEXT: s_add_i32 s3, s3, s16
; GFX6-NEXT: s_not_b32 s16, s4
-; GFX6-NEXT: s_cmp_lt_u32 s16, s20
-; GFX6-NEXT: s_cselect_b32 s16, s16, s20
+; GFX6-NEXT: s_min_u32 s16, s16, s20
; GFX6-NEXT: s_add_i32 s4, s4, s16
; GFX6-NEXT: s_not_b32 s16, s5
-; GFX6-NEXT: s_cmp_lt_u32 s16, s21
-; GFX6-NEXT: s_cselect_b32 s16, s16, s21
+; GFX6-NEXT: s_min_u32 s16, s16, s21
; GFX6-NEXT: s_add_i32 s5, s5, s16
; GFX6-NEXT: s_not_b32 s16, s6
-; GFX6-NEXT: s_cmp_lt_u32 s16, s22
-; GFX6-NEXT: s_cselect_b32 s16, s16, s22
+; GFX6-NEXT: s_min_u32 s16, s16, s22
; GFX6-NEXT: s_add_i32 s6, s6, s16
; GFX6-NEXT: s_not_b32 s16, s7
-; GFX6-NEXT: s_cmp_lt_u32 s16, s23
-; GFX6-NEXT: s_cselect_b32 s16, s16, s23
+; GFX6-NEXT: s_min_u32 s16, s16, s23
; GFX6-NEXT: s_add_i32 s7, s7, s16
; GFX6-NEXT: s_not_b32 s16, s8
-; GFX6-NEXT: s_cmp_lt_u32 s16, s24
-; GFX6-NEXT: s_cselect_b32 s16, s16, s24
+; GFX6-NEXT: s_min_u32 s16, s16, s24
; GFX6-NEXT: s_add_i32 s8, s8, s16
; GFX6-NEXT: s_not_b32 s16, s9
-; GFX6-NEXT: s_cmp_lt_u32 s16, s25
-; GFX6-NEXT: s_cselect_b32 s16, s16, s25
+; GFX6-NEXT: s_min_u32 s16, s16, s25
; GFX6-NEXT: s_add_i32 s9, s9, s16
; GFX6-NEXT: s_not_b32 s16, s10
-; GFX6-NEXT: s_cmp_lt_u32 s16, s26
-; GFX6-NEXT: s_cselect_b32 s16, s16, s26
+; GFX6-NEXT: s_min_u32 s16, s16, s26
; GFX6-NEXT: s_add_i32 s10, s10, s16
; GFX6-NEXT: s_not_b32 s16, s11
-; GFX6-NEXT: s_cmp_lt_u32 s16, s27
-; GFX6-NEXT: s_cselect_b32 s16, s16, s27
+; GFX6-NEXT: s_min_u32 s16, s16, s27
; GFX6-NEXT: s_add_i32 s11, s11, s16
; GFX6-NEXT: s_not_b32 s16, s12
-; GFX6-NEXT: s_cmp_lt_u32 s16, s28
-; GFX6-NEXT: s_cselect_b32 s16, s16, s28
+; GFX6-NEXT: s_min_u32 s16, s16, s28
; GFX6-NEXT: s_add_i32 s12, s12, s16
; GFX6-NEXT: s_not_b32 s16, s13
-; GFX6-NEXT: s_cmp_lt_u32 s16, s29
-; GFX6-NEXT: s_cselect_b32 s16, s16, s29
+; GFX6-NEXT: s_min_u32 s16, s16, s29
; GFX6-NEXT: s_add_i32 s13, s13, s16
; GFX6-NEXT: s_not_b32 s16, s14
-; GFX6-NEXT: s_cmp_lt_u32 s16, s30
-; GFX6-NEXT: s_cselect_b32 s16, s16, s30
+; GFX6-NEXT: s_min_u32 s16, s16, s30
; GFX6-NEXT: s_add_i32 s14, s14, s16
; GFX6-NEXT: s_not_b32 s16, s15
-; GFX6-NEXT: s_cmp_lt_u32 s16, s31
-; GFX6-NEXT: s_cselect_b32 s16, s16, s31
+; GFX6-NEXT: s_min_u32 s16, s16, s31
; GFX6-NEXT: s_add_i32 s15, s15, s16
; GFX6-NEXT: ; return to shader part epilog
;
@@ -1696,8 +1656,7 @@ define amdgpu_ps i16 @s_uaddsat_i16(i16 inreg %lhs, i16 inreg %rhs) {
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_not_b32 s2, s0
-; GFX6-NEXT: s_cmp_lt_u32 s2, s1
-; GFX6-NEXT: s_cselect_b32 s1, s2, s1
+; GFX6-NEXT: s_min_u32 s1, s2, s1
; GFX6-NEXT: s_add_i32 s0, s0, s1
; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: ; return to shader part epilog
@@ -1835,17 +1794,15 @@ define amdgpu_ps i32 @s_uaddsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_not_b32 s4, s0
-; GFX6-NEXT: s_cmp_lt_u32 s4, s2
-; GFX6-NEXT: s_cselect_b32 s2, s4, s2
-; GFX6-NEXT: s_add_i32 s0, s0, s2
+; GFX6-NEXT: s_min_u32 s2, s4, s2
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
+; GFX6-NEXT: s_add_i32 s0, s0, s2
; GFX6-NEXT: s_lshl_b32 s2, s3, 16
-; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: s_not_b32 s3, s1
-; GFX6-NEXT: s_cmp_lt_u32 s3, s2
-; GFX6-NEXT: s_cselect_b32 s2, s3, s2
+; GFX6-NEXT: s_min_u32 s2, s3, s2
; GFX6-NEXT: s_add_i32 s1, s1, s2
; GFX6-NEXT: s_lshr_b32 s1, s1, 16
+; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: ; return to shader part epilog
@@ -2053,33 +2010,29 @@ define amdgpu_ps <2 x i32> @s_uaddsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s4, s4, 16
; GFX6-NEXT: s_not_b32 s8, s0
-; GFX6-NEXT: s_cmp_lt_u32 s8, s4
-; GFX6-NEXT: s_cselect_b32 s4, s8, s4
-; GFX6-NEXT: s_add_i32 s0, s0, s4
+; GFX6-NEXT: s_min_u32 s4, s8, s4
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
+; GFX6-NEXT: s_add_i32 s0, s0, s4
; GFX6-NEXT: s_lshl_b32 s4, s5, 16
-; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: s_not_b32 s5, s1
-; GFX6-NEXT: s_cmp_lt_u32 s5, s4
-; GFX6-NEXT: s_cselect_b32 s4, s5, s4
-; GFX6-NEXT: s_add_i32 s1, s1, s4
+; GFX6-NEXT: s_min_u32 s4, s5, s4
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
-; GFX6-NEXT: s_lshr_b32 s1, s1, 16
+; GFX6-NEXT: s_add_i32 s1, s1, s4
; GFX6-NEXT: s_lshl_b32 s4, s6, 16
; GFX6-NEXT: s_not_b32 s5, s2
-; GFX6-NEXT: s_cmp_lt_u32 s5, s4
-; GFX6-NEXT: s_cselect_b32 s4, s5, s4
-; GFX6-NEXT: s_add_i32 s2, s2, s4
+; GFX6-NEXT: s_min_u32 s4, s5, s4
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
-; GFX6-NEXT: s_lshr_b32 s2, s2, 16
+; GFX6-NEXT: s_add_i32 s2, s2, s4
; GFX6-NEXT: s_lshl_b32 s4, s7, 16
; GFX6-NEXT: s_not_b32 s5, s3
-; GFX6-NEXT: s_cmp_lt_u32 s5, s4
-; GFX6-NEXT: s_cselect_b32 s4, s5, s4
+; GFX6-NEXT: s_min_u32 s4, s5, s4
+; GFX6-NEXT: s_lshr_b32 s1, s1, 16
; GFX6-NEXT: s_add_i32 s3, s3, s4
+; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_lshr_b32 s3, s3, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
+; GFX6-NEXT: s_lshr_b32 s2, s2, 16
; GFX6-NEXT: s_lshl_b32 s1, s3, 16
; GFX6-NEXT: s_or_b32 s1, s2, s1
; GFX6-NEXT: ; return to shader part epilog
@@ -2234,49 +2187,43 @@ define amdgpu_ps <3 x i32> @s_uaddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
; GFX6-NEXT: s_not_b32 s12, s0
-; GFX6-NEXT: s_cmp_lt_u32 s12, s6
-; GFX6-NEXT: s_cselect_b32 s6, s12, s6
-; GFX6-NEXT: s_add_i32 s0, s0, s6
+; GFX6-NEXT: s_min_u32 s6, s12, s6
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
+; GFX6-NEXT: s_add_i32 s0, s0, s6
; GFX6-NEXT: s_lshl_b32 s6, s7, 16
-; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: s_not_b32 s7, s1
-; GFX6-NEXT: s_cmp_lt_u32 s7, s6
-; GFX6-NEXT: s_cselect_b32 s6, s7, s6
-; GFX6-NEXT: s_add_i32 s1, s1, s6
+; GFX6-NEXT: s_min_u32 s6, s7, s6
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
-; GFX6-NEXT: s_lshr_b32 s1, s1, 16
+; GFX6-NEXT: s_add_i32 s1, s1, s6
; GFX6-NEXT: s_lshl_b32 s6, s8, 16
; GFX6-NEXT: s_not_b32 s7, s2
-; GFX6-NEXT: s_cmp_lt_u32 s7, s6
-; GFX6-NEXT: s_cselect_b32 s6, s7, s6
-; GFX6-NEXT: s_add_i32 s2, s2, s6
+; GFX6-NEXT: s_min_u32 s6, s7, s6
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
-; GFX6-NEXT: s_lshr_b32 s2, s2, 16
+; GFX6-NEXT: s_add_i32 s2, s2, s6
; GFX6-NEXT: s_lshl_b32 s6, s9, 16
; GFX6-NEXT: s_not_b32 s7, s3
-; GFX6-NEXT: s_cmp_lt_u32 s7, s6
-; GFX6-NEXT: s_cselect_b32 s6, s7, s6
-; GFX6-NEXT: s_add_i32 s3, s3, s6
+; GFX6-NEXT: s_min_u32 s6, s7, s6
; GFX6-NEXT: s_lshl_b32 s4, s4, 16
-; GFX6-NEXT: s_lshr_b32 s3, s3, 16
+; GFX6-NEXT: s_add_i32 s3, s3, s6
; GFX6-NEXT: s_lshl_b32 s6, s10, 16
; GFX6-NEXT: s_not_b32 s7, s4
-; GFX6-NEXT: s_cmp_lt_u32 s7, s6
-; GFX6-NEXT: s_cselect_b32 s6, s7, s6
-; GFX6-NEXT: s_add_i32 s4, s4, s6
+; GFX6-NEXT: s_min_u32 s6, s7, s6
; GFX6-NEXT: s_lshl_b32 s5, s5, 16
-; GFX6-NEXT: s_lshr_b32 s4, s4, 16
+; GFX6-NEXT: s_add_i32 s4, s4, s6
; GFX6-NEXT: s_lshl_b32 s6, s11, 16
; GFX6-NEXT: s_not_b32 s7, s5
-; GFX6-NEXT: s_cmp_lt_u32 s7, s6
-; GFX6-NEXT: s_cselect_b32 s6, s7, s6
+; GFX6-NEXT: s_lshr_b32 s1, s1, 16
+; GFX6-NEXT: s_min_u32 s6, s7, s6
; GFX6-NEXT: s_add_i32 s5, s5, s6
+; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
+; GFX6-NEXT: s_lshr_b32 s3, s3, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
+; GFX6-NEXT: s_lshr_b32 s2, s2, 16
; GFX6-NEXT: s_lshl_b32 s1, s3, 16
; GFX6-NEXT: s_lshr_b32 s5, s5, 16
; GFX6-NEXT: s_or_b32 s1, s2, s1
+; GFX6-NEXT: s_lshr_b32 s4, s4, 16
; GFX6-NEXT: s_lshl_b32 s2, s5, 16
; GFX6-NEXT: s_or_b32 s2, s4, s2
; GFX6-NEXT: ; return to shader part epilog
@@ -2454,65 +2401,57 @@ define amdgpu_ps <4 x i32> @s_uaddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s8, s8, 16
; GFX6-NEXT: s_not_b32 s16, s0
-; GFX6-NEXT: s_cmp_lt_u32 s16, s8
-; GFX6-NEXT: s_cselect_b32 s8, s16, s8
-; GFX6-NEXT: s_add_i32 s0, s0, s8
+; GFX6-NEXT: s_min_u32 s8, s16, s8
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
+; GFX6-NEXT: s_add_i32 s0, s0, s8
; GFX6-NEXT: s_lshl_b32 s8, s9, 16
-; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: s_not_b32 s9, s1
-; GFX6-NEXT: s_cmp_lt_u32 s9, s8
-; GFX6-NEXT: s_cselect_b32 s8, s9, s8
-; GFX6-NEXT: s_add_i32 s1, s1, s8
+; GFX6-NEXT: s_min_u32 s8, s9, s8
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
-; GFX6-NEXT: s_lshr_b32 s1, s1, 16
+; GFX6-NEXT: s_add_i32 s1, s1, s8
; GFX6-NEXT: s_lshl_b32 s8, s10, 16
; GFX6-NEXT: s_not_b32 s9, s2
-; GFX6-NEXT: s_cmp_lt_u32 s9, s8
-; GFX6-NEXT: s_cselect_b32 s8, s9, s8
-; GFX6-NEXT: s_add_i32 s2, s2, s8
+; GFX6-NEXT: s_min_u32 s8, s9, s8
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
-; GFX6-NEXT: s_lshr_b32 s2, s2, 16
+; GFX6-NEXT: s_add_i32 s2, s2, s8
; GFX6-NEXT: s_lshl_b32 s8, s11, 16
; GFX6-NEXT: s_not_b32 s9, s3
-; GFX6-NEXT: s_cmp_lt_u32 s9, s8
-; GFX6-NEXT: s_cselect_b32 s8, s9, s8
-; GFX6-NEXT: s_add_i32 s3, s3, s8
+; GFX6-NEXT: s_min_u32 s8, s9, s8
; GFX6-NEXT: s_lshl_b32 s4, s4, 16
-; GFX6-NEXT: s_lshr_b32 s3, s3, 16
+; GFX6-NEXT: s_add_i32 s3, s3, s8
; GFX6-NEXT: s_lshl_b32 s8, s12, 16
; GFX6-NEXT: s_not_b32 s9, s4
-; GFX6-NEXT: s_cmp_lt_u32 s9, s8
-; GFX6-NEXT: s_cselect_b32 s8, s9, s8
-; GFX6-NEXT: s_add_i32 s4, s4, s8
+; GFX6-NEXT: s_min_u32 s8, s9, s8
; GFX6-NEXT: s_lshl_b32 s5, s5, 16
-; GFX6-NEXT: s_lshr_b32 s4, s4, 16
+; GFX6-NEXT: s_add_i32 s4, s4, s8
; GFX6-NEXT: s_lshl_b32 s8, s13, 16
; GFX6-NEXT: s_not_b32 s9, s5
-; GFX6-NEXT: s_cmp_lt_u32 s9, s8
-; GFX6-NEXT: s_cselect_b32 s8, s9, s8
-; GFX6-NEXT: s_add_i32 s5, s5, s8
+; GFX6-NEXT: s_min_u32 s8, s9, s8
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
-; GFX6-NEXT: s_lshr_b32 s5, s5, 16
+; GFX6-NEXT: s_add_i32 s5, s5, s8
; GFX6-NEXT: s_lshl_b32 s8, s14, 16
; GFX6-NEXT: s_not_b32 s9, s6
-; GFX6-NEXT: s_cmp_lt_u32 s9, s8
-; GFX6-NEXT: s_cselect_b32 s8, s9, s8
-; GFX6-NEXT: s_add_i32 s6, s6, s8
+; GFX6-NEXT: s_min_u32 s8, s9, s8
; GFX6-NEXT: s_lshl_b32 s7, s7, 16
-; GFX6-NEXT: s_lshr_b32 s6, s6, 16
+; GFX6-NEXT: s_add_i32 s6, s6, s8
; GFX6-NEXT: s_lshl_b32 s8, s15, 16
; GFX6-NEXT: s_not_b32 s9, s7
-; GFX6-NEXT: s_cmp_lt_u32 s9, s8
-; GFX6-NEXT: s_cselect_b32 s8, s9, s8
+; GFX6-NEXT: s_lshr_b32 s1, s1, 16
+; GFX6-NEXT: s_min_u32 s8, s9, s8
; GFX6-NEXT: s_add_i32 s7, s7, s8
+; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
+; GFX6-NEXT: s_lshr_b32 s3, s3, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
+; GFX6-NEXT: s_lshr_b32 s2, s2, 16
; GFX6-NEXT: s_lshl_b32 s1, s3, 16
; GFX6-NEXT: s_lshr_b32 s7, s7, 16
-; GFX6-NEXT: s_lshl_b32 s3, s7, 16
+; GFX6-NEXT: s_lshr_b32 s5, s5, 16
; GFX6-NEXT: s_or_b32 s1, s2, s1
+; GFX6-NEXT: s_lshr_b32 s4, s4, 16
; GFX6-NEXT: s_lshl_b32 s2, s5, 16
+; GFX6-NEXT: s_lshr_b32 s6, s6, 16
+; GFX6-NEXT: s_lshl_b32 s3, s7, 16
; GFX6-NEXT: s_or_b32 s2, s4, s2
; GFX6-NEXT: s_or_b32 s3, s6, s3
; GFX6-NEXT: ; return to shader part epilog
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
index 3a44903f80f1..cf3427dac88f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
@@ -51,8 +51,7 @@ define amdgpu_ps i7 @s_usubsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 25
; GFX6-NEXT: s_lshl_b32 s1, s1, 25
-; GFX6-NEXT: s_cmp_lt_u32 s0, s1
-; GFX6-NEXT: s_cselect_b32 s1, s0, s1
+; GFX6-NEXT: s_min_u32 s1, s0, s1
; GFX6-NEXT: s_sub_i32 s0, s0, s1
; GFX6-NEXT: s_lshr_b32 s0, s0, 25
; GFX6-NEXT: ; return to shader part epilog
@@ -139,8 +138,7 @@ define amdgpu_ps i8 @s_usubsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 24
; GFX6-NEXT: s_lshl_b32 s1, s1, 24
-; GFX6-NEXT: s_cmp_lt_u32 s0, s1
-; GFX6-NEXT: s_cselect_b32 s1, s0, s1
+; GFX6-NEXT: s_min_u32 s1, s0, s1
; GFX6-NEXT: s_sub_i32 s0, s0, s1
; GFX6-NEXT: s_lshr_b32 s0, s0, 24
; GFX6-NEXT: ; return to shader part epilog
@@ -265,16 +263,14 @@ define amdgpu_ps i16 @s_usubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
; GFX6-NEXT: s_lshr_b32 s3, s1, 8
; GFX6-NEXT: s_lshl_b32 s0, s0, 24
; GFX6-NEXT: s_lshl_b32 s1, s1, 24
-; GFX6-NEXT: s_cmp_lt_u32 s0, s1
-; GFX6-NEXT: s_cselect_b32 s1, s0, s1
+; GFX6-NEXT: s_min_u32 s1, s0, s1
; GFX6-NEXT: s_sub_i32 s0, s0, s1
; GFX6-NEXT: s_lshl_b32 s1, s2, 24
-; GFX6-NEXT: s_lshr_b32 s0, s0, 24
; GFX6-NEXT: s_lshl_b32 s2, s3, 24
-; GFX6-NEXT: s_cmp_lt_u32 s1, s2
-; GFX6-NEXT: s_cselect_b32 s2, s1, s2
+; GFX6-NEXT: s_min_u32 s2, s1, s2
; GFX6-NEXT: s_sub_i32 s1, s1, s2
; GFX6-NEXT: s_lshr_b32 s1, s1, 24
+; GFX6-NEXT: s_lshr_b32 s0, s0, 24
; GFX6-NEXT: s_lshl_b32 s1, s1, 8
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: ; return to shader part epilog
@@ -508,28 +504,24 @@ define amdgpu_ps i32 @s_usubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX6-NEXT: s_lshr_b32 s7, s1, 24
; GFX6-NEXT: s_lshl_b32 s0, s0, 24
; GFX6-NEXT: s_lshl_b32 s1, s1, 24
-; GFX6-NEXT: s_cmp_lt_u32 s0, s1
-; GFX6-NEXT: s_cselect_b32 s1, s0, s1
+; GFX6-NEXT: s_min_u32 s1, s0, s1
; GFX6-NEXT: s_sub_i32 s0, s0, s1
; GFX6-NEXT: s_lshl_b32 s1, s2, 24
-; GFX6-NEXT: s_lshr_b32 s0, s0, 24
; GFX6-NEXT: s_lshl_b32 s2, s5, 24
-; GFX6-NEXT: s_cmp_lt_u32 s1, s2
-; GFX6-NEXT: s_cselect_b32 s2, s1, s2
+; GFX6-NEXT: s_min_u32 s2, s1, s2
; GFX6-NEXT: s_sub_i32 s1, s1, s2
; GFX6-NEXT: s_lshl_b32 s2, s3, 24
-; GFX6-NEXT: s_lshr_b32 s1, s1, 24
; GFX6-NEXT: s_lshl_b32 s3, s6, 24
-; GFX6-NEXT: s_cmp_lt_u32 s2, s3
-; GFX6-NEXT: s_cselect_b32 s3, s2, s3
+; GFX6-NEXT: s_min_u32 s3, s2, s3
; GFX6-NEXT: s_sub_i32 s2, s2, s3
; GFX6-NEXT: s_lshl_b32 s3, s4, 24
-; GFX6-NEXT: s_lshr_b32 s2, s2, 24
; GFX6-NEXT: s_lshl_b32 s4, s7, 24
-; GFX6-NEXT: s_cmp_lt_u32 s3, s4
-; GFX6-NEXT: s_cselect_b32 s4, s3, s4
+; GFX6-NEXT: s_lshr_b32 s1, s1, 24
+; GFX6-NEXT: s_min_u32 s4, s3, s4
; GFX6-NEXT: s_sub_i32 s3, s3, s4
+; GFX6-NEXT: s_lshr_b32 s0, s0, 24
; GFX6-NEXT: s_lshl_b32 s1, s1, 8
+; GFX6-NEXT: s_lshr_b32 s2, s2, 24
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_lshl_b32 s1, s2, 16
; GFX6-NEXT: s_lshr_b32 s3, s3, 24
@@ -718,8 +710,7 @@ define amdgpu_ps i24 @s_usubsat_i24(i24 inreg %lhs, i24 inreg %rhs) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 8
; GFX6-NEXT: s_lshl_b32 s1, s1, 8
-; GFX6-NEXT: s_cmp_lt_u32 s0, s1
-; GFX6-NEXT: s_cselect_b32 s1, s0, s1
+; GFX6-NEXT: s_min_u32 s1, s0, s1
; GFX6-NEXT: s_sub_i32 s0, s0, s1
; GFX6-NEXT: s_lshr_b32 s0, s0, 8
; GFX6-NEXT: ; return to shader part epilog
@@ -789,8 +780,7 @@ define i32 @v_usubsat_i32(i32 %lhs, i32 %rhs) {
define amdgpu_ps i32 @s_usubsat_i32(i32 inreg %lhs, i32 inreg %rhs) {
; GFX6-LABEL: s_usubsat_i32:
; GFX6: ; %bb.0:
-; GFX6-NEXT: s_cmp_lt_u32 s0, s1
-; GFX6-NEXT: s_cselect_b32 s1, s0, s1
+; GFX6-NEXT: s_min_u32 s1, s0, s1
; GFX6-NEXT: s_sub_i32 s0, s0, s1
; GFX6-NEXT: ; return to shader part epilog
;
@@ -907,11 +897,9 @@ define <2 x i32> @v_usubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
define amdgpu_ps <2 x i32> @s_usubsat_v2i32(<2 x i32> inreg %lhs, <2 x i32> inreg %rhs) {
; GFX6-LABEL: s_usubsat_v2i32:
; GFX6: ; %bb.0:
-; GFX6-NEXT: s_cmp_lt_u32 s0, s2
-; GFX6-NEXT: s_cselect_b32 s2, s0, s2
+; GFX6-NEXT: s_min_u32 s2, s0, s2
; GFX6-NEXT: s_sub_i32 s0, s0, s2
-; GFX6-NEXT: s_cmp_lt_u32 s1, s3
-; GFX6-NEXT: s_cselect_b32 s2, s1, s3
+; GFX6-NEXT: s_min_u32 s2, s1, s3
; GFX6-NEXT: s_sub_i32 s1, s1, s2
; GFX6-NEXT: ; return to shader part epilog
;
@@ -989,14 +977,11 @@ define <3 x i32> @v_usubsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
define amdgpu_ps <3 x i32> @s_usubsat_v3i32(<3 x i32> inreg %lhs, <3 x i32> inreg %rhs) {
; GFX6-LABEL: s_usubsat_v3i32:
; GFX6: ; %bb.0:
-; GFX6-NEXT: s_cmp_lt_u32 s0, s3
-; GFX6-NEXT: s_cselect_b32 s3, s0, s3
+; GFX6-NEXT: s_min_u32 s3, s0, s3
; GFX6-NEXT: s_sub_i32 s0, s0, s3
-; GFX6-NEXT: s_cmp_lt_u32 s1, s4
-; GFX6-NEXT: s_cselect_b32 s3, s1, s4
+; GFX6-NEXT: s_min_u32 s3, s1, s4
; GFX6-NEXT: s_sub_i32 s1, s1, s3
-; GFX6-NEXT: s_cmp_lt_u32 s2, s5
-; GFX6-NEXT: s_cselect_b32 s3, s2, s5
+; GFX6-NEXT: s_min_u32 s3, s2, s5
; GFX6-NEXT: s_sub_i32 s2, s2, s3
; GFX6-NEXT: ; return to shader part epilog
;
@@ -1087,17 +1072,13 @@ define <4 x i32> @v_usubsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
define amdgpu_ps <4 x i32> @s_usubsat_v4i32(<4 x i32> inreg %lhs, <4 x i32> inreg %rhs) {
; GFX6-LABEL: s_usubsat_v4i32:
; GFX6: ; %bb.0:
-; GFX6-NEXT: s_cmp_lt_u32 s0, s4
-; GFX6-NEXT: s_cselect_b32 s4, s0, s4
+; GFX6-NEXT: s_min_u32 s4, s0, s4
; GFX6-NEXT: s_sub_i32 s0, s0, s4
-; GFX6-NEXT: s_cmp_lt_u32 s1, s5
-; GFX6-NEXT: s_cselect_b32 s4, s1, s5
+; GFX6-NEXT: s_min_u32 s4, s1, s5
; GFX6-NEXT: s_sub_i32 s1, s1, s4
-; GFX6-NEXT: s_cmp_lt_u32 s2, s6
-; GFX6-NEXT: s_cselect_b32 s4, s2, s6
+; GFX6-NEXT: s_min_u32 s4, s2, s6
; GFX6-NEXT: s_sub_i32 s2, s2, s4
-; GFX6-NEXT: s_cmp_lt_u32 s3, s7
-; GFX6-NEXT: s_cselect_b32 s4, s3, s7
+; GFX6-NEXT: s_min_u32 s4, s3, s7
; GFX6-NEXT: s_sub_i32 s3, s3, s4
; GFX6-NEXT: ; return to shader part epilog
;
@@ -1201,20 +1182,15 @@ define <5 x i32> @v_usubsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) {
define amdgpu_ps <5 x i32> @s_usubsat_v5i32(<5 x i32> inreg %lhs, <5 x i32> inreg %rhs) {
; GFX6-LABEL: s_usubsat_v5i32:
; GFX6: ; %bb.0:
-; GFX6-NEXT: s_cmp_lt_u32 s0, s5
-; GFX6-NEXT: s_cselect_b32 s5, s0, s5
+; GFX6-NEXT: s_min_u32 s5, s0, s5
; GFX6-NEXT: s_sub_i32 s0, s0, s5
-; GFX6-NEXT: s_cmp_lt_u32 s1, s6
-; GFX6-NEXT: s_cselect_b32 s5, s1, s6
+; GFX6-NEXT: s_min_u32 s5, s1, s6
; GFX6-NEXT: s_sub_i32 s1, s1, s5
-; GFX6-NEXT: s_cmp_lt_u32 s2, s7
-; GFX6-NEXT: s_cselect_b32 s5, s2, s7
+; GFX6-NEXT: s_min_u32 s5, s2, s7
; GFX6-NEXT: s_sub_i32 s2, s2, s5
-; GFX6-NEXT: s_cmp_lt_u32 s3, s8
-; GFX6-NEXT: s_cselect_b32 s5, s3, s8
+; GFX6-NEXT: s_min_u32 s5, s3, s8
; GFX6-NEXT: s_sub_i32 s3, s3, s5
-; GFX6-NEXT: s_cmp_lt_u32 s4, s9
-; GFX6-NEXT: s_cselect_b32 s5, s4, s9
+; GFX6-NEXT: s_min_u32 s5, s4, s9
; GFX6-NEXT: s_sub_i32 s4, s4, s5
; GFX6-NEXT: ; return to shader part epilog
;
@@ -1381,53 +1357,37 @@ define <16 x i32> @v_usubsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
define amdgpu_ps <16 x i32> @s_usubsat_v16i32(<16 x i32> inreg %lhs, <16 x i32> inreg %rhs) {
; GFX6-LABEL: s_usubsat_v16i32:
; GFX6: ; %bb.0:
-; GFX6-NEXT: s_cmp_lt_u32 s0, s16
-; GFX6-NEXT: s_cselect_b32 s16, s0, s16
+; GFX6-NEXT: s_min_u32 s16, s0, s16
; GFX6-NEXT: s_sub_i32 s0, s0, s16
-; GFX6-NEXT: s_cmp_lt_u32 s1, s17
-; GFX6-NEXT: s_cselect_b32 s16, s1, s17
+; GFX6-NEXT: s_min_u32 s16, s1, s17
; GFX6-NEXT: s_sub_i32 s1, s1, s16
-; GFX6-NEXT: s_cmp_lt_u32 s2, s18
-; GFX6-NEXT: s_cselect_b32 s16, s2, s18
+; GFX6-NEXT: s_min_u32 s16, s2, s18
; GFX6-NEXT: s_sub_i32 s2, s2, s16
-; GFX6-NEXT: s_cmp_lt_u32 s3, s19
-; GFX6-NEXT: s_cselect_b32 s16, s3, s19
+; GFX6-NEXT: s_min_u32 s16, s3, s19
; GFX6-NEXT: s_sub_i32 s3, s3, s16
-; GFX6-NEXT: s_cmp_lt_u32 s4, s20
-; GFX6-NEXT: s_cselect_b32 s16, s4, s20
+; GFX6-NEXT: s_min_u32 s16, s4, s20
; GFX6-NEXT: s_sub_i32 s4, s4, s16
-; GFX6-NEXT: s_cmp_lt_u32 s5, s21
-; GFX6-NEXT: s_cselect_b32 s16, s5, s21
+; GFX6-NEXT: s_min_u32 s16, s5, s21
; GFX6-NEXT: s_sub_i32 s5, s5, s16
-; GFX6-NEXT: s_cmp_lt_u32 s6, s22
-; GFX6-NEXT: s_cselect_b32 s16, s6, s22
+; GFX6-NEXT: s_min_u32 s16, s6, s22
; GFX6-NEXT: s_sub_i32 s6, s6, s16
-; GFX6-NEXT: s_cmp_lt_u32 s7, s23
-; GFX6-NEXT: s_cselect_b32 s16, s7, s23
+; GFX6-NEXT: s_min_u32 s16, s7, s23
; GFX6-NEXT: s_sub_i32 s7, s7, s16
-; GFX6-NEXT: s_cmp_lt_u32 s8, s24
-; GFX6-NEXT: s_cselect_b32 s16, s8, s24
+; GFX6-NEXT: s_min_u32 s16, s8, s24
; GFX6-NEXT: s_sub_i32 s8, s8, s16
-; GFX6-NEXT: s_cmp_lt_u32 s9, s25
-; GFX6-NEXT: s_cselect_b32 s16, s9, s25
+; GFX6-NEXT: s_min_u32 s16, s9, s25
; GFX6-NEXT: s_sub_i32 s9, s9, s16
-; GFX6-NEXT: s_cmp_lt_u32 s10, s26
-; GFX6-NEXT: s_cselect_b32 s16, s10, s26
+; GFX6-NEXT: s_min_u32 s16, s10, s26
; GFX6-NEXT: s_sub_i32 s10, s10, s16
-; GFX6-NEXT: s_cmp_lt_u32 s11, s27
-; GFX6-NEXT: s_cselect_b32 s16, s11, s27
+; GFX6-NEXT: s_min_u32 s16, s11, s27
; GFX6-NEXT: s_sub_i32 s11, s11, s16
-; GFX6-NEXT: s_cmp_lt_u32 s12, s28
-; GFX6-NEXT: s_cselect_b32 s16, s12, s28
+; GFX6-NEXT: s_min_u32 s16, s12, s28
; GFX6-NEXT: s_sub_i32 s12, s12, s16
-; GFX6-NEXT: s_cmp_lt_u32 s13, s29
-; GFX6-NEXT: s_cselect_b32 s16, s13, s29
+; GFX6-NEXT: s_min_u32 s16, s13, s29
; GFX6-NEXT: s_sub_i32 s13, s13, s16
-; GFX6-NEXT: s_cmp_lt_u32 s14, s30
-; GFX6-NEXT: s_cselect_b32 s16, s14, s30
+; GFX6-NEXT: s_min_u32 s16, s14, s30
; GFX6-NEXT: s_sub_i32 s14, s14, s16
-; GFX6-NEXT: s_cmp_lt_u32 s15, s31
-; GFX6-NEXT: s_cselect_b32 s16, s15, s31
+; GFX6-NEXT: s_min_u32 s16, s15, s31
; GFX6-NEXT: s_sub_i32 s15, s15, s16
; GFX6-NEXT: ; return to shader part epilog
;
@@ -1612,8 +1572,7 @@ define amdgpu_ps i16 @s_usubsat_i16(i16 inreg %lhs, i16 inreg %rhs) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
-; GFX6-NEXT: s_cmp_lt_u32 s0, s1
-; GFX6-NEXT: s_cselect_b32 s1, s0, s1
+; GFX6-NEXT: s_min_u32 s1, s0, s1
; GFX6-NEXT: s_sub_i32 s0, s0, s1
; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: ; return to shader part epilog
@@ -1746,16 +1705,14 @@ define amdgpu_ps i32 @s_usubsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
-; GFX6-NEXT: s_cmp_lt_u32 s0, s2
-; GFX6-NEXT: s_cselect_b32 s2, s0, s2
+; GFX6-NEXT: s_min_u32 s2, s0, s2
; GFX6-NEXT: s_sub_i32 s0, s0, s2
-; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_lshl_b32 s2, s3, 16
-; GFX6-NEXT: s_cmp_lt_u32 s1, s2
-; GFX6-NEXT: s_cselect_b32 s2, s1, s2
+; GFX6-NEXT: s_min_u32 s2, s1, s2
; GFX6-NEXT: s_sub_i32 s1, s1, s2
; GFX6-NEXT: s_lshr_b32 s1, s1, 16
+; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: ; return to shader part epilog
@@ -1954,30 +1911,26 @@ define amdgpu_ps <2 x i32> @s_usubsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s4, s4, 16
-; GFX6-NEXT: s_cmp_lt_u32 s0, s4
-; GFX6-NEXT: s_cselect_b32 s4, s0, s4
+; GFX6-NEXT: s_min_u32 s4, s0, s4
; GFX6-NEXT: s_sub_i32 s0, s0, s4
-; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_lshl_b32 s4, s5, 16
-; GFX6-NEXT: s_cmp_lt_u32 s1, s4
-; GFX6-NEXT: s_cselect_b32 s4, s1, s4
+; GFX6-NEXT: s_min_u32 s4, s1, s4
; GFX6-NEXT: s_sub_i32 s1, s1, s4
-; GFX6-NEXT: s_lshr_b32 s1, s1, 16
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_lshl_b32 s4, s6, 16
-; GFX6-NEXT: s_cmp_lt_u32 s2, s4
-; GFX6-NEXT: s_cselect_b32 s4, s2, s4
+; GFX6-NEXT: s_min_u32 s4, s2, s4
; GFX6-NEXT: s_sub_i32 s2, s2, s4
-; GFX6-NEXT: s_lshr_b32 s2, s2, 16
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
; GFX6-NEXT: s_lshl_b32 s4, s7, 16
-; GFX6-NEXT: s_cmp_lt_u32 s3, s4
-; GFX6-NEXT: s_cselect_b32 s4, s3, s4
+; GFX6-NEXT: s_min_u32 s4, s3, s4
+; GFX6-NEXT: s_lshr_b32 s1, s1, 16
; GFX6-NEXT: s_sub_i32 s3, s3, s4
+; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_lshr_b32 s3, s3, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
+; GFX6-NEXT: s_lshr_b32 s2, s2, 16
; GFX6-NEXT: s_lshl_b32 s1, s3, 16
; GFX6-NEXT: s_or_b32 s1, s2, s1
; GFX6-NEXT: ; return to shader part epilog
@@ -2125,44 +2078,38 @@ define amdgpu_ps <3 x i32> @s_usubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
-; GFX6-NEXT: s_cmp_lt_u32 s0, s6
-; GFX6-NEXT: s_cselect_b32 s6, s0, s6
+; GFX6-NEXT: s_min_u32 s6, s0, s6
; GFX6-NEXT: s_sub_i32 s0, s0, s6
-; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_lshl_b32 s6, s7, 16
-; GFX6-NEXT: s_cmp_lt_u32 s1, s6
-; GFX6-NEXT: s_cselect_b32 s6, s1, s6
+; GFX6-NEXT: s_min_u32 s6, s1, s6
; GFX6-NEXT: s_sub_i32 s1, s1, s6
-; GFX6-NEXT: s_lshr_b32 s1, s1, 16
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_lshl_b32 s6, s8, 16
-; GFX6-NEXT: s_cmp_lt_u32 s2, s6
-; GFX6-NEXT: s_cselect_b32 s6, s2, s6
+; GFX6-NEXT: s_min_u32 s6, s2, s6
; GFX6-NEXT: s_sub_i32 s2, s2, s6
-; GFX6-NEXT: s_lshr_b32 s2, s2, 16
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
; GFX6-NEXT: s_lshl_b32 s6, s9, 16
-; GFX6-NEXT: s_cmp_lt_u32 s3, s6
-; GFX6-NEXT: s_cselect_b32 s6, s3, s6
+; GFX6-NEXT: s_min_u32 s6, s3, s6
; GFX6-NEXT: s_sub_i32 s3, s3, s6
-; GFX6-NEXT: s_lshr_b32 s3, s3, 16
; GFX6-NEXT: s_lshl_b32 s4, s4, 16
; GFX6-NEXT: s_lshl_b32 s6, s10, 16
-; GFX6-NEXT: s_cmp_lt_u32 s4, s6
-; GFX6-NEXT: s_cselect_b32 s6, s4, s6
+; GFX6-NEXT: s_min_u32 s6, s4, s6
; GFX6-NEXT: s_sub_i32 s4, s4, s6
-; GFX6-NEXT: s_lshr_b32 s4, s4, 16
; GFX6-NEXT: s_lshl_b32 s5, s5, 16
; GFX6-NEXT: s_lshl_b32 s6, s11, 16
-; GFX6-NEXT: s_cmp_lt_u32 s5, s6
-; GFX6-NEXT: s_cselect_b32 s6, s5, s6
+; GFX6-NEXT: s_lshr_b32 s1, s1, 16
+; GFX6-NEXT: s_min_u32 s6, s5, s6
; GFX6-NEXT: s_sub_i32 s5, s5, s6
+; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
+; GFX6-NEXT: s_lshr_b32 s3, s3, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
+; GFX6-NEXT: s_lshr_b32 s2, s2, 16
; GFX6-NEXT: s_lshl_b32 s1, s3, 16
; GFX6-NEXT: s_lshr_b32 s5, s5, 16
; GFX6-NEXT: s_or_b32 s1, s2, s1
+; GFX6-NEXT: s_lshr_b32 s4, s4, 16
; GFX6-NEXT: s_lshl_b32 s2, s5, 16
; GFX6-NEXT: s_or_b32 s2, s4, s2
; GFX6-NEXT: ; return to shader part epilog
@@ -2331,58 +2278,50 @@ define amdgpu_ps <4 x i32> @s_usubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s8, s8, 16
-; GFX6-NEXT: s_cmp_lt_u32 s0, s8
-; GFX6-NEXT: s_cselect_b32 s8, s0, s8
+; GFX6-NEXT: s_min_u32 s8, s0, s8
; GFX6-NEXT: s_sub_i32 s0, s0, s8
-; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_lshl_b32 s8, s9, 16
-; GFX6-NEXT: s_cmp_lt_u32 s1, s8
-; GFX6-NEXT: s_cselect_b32 s8, s1, s8
+; GFX6-NEXT: s_min_u32 s8, s1, s8
; GFX6-NEXT: s_sub_i32 s1, s1, s8
-; GFX6-NEXT: s_lshr_b32 s1, s1, 16
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_lshl_b32 s8, s10, 16
-; GFX6-NEXT: s_cmp_lt_u32 s2, s8
-; GFX6-NEXT: s_cselect_b32 s8, s2, s8
+; GFX6-NEXT: s_min_u32 s8, s2, s8
; GFX6-NEXT: s_sub_i32 s2, s2, s8
-; GFX6-NEXT: s_lshr_b32 s2, s2, 16
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
; GFX6-NEXT: s_lshl_b32 s8, s11, 16
-; GFX6-NEXT: s_cmp_lt_u32 s3, s8
-; GFX6-NEXT: s_cselect_b32 s8, s3, s8
+; GFX6-NEXT: s_min_u32 s8, s3, s8
; GFX6-NEXT: s_sub_i32 s3, s3, s8
-; GFX6-NEXT: s_lshr_b32 s3, s3, 16
; GFX6-NEXT: s_lshl_b32 s4, s4, 16
; GFX6-NEXT: s_lshl_b32 s8, s12, 16
-; GFX6-NEXT: s_cmp_lt_u32 s4, s8
-; GFX6-NEXT: s_cselect_b32 s8, s4, s8
+; GFX6-NEXT: s_min_u32 s8, s4, s8
; GFX6-NEXT: s_sub_i32 s4, s4, s8
-; GFX6-NEXT: s_lshr_b32 s4, s4, 16
; GFX6-NEXT: s_lshl_b32 s5, s5, 16
; GFX6-NEXT: s_lshl_b32 s8, s13, 16
-; GFX6-NEXT: s_cmp_lt_u32 s5, s8
-; GFX6-NEXT: s_cselect_b32 s8, s5, s8
+; GFX6-NEXT: s_min_u32 s8, s5, s8
; GFX6-NEXT: s_sub_i32 s5, s5, s8
-; GFX6-NEXT: s_lshr_b32 s5, s5, 16
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
; GFX6-NEXT: s_lshl_b32 s8, s14, 16
-; GFX6-NEXT: s_cmp_lt_u32 s6, s8
-; GFX6-NEXT: s_cselect_b32 s8, s6, s8
+; GFX6-NEXT: s_min_u32 s8, s6, s8
; GFX6-NEXT: s_sub_i32 s6, s6, s8
-; GFX6-NEXT: s_lshr_b32 s6, s6, 16
; GFX6-NEXT: s_lshl_b32 s7, s7, 16
; GFX6-NEXT: s_lshl_b32 s8, s15, 16
-; GFX6-NEXT: s_cmp_lt_u32 s7, s8
-; GFX6-NEXT: s_cselect_b32 s8, s7, s8
+; GFX6-NEXT: s_lshr_b32 s1, s1, 16
+; GFX6-NEXT: s_min_u32 s8, s7, s8
; GFX6-NEXT: s_sub_i32 s7, s7, s8
+; GFX6-NEXT: s_lshr_b32 s0, s0, 16
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
+; GFX6-NEXT: s_lshr_b32 s3, s3, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
+; GFX6-NEXT: s_lshr_b32 s2, s2, 16
; GFX6-NEXT: s_lshl_b32 s1, s3, 16
; GFX6-NEXT: s_lshr_b32 s7, s7, 16
-; GFX6-NEXT: s_lshl_b32 s3, s7, 16
+; GFX6-NEXT: s_lshr_b32 s5, s5, 16
; GFX6-NEXT: s_or_b32 s1, s2, s1
+; GFX6-NEXT: s_lshr_b32 s4, s4, 16
; GFX6-NEXT: s_lshl_b32 s2, s5, 16
+; GFX6-NEXT: s_lshr_b32 s6, s6, 16
+; GFX6-NEXT: s_lshl_b32 s3, s7, 16
; GFX6-NEXT: s_or_b32 s2, s4, s2
; GFX6-NEXT: s_or_b32 s3, s6, s3
; GFX6-NEXT: ; return to shader part epilog
More information about the llvm-commits mailing list