[llvm] 05783e1 - [amdgpu] Revise the conversion from i64 to f32.
Michael Liao via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 6 14:02:01 PDT 2021
Author: Michael Liao
Date: 2021-08-06T17:01:47-04:00
New Revision: 05783e1cfe40ce20b080c9fce1b148515c027fa4
URL: https://github.com/llvm/llvm-project/commit/05783e1cfe40ce20b080c9fce1b148515c027fa4
DIFF: https://github.com/llvm/llvm-project/commit/05783e1cfe40ce20b080c9fce1b148515c027fa4.diff
LOG: [amdgpu] Revise the conversion from i64 to f32.
- Replace 'cmp+sel' with 'umin' if possible.
Reviewed By: foad
Differential Revision: https://reviews.llvm.org/D107507
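For a quick sanity check of the identities the rewrite relies on, here is a minimal standalone C++ sketch (illustration only; it assumes the usual two's-complement conversions and an arithmetic right shift of negative values, matching the hardware's ashr):

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t Lo : {0u, 1u, 7u, 0x80000000u, 0xffffffffu}) {
    for (uint32_t Hi : {0u, 1u, 0x7fffffffu, 0x80000000u, 0xffffffffu}) {
      // Rounding adjust bit: (Lo != 0) ? 1 : 0  ==  umin(1, Lo).
      assert(((Lo != 0) ? 1u : 0u) == std::min(1u, Lo));
      // Maximal shift amount: 33 if Lo and Hi have the same sign, 32 if
      // they differ, i.e. 33 + ((Lo ^ Hi) >> 31) with an arithmetic shift.
      int32_t OppositeSign = static_cast<int32_t>(Lo ^ Hi) >> 31; // 0 or -1
      uint32_t CmpSelForm = (static_cast<int32_t>(Lo ^ Hi) >= 0) ? 33u : 32u;
      assert(CmpSelForm == static_cast<uint32_t>(33 + OppositeSign));
    }
  }
  return 0;
}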
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir
llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll
llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index e8a46e050974..9fa3ad64d0be 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2457,10 +2457,6 @@ SDValue AMDGPUTargetLowering::LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG,
SDLoc SL(Op);
SDValue Src = Op.getOperand(0);
- EVT SetCCVT =
- getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i32);
- SDValue ZeroI32 = DAG.getConstant(0, SL, MVT::i32);
-
SDValue Lo, Hi;
std::tie(Lo, Hi) = split64BitValue(Src, DAG);
SDValue Sign;
@@ -2468,25 +2464,38 @@ SDValue AMDGPUTargetLowering::LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG,
if (Signed && Subtarget->isGCN()) {
// We also need to consider the sign bit in Lo if Hi has just sign bits,
// i.e. Hi is 0 or -1. However, that only needs to take the MSB into
- // account.
- SDValue HasSameSign =
- DAG.getSetCC(SL, SetCCVT, DAG.getNode(ISD::XOR, SL, MVT::i32, Lo, Hi),
- ZeroI32, ISD::SETGE);
- SDValue MaxShAmt = DAG.getSelect(SL, MVT::i32, HasSameSign,
- DAG.getConstant(33, SL, MVT::i32),
- DAG.getConstant(32, SL, MVT::i32));
+ // account. That is, the maximal shift is
+ // - 32 if Lo and Hi have opposite signs;
+ // - 33 if Lo and Hi have the same sign.
+ //
+ // Or, MaxShAmt = 33 + OppositeSign, where
+ //
+ // OppositeSign is defined as ((Lo ^ Hi) >> 31), which is
+ // - -1 if Lo and Hi have opposite signs; and
+ // - 0 otherwise.
+ //
+ // All in all, ShAmt is calculated as
+ //
+ // umin(sffbh(Hi), 33 + (Lo^Hi)>>31) - 1,
+ //
+ // or, equivalently,
+ //
+ // umin(sffbh(Hi) - 1, 32 + (Lo^Hi)>>31),
+ //
+ // where the latter form reduces the critical path.
+ SDValue OppositeSign = DAG.getNode(
+ ISD::SRA, SL, MVT::i32, DAG.getNode(ISD::XOR, SL, MVT::i32, Lo, Hi),
+ DAG.getConstant(31, SL, MVT::i32));
+ SDValue MaxShAmt =
+ DAG.getNode(ISD::ADD, SL, MVT::i32, DAG.getConstant(32, SL, MVT::i32),
+ OppositeSign);
// Count the leading sign bits.
ShAmt = DAG.getNode(AMDGPUISD::FFBH_I32, SL, MVT::i32, Hi);
- ShAmt = DAG.getSelect(SL, MVT::i32,
- DAG.getSetCC(SL, SetCCVT, ShAmt,
- DAG.getAllOnesConstant(SL, MVT::i32),
- ISD::SETNE),
- ShAmt, MaxShAmt);
- // The shift amount for signed integers is [1, 33].
// Different from unsigned conversion, the shift should be one bit less to
// preserve the sign bit.
ShAmt = DAG.getNode(ISD::SUB, SL, MVT::i32, ShAmt,
DAG.getConstant(1, SL, MVT::i32));
+ ShAmt = DAG.getNode(ISD::UMIN, SL, MVT::i32, ShAmt, MaxShAmt);
} else {
if (Signed) {
// Without 'ffbh_i32', only leading zeros could be counted. Take the
@@ -2507,9 +2516,9 @@ SDValue AMDGPUTargetLowering::LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG,
// Split it again.
std::tie(Lo, Hi) = split64BitValue(Norm, DAG);
// Calculate the adjust bit for rounding.
- SDValue Adjust = DAG.getSelect(
- SL, MVT::i32, DAG.getSetCC(SL, SetCCVT, Lo, ZeroI32, ISD::SETNE),
- DAG.getConstant(1, SL, MVT::i32), ZeroI32);
+ // (lo != 0) ? 1 : 0 => (lo >= 1) ? 1 : 0 => umin(1, lo)
+ SDValue Adjust = DAG.getNode(ISD::UMIN, SL, MVT::i32,
+ DAG.getConstant(1, SL, MVT::i32), Lo);
// Get the 32-bit normalized integer.
Norm = DAG.getNode(ISD::OR, SL, MVT::i32, Hi, Adjust);
// Convert the normalized 32-bit integer into f32.
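For reference, a scalar model of the revised signed path (an illustrative C++ sketch, not the DAG code itself; the helper name sffbh is assumed here to stand for the hardware's count of leading sign bits, v_ffbh_i32 / s_flbit_i32, and the same two's-complement / arithmetic-shift assumptions as above apply):

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>

// Model of v_ffbh_i32 / s_flbit_i32: position (counted from the MSB) of the
// first bit that differs from the sign bit, or 0xffffffff when every bit
// matches the sign bit (x == 0 or x == -1).
uint32_t sffbh(int32_t x) {
  uint32_t v = static_cast<uint32_t>(x < 0 ? ~x : x);
  if (v == 0)
    return ~0u;
  uint32_t n = 0;
  while (!(v & 0x80000000u)) {
    v <<= 1;
    ++n;
  }
  return n;
}

// Scalar model of the signed i64 -> f32 conversion after this change.
float SignedI64ToF32(int64_t Src) {
  uint32_t Lo = static_cast<uint32_t>(Src);
  int32_t Hi = static_cast<int32_t>(static_cast<uint64_t>(Src) >> 32);

  // OppositeSign = (Lo ^ Hi) >> 31 with an arithmetic shift: -1 or 0.
  int32_t OppositeSign =
      static_cast<int32_t>(Lo ^ static_cast<uint32_t>(Hi)) >> 31;
  uint32_t MaxShAmt = static_cast<uint32_t>(32 + OppositeSign);
  // The unsigned min absorbs the Hi == 0 / Hi == -1 case, where sffbh
  // returns ~0u and the wrapped "sffbh - 1" stays huge.
  uint32_t ShAmt = std::min(sffbh(Hi) - 1u, MaxShAmt);

  uint64_t Norm = static_cast<uint64_t>(Src) << ShAmt;
  uint32_t NormLo = static_cast<uint32_t>(Norm);
  uint32_t NormHi = static_cast<uint32_t>(Norm >> 32);
  // Sticky bit for rounding: (NormLo != 0) ? 1 : 0 == umin(1, NormLo).
  uint32_t Adjust = std::min(1u, NormLo);

  float F = static_cast<float>(static_cast<int32_t>(NormHi | Adjust));
  return std::ldexp(F, 32 - static_cast<int32_t>(ShAmt));
}

int main() {
  const int64_t Tests[] = {0, 1, -1, 255, -256, (int64_t(1) << 40) + 3,
                           -((int64_t(1) << 40) + 3), INT64_MAX, INT64_MIN};
  for (int64_t V : Tests)
    assert(SignedI64ToF32(V) == static_cast<float>(V));
  return 0;
}

The unsigned path differs only in the shift amount, umin(ctlz(Hi), 32), and in using an unsigned 32-bit convert before the ldexp.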
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 5493bce79850..5a8c14234bb7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -2065,7 +2065,6 @@ bool AMDGPULegalizerInfo::legalizeITOFP(
const LLT S64 = LLT::scalar(64);
const LLT S32 = LLT::scalar(32);
- const LLT S1 = LLT::scalar(1);
assert(MRI.getType(Src) == S64);
@@ -2089,29 +2088,24 @@ bool AMDGPULegalizerInfo::legalizeITOFP(
assert(MRI.getType(Dst) == S32);
- auto Zero = B.buildConstant(S32, 0);
auto One = B.buildConstant(S32, 1);
- auto AllOnes = B.buildConstant(S32, -1);
MachineInstrBuilder ShAmt;
if (Signed) {
- auto ThirtyThree = B.buildConstant(S32, 33);
+ auto ThirtyOne = B.buildConstant(S32, 31);
auto X = B.buildXor(S32, Unmerge.getReg(0), Unmerge.getReg(1));
- auto HasSameSign = B.buildICmp(CmpInst::ICMP_SGE, S1, X, Zero);
- auto MaxShAmt = B.buildSelect(S32, HasSameSign, ThirtyThree, ThirtyTwo);
+ auto OppositeSign = B.buildAShr(S32, X, ThirtyOne);
+ auto MaxShAmt = B.buildAdd(S32, ThirtyTwo, OppositeSign);
auto LS = B.buildIntrinsic(Intrinsic::amdgcn_sffbh, {S32},
/*HasSideEffects=*/false)
.addUse(Unmerge.getReg(1));
- auto NotAllSameBits = B.buildICmp(CmpInst::ICMP_NE, S1, LS, AllOnes);
- auto LS2 = B.buildSelect(S32, NotAllSameBits, LS, MaxShAmt);
- ShAmt = B.buildSub(S32, LS2, One);
+ auto LS2 = B.buildSub(S32, LS, One);
+ ShAmt = B.buildUMin(S32, LS2, MaxShAmt);
} else
ShAmt = B.buildCTLZ(S32, Unmerge.getReg(1));
auto Norm = B.buildShl(S64, Src, ShAmt);
auto Unmerge2 = B.buildUnmerge({S32, S32}, Norm);
- auto NotAllZeros =
- B.buildICmp(CmpInst::ICMP_NE, S1, Unmerge2.getReg(0), Zero);
- auto Adjust = B.buildSelect(S32, NotAllZeros, One, Zero);
+ auto Adjust = B.buildUMin(S32, One, Unmerge2.getReg(0));
auto Norm2 = B.buildOr(S32, Unmerge2.getReg(1), Adjust);
auto FVal = Signed ? B.buildSITOFP(S32, Norm2) : B.buildUITOFP(S32, Norm2);
auto Scale = B.buildSub(S32, ThirtyTwo, ShAmt);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
index e9ca0bddc5a2..2135d94bc0fa 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
@@ -1082,15 +1082,15 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) {
; SI-LABEL: v_test_sitofp_i64_byte_to_f32:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_ffbh_i32_e32 v2, 0
-; SI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v2
-; SI-NEXT: v_cndmask_b32_e32 v2, 33, v2, vcc
; SI-NEXT: v_and_b32_e32 v0, 0xff, v0
+; SI-NEXT: v_ashrrev_i32_e32 v2, 31, v0
+; SI-NEXT: v_ffbh_i32_e32 v3, 0
+; SI-NEXT: v_add_i32_e32 v2, vcc, 32, v2
+; SI-NEXT: v_subrev_i32_e32 v3, vcc, 1, v3
; SI-NEXT: v_mov_b32_e32 v1, 0
-; SI-NEXT: v_subrev_i32_e32 v2, vcc, 1, v2
+; SI-NEXT: v_min_u32_e32 v2, v3, v2
; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], v2
-; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
-; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-NEXT: v_min_u32_e32 v0, 1, v0
; SI-NEXT: v_or_b32_e32 v0, v1, v0
; SI-NEXT: v_cvt_f32_i32_e32 v0, v0
; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v2
@@ -1100,15 +1100,15 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) {
; VI-LABEL: v_test_sitofp_i64_byte_to_f32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_ffbh_i32_e32 v2, 0
-; VI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v2
-; VI-NEXT: v_cndmask_b32_e32 v2, 33, v2, vcc
; VI-NEXT: v_and_b32_e32 v0, 0xff, v0
+; VI-NEXT: v_ashrrev_i32_e32 v2, 31, v0
+; VI-NEXT: v_ffbh_i32_e32 v3, 0
+; VI-NEXT: v_add_u32_e32 v2, vcc, 32, v2
+; VI-NEXT: v_subrev_u32_e32 v3, vcc, 1, v3
; VI-NEXT: v_mov_b32_e32 v1, 0
-; VI-NEXT: v_subrev_u32_e32 v2, vcc, 1, v2
+; VI-NEXT: v_min_u32_e32 v2, v3, v2
; VI-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
-; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
-; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-NEXT: v_min_u32_e32 v0, 1, v0
; VI-NEXT: v_or_b32_e32 v0, v1, v0
; VI-NEXT: v_cvt_f32_i32_e32 v0, v0
; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v2
@@ -1128,8 +1128,7 @@ define float @v_test_uitofp_i64_byte_to_f32(i64 %arg0) {
; SI-NEXT: v_mov_b32_e32 v1, 0
; SI-NEXT: v_min_u32_e32 v2, 32, v2
; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], v2
-; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
-; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-NEXT: v_min_u32_e32 v0, 1, v0
; SI-NEXT: v_or_b32_e32 v0, v1, v0
; SI-NEXT: v_cvt_f32_u32_e32 v0, v0
; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v2
@@ -1144,8 +1143,7 @@ define float @v_test_uitofp_i64_byte_to_f32(i64 %arg0) {
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: v_min_u32_e32 v2, 32, v2
; VI-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
-; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
-; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-NEXT: v_min_u32_e32 v0, 1, v0
; VI-NEXT: v_or_b32_e32 v0, v1, v0
; VI-NEXT: v_cvt_f32_u32_e32 v0, v0
; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir
index ccbf30096c85..4a0310c4b229 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir
@@ -100,48 +100,40 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
- ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 33
+ ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV1]]
- ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sge), [[XOR]](s32), [[C1]]
- ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C4]], [[C]]
+ ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32)
+ ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]]
; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](s32)
- ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[INT]](s32), [[C3]]
- ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[INT]], [[SELECT]]
- ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SELECT1]], [[C2]]
- ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[SUB]](s32)
+ ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]]
+ ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]]
+ ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32)
; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
- ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C1]]
- ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C1]]
- ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT2]]
+ ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV2]]
+ ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
; GFX6: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32)
- ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SUB]]
+ ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
; GFX6: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s32), [[SUB1]](s32)
; GFX6: $vgpr0 = COPY [[INT1]](s32)
; GFX8-LABEL: name: test_sitofp_s64_to_s32
; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
- ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 33
+ ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV1]]
- ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sge), [[XOR]](s32), [[C1]]
- ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C4]], [[C]]
+ ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32)
+ ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]]
; GFX8: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](s32)
- ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[INT]](s32), [[C3]]
- ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[INT]], [[SELECT]]
- ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SELECT1]], [[C2]]
- ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[SUB]](s32)
+ ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]]
+ ; GFX8: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]]
+ ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32)
; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
- ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C1]]
- ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C1]]
- ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT2]]
+ ; GFX8: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV2]]
+ ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
; GFX8: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32)
- ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SUB]]
+ ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
; GFX8: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s32), [[SUB1]](s32)
; GFX8: $vgpr0 = COPY [[INT1]](s32)
%0:_(s64) = COPY $vgpr0_vgpr1
@@ -420,24 +412,20 @@ body: |
; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 33
; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64)
; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
- ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 33
+ ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV1]]
- ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sge), [[XOR]](s32), [[C1]]
- ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C4]], [[C]]
+ ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32)
+ ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]]
; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](s32)
- ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[INT]](s32), [[C3]]
- ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[INT]], [[SELECT]]
- ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SELECT1]], [[C2]]
- ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT_INREG]], [[SUB]](s32)
+ ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]]
+ ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]]
+ ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT_INREG]], [[UMIN]](s32)
; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
- ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C1]]
- ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C1]]
- ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT2]]
+ ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV2]]
+ ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
; GFX6: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32)
- ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SUB]]
+ ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
; GFX6: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s32), [[SUB1]](s32)
; GFX6: $vgpr0 = COPY [[INT1]](s32)
; GFX8-LABEL: name: test_sitofp_s33_to_s32
@@ -446,24 +434,20 @@ body: |
; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 33
; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64)
; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
- ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 33
+ ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV1]]
- ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sge), [[XOR]](s32), [[C1]]
- ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C4]], [[C]]
+ ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32)
+ ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]]
; GFX8: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](s32)
- ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[INT]](s32), [[C3]]
- ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[INT]], [[SELECT]]
- ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SELECT1]], [[C2]]
- ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT_INREG]], [[SUB]](s32)
+ ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]]
+ ; GFX8: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]]
+ ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT_INREG]], [[UMIN]](s32)
; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
- ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C1]]
- ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C1]]
- ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT2]]
+ ; GFX8: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV2]]
+ ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
; GFX8: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32)
- ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SUB]]
+ ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
; GFX8: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s32), [[SUB1]](s32)
; GFX8: $vgpr0 = COPY [[INT1]](s32)
%0:_(s64) = COPY $vgpr0_vgpr1
@@ -482,24 +466,20 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
- ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 33
+ ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV1]]
- ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sge), [[XOR]](s32), [[C1]]
- ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C4]], [[C]]
+ ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32)
+ ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]]
; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](s32)
- ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[INT]](s32), [[C3]]
- ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[INT]], [[SELECT]]
- ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SELECT1]], [[C2]]
- ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[SUB]](s32)
+ ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]]
+ ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]]
+ ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32)
; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
- ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C1]]
- ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C1]]
- ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT2]]
+ ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV2]]
+ ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
; GFX6: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32)
- ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SUB]]
+ ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
; GFX6: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s32), [[SUB1]](s32)
; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32)
; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
@@ -508,24 +488,20 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
- ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 33
+ ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV1]]
- ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sge), [[XOR]](s32), [[C1]]
- ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C4]], [[C]]
+ ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32)
+ ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]]
; GFX8: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](s32)
- ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[INT]](s32), [[C3]]
- ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[INT]], [[SELECT]]
- ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SELECT1]], [[C2]]
- ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[SUB]](s32)
+ ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]]
+ ; GFX8: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]]
+ ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32)
; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
- ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C1]]
- ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C1]]
- ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT2]]
+ ; GFX8: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV2]]
+ ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
; GFX8: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32)
- ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SUB]]
+ ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
; GFX8: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s32), [[SUB1]](s32)
; GFX8: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32)
; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
@@ -547,47 +523,41 @@ body: |
; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
- ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 33
+ ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV2]], [[UV3]]
- ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sge), [[XOR]](s32), [[C1]]
- ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C4]], [[C]]
+ ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32)
+ ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]]
; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV3]](s32)
- ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[INT]](s32), [[C3]]
- ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[INT]], [[SELECT]]
- ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SELECT1]], [[C2]]
- ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[SUB]](s32)
+ ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]]
+ ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]]
+ ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UMIN]](s32)
; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
- ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV4]](s32), [[C1]]
- ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C1]]
- ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV5]], [[SELECT2]]
+ ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV4]]
+ ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV5]], [[UMIN1]]
; GFX6: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32)
- ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SUB]]
+ ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
; GFX6: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s32), [[SUB1]](s32)
; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32)
; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV6]], [[UV7]]
- ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sge), [[XOR1]](s32), [[C1]]
- ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[C4]], [[C]]
+ ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[XOR1]], [[C2]](s32)
+ ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR1]]
; GFX6: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV7]](s32)
- ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[INT2]](s32), [[C3]]
- ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s1), [[INT2]], [[SELECT3]]
- ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SELECT4]], [[C2]]
- ; GFX6: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB2]](s32)
+ ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[INT2]], [[C1]]
+ ; GFX6: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[SUB2]], [[ADD1]]
+ ; GFX6: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UMIN2]](s32)
; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL1]](s64)
- ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV8]](s32), [[C1]]
- ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[C2]], [[C1]]
- ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV9]], [[SELECT5]]
+ ; GFX6: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV8]]
+ ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV9]], [[UMIN3]]
; GFX6: [[SITOFP1:%[0-9]+]]:_(s32) = G_SITOFP [[OR1]](s32)
- ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SUB2]]
+ ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN2]]
; GFX6: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP1]](s32), [[SUB3]](s32)
; GFX6: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT3]](s32)
; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
- ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+ ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
; GFX6: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; GFX6: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
@@ -596,47 +566,41 @@ body: |
; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
- ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 33
+ ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV2]], [[UV3]]
- ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sge), [[XOR]](s32), [[C1]]
- ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C4]], [[C]]
+ ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32)
+ ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]]
; GFX8: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV3]](s32)
- ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[INT]](s32), [[C3]]
- ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[INT]], [[SELECT]]
- ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SELECT1]], [[C2]]
- ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[SUB]](s32)
+ ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]]
+ ; GFX8: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]]
+ ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UMIN]](s32)
; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
- ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV4]](s32), [[C1]]
- ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C1]]
- ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV5]], [[SELECT2]]
+ ; GFX8: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV4]]
+ ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV5]], [[UMIN1]]
; GFX8: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32)
- ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SUB]]
+ ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
; GFX8: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s32), [[SUB1]](s32)
; GFX8: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32)
; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV6]], [[UV7]]
- ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sge), [[XOR1]](s32), [[C1]]
- ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[C4]], [[C]]
+ ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[XOR1]], [[C2]](s32)
+ ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR1]]
; GFX8: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV7]](s32)
- ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[INT2]](s32), [[C3]]
- ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s1), [[INT2]], [[SELECT3]]
- ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SELECT4]], [[C2]]
- ; GFX8: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB2]](s32)
+ ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[INT2]], [[C1]]
+ ; GFX8: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[SUB2]], [[ADD1]]
+ ; GFX8: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UMIN2]](s32)
; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL1]](s64)
- ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV8]](s32), [[C1]]
- ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[C2]], [[C1]]
- ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV9]], [[SELECT5]]
+ ; GFX8: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV8]]
+ ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV9]], [[UMIN3]]
; GFX8: [[SITOFP1:%[0-9]+]]:_(s32) = G_SITOFP [[OR1]](s32)
- ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SUB2]]
+ ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN2]]
; GFX8: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP1]](s32), [[SUB3]](s32)
; GFX8: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT3]](s32)
; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
- ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+ ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; GFX8: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir
index d3db5f8bb847..747bd70082f5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir
@@ -75,15 +75,13 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; GFX6: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV1]](s32)
; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]]
; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32)
; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
- ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C1]]
- ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C1]]
- ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT]]
+ ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV2]]
+ ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32)
; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32)
@@ -92,15 +90,13 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; GFX8: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV1]](s32)
; GFX8: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]]
; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32)
; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
- ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C1]]
- ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C1]]
- ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT]]
+ ; GFX8: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV2]]
+ ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32)
; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
; GFX8: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32)
@@ -393,15 +389,13 @@ body: |
; GFX6: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; GFX6: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV1]](s32)
; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C1]]
; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND]], [[UMIN]](s32)
; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
- ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C2]]
- ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[C2]]
- ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT]]
+ ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C2]], [[UV2]]
+ ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32)
; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UMIN]]
; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32)
@@ -413,15 +407,13 @@ body: |
; GFX8: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; GFX8: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV1]](s32)
; GFX8: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C1]]
; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND]], [[UMIN]](s32)
; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
- ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C2]]
- ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[C2]]
- ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT]]
+ ; GFX8: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C2]], [[UV2]]
+ ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32)
; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UMIN]]
; GFX8: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32)
@@ -442,15 +434,13 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; GFX6: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV1]](s32)
; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]]
; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32)
; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
- ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C1]]
- ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C1]]
- ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT]]
+ ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV2]]
+ ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32)
; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32)
@@ -461,15 +451,13 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; GFX8: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV1]](s32)
; GFX8: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]]
; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32)
; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
- ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C1]]
- ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C1]]
- ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT]]
+ ; GFX8: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV2]]
+ ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32)
; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
; GFX8: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32)
@@ -493,35 +481,32 @@ body: |
; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; GFX6: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV3]](s32)
; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]]
; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UMIN]](s32)
; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
- ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV4]](s32), [[C1]]
- ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C1]]
- ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV5]], [[SELECT]]
+ ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV4]]
+ ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV5]], [[UMIN1]]
; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32)
; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32)
; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32)
; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
; GFX6: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV7]](s32)
- ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_1]], [[C]]
- ; GFX6: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UMIN1]](s32)
+ ; GFX6: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_1]], [[C]]
+ ; GFX6: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UMIN2]](s32)
; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL1]](s64)
- ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV8]](s32), [[C1]]
- ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C2]], [[C1]]
- ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV9]], [[SELECT1]]
+ ; GFX6: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV8]]
+ ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV9]], [[UMIN3]]
; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[OR1]](s32)
- ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN1]]
+ ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN2]]
; GFX6: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP1]](s32), [[SUB1]](s32)
; GFX6: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32)
; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
- ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
+ ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
; GFX6: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; GFX6: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
@@ -530,35 +515,32 @@ body: |
; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; GFX8: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV3]](s32)
; GFX8: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]]
; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UMIN]](s32)
; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
- ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV4]](s32), [[C1]]
- ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C1]]
- ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV5]], [[SELECT]]
+ ; GFX8: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV4]]
+ ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV5]], [[UMIN1]]
; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32)
; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
; GFX8: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32)
; GFX8: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32)
; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
; GFX8: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV7]](s32)
- ; GFX8: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_1]], [[C]]
- ; GFX8: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UMIN1]](s32)
+ ; GFX8: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_1]], [[C]]
+ ; GFX8: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UMIN2]](s32)
; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL1]](s64)
- ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV8]](s32), [[C1]]
- ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C2]], [[C1]]
- ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV9]], [[SELECT1]]
+ ; GFX8: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV8]]
+ ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV9]], [[UMIN3]]
; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[OR1]](s32)
- ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN1]]
+ ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN2]]
; GFX8: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP1]](s32), [[SUB1]](s32)
; GFX8: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32)
; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
- ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
+ ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; GFX8: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
diff --git a/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll b/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll
index 06c678028a1c..7ea3f9249b10 100644
--- a/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll
@@ -13,21 +13,18 @@ define amdgpu_kernel void @s_sint_to_fp_i64_to_f16(half addrspace(1)* %out, i64
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
-; GFX6-NEXT: s_xor_b32 s0, s2, s3
-; GFX6-NEXT: s_flbit_i32 s8, s3
-; GFX6-NEXT: v_cmp_gt_i32_e64 s[0:1], s0, -1
-; GFX6-NEXT: v_cndmask_b32_e64 v0, 32, 33, s[0:1]
-; GFX6-NEXT: v_mov_b32_e32 v1, s8
-; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, s8, -1
-; GFX6-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
-; GFX6-NEXT: v_add_i32_e32 v0, vcc, -1, v2
-; GFX6-NEXT: v_lshl_b64 v[0:1], s[2:3], v0
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
-; GFX6-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
-; GFX6-NEXT: v_cvt_f32_i32_e32 v0, v0
-; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 33, v2
-; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; GFX6-NEXT: s_flbit_i32 s0, s3
+; GFX6-NEXT: s_xor_b32 s1, s2, s3
+; GFX6-NEXT: s_add_i32 s0, s0, -1
+; GFX6-NEXT: s_ashr_i32 s1, s1, 31
+; GFX6-NEXT: s_add_i32 s1, s1, 32
+; GFX6-NEXT: s_min_u32 s8, s0, s1
+; GFX6-NEXT: s_lshl_b64 s[0:1], s[2:3], s8
+; GFX6-NEXT: s_min_u32 s0, s0, 1
+; GFX6-NEXT: s_or_b32 s0, s1, s0
+; GFX6-NEXT: v_cvt_f32_i32_e32 v0, s0
+; GFX6-NEXT: s_sub_i32 s0, 32, s8
+; GFX6-NEXT: v_ldexp_f32_e64 v0, v0, s0
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: buffer_store_short v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
@@ -36,19 +33,17 @@ define amdgpu_kernel void @s_sint_to_fp_i64_to_f16(half addrspace(1)* %out, i64
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8-NEXT: s_xor_b32 s4, s2, s3
-; GFX8-NEXT: s_cmp_gt_i32 s4, -1
-; GFX8-NEXT: s_flbit_i32 s5, s3
-; GFX8-NEXT: s_cselect_b32 s4, 33, 32
-; GFX8-NEXT: s_cmp_lg_u32 s5, -1
-; GFX8-NEXT: s_cselect_b32 s6, s5, s4
-; GFX8-NEXT: s_add_i32 s4, s6, -1
+; GFX8-NEXT: s_xor_b32 s5, s2, s3
+; GFX8-NEXT: s_flbit_i32 s4, s3
+; GFX8-NEXT: s_ashr_i32 s5, s5, 31
+; GFX8-NEXT: s_add_i32 s4, s4, -1
+; GFX8-NEXT: s_add_i32 s5, s5, 32
+; GFX8-NEXT: s_min_u32 s4, s4, s5
; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], s4
-; GFX8-NEXT: v_cmp_ne_u32_e64 s[4:5], s2, 0
-; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
-; GFX8-NEXT: v_or_b32_e32 v0, s3, v0
-; GFX8-NEXT: v_cvt_f32_i32_e32 v0, v0
-; GFX8-NEXT: s_sub_i32 s2, 33, s6
+; GFX8-NEXT: s_min_u32 s2, s2, 1
+; GFX8-NEXT: s_or_b32 s2, s3, s2
+; GFX8-NEXT: v_cvt_f32_i32_e32 v0, s2
+; GFX8-NEXT: s_sub_i32 s2, 32, s4
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_ldexp_f32 v0, v0, s2
; GFX8-NEXT: v_cvt_f16_f32_e32 v2, v0
@@ -76,17 +71,15 @@ define amdgpu_kernel void @v_sint_to_fp_i64_to_f16(half addrspace(1)* %out, i64
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_xor_b32_e32 v0, v3, v4
; GFX6-NEXT: v_ffbh_i32_e32 v5, v4
-; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, -1, v0
-; GFX6-NEXT: v_cndmask_b32_e64 v0, 32, 33, vcc
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, -1, v5
-; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
-; GFX6-NEXT: v_add_i32_e32 v5, vcc, -1, v0
-; GFX6-NEXT: v_lshl_b64 v[3:4], v[3:4], v5
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
-; GFX6-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
+; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v0
+; GFX6-NEXT: v_add_i32_e32 v5, vcc, -1, v5
+; GFX6-NEXT: v_add_i32_e32 v0, vcc, 32, v0
+; GFX6-NEXT: v_min_u32_e32 v0, v5, v0
+; GFX6-NEXT: v_lshl_b64 v[3:4], v[3:4], v0
+; GFX6-NEXT: v_min_u32_e32 v3, 1, v3
; GFX6-NEXT: v_or_b32_e32 v3, v4, v3
; GFX6-NEXT: v_cvt_f32_i32_e32 v3, v3
-; GFX6-NEXT: v_sub_i32_e32 v0, vcc, 33, v0
+; GFX6-NEXT: v_sub_i32_e32 v0, vcc, 32, v0
; GFX6-NEXT: v_ldexp_f32_e32 v0, v3, v0
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: buffer_store_short v0, v[1:2], s[0:3], 0 addr64
@@ -96,33 +89,30 @@ define amdgpu_kernel void @v_sint_to_fp_i64_to_f16(half addrspace(1)* %out, i64
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 3, v0
-; GFX8-NEXT: v_mov_b32_e32 v3, 0
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, s3
; GFX8-NEXT: v_add_u32_e32 v1, vcc, s2, v1
-; GFX8-NEXT: v_addc_u32_e32 v2, vcc, v2, v3, vcc
+; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc
; GFX8-NEXT: flat_load_dwordx2 v[1:2], v[1:2]
; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_xor_b32_e32 v4, v1, v2
-; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, -1, v4
-; GFX8-NEXT: v_ffbh_i32_e32 v5, v2
-; GFX8-NEXT: v_cndmask_b32_e64 v4, 32, 33, vcc
-; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, -1, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
-; GFX8-NEXT: v_add_u32_e32 v5, vcc, -1, v4
-; GFX8-NEXT: v_lshlrev_b64 v[1:2], v5, v[1:2]
-; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
-; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX8-NEXT: v_xor_b32_e32 v3, v1, v2
+; GFX8-NEXT: v_ffbh_i32_e32 v4, v2
+; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v3
+; GFX8-NEXT: v_add_u32_e32 v4, vcc, -1, v4
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, 32, v3
+; GFX8-NEXT: v_min_u32_e32 v3, v4, v3
+; GFX8-NEXT: v_lshlrev_b64 v[1:2], v3, v[1:2]
+; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 32, v3
+; GFX8-NEXT: v_min_u32_e32 v1, 1, v1
; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
; GFX8-NEXT: v_cvt_f32_i32_e32 v1, v1
-; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 33, v4
; GFX8-NEXT: v_mov_b32_e32 v2, s1
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v0
-; GFX8-NEXT: v_ldexp_f32 v1, v1, v4
-; GFX8-NEXT: v_cvt_f16_f32_e32 v4, v1
-; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v2, v3, vcc
-; GFX8-NEXT: flat_store_short v[0:1], v4
+; GFX8-NEXT: v_ldexp_f32 v1, v1, v3
+; GFX8-NEXT: v_cvt_f16_f32_e32 v3, v1
+; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v2, vcc
+; GFX8-NEXT: flat_store_short v[0:1], v3
; GFX8-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
@@ -142,21 +132,18 @@ define amdgpu_kernel void @s_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
-; GFX6-NEXT: s_xor_b32 s0, s2, s3
-; GFX6-NEXT: s_flbit_i32 s8, s3
-; GFX6-NEXT: v_cmp_gt_i32_e64 s[0:1], s0, -1
-; GFX6-NEXT: v_cndmask_b32_e64 v0, 32, 33, s[0:1]
-; GFX6-NEXT: v_mov_b32_e32 v1, s8
-; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, s8, -1
-; GFX6-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
-; GFX6-NEXT: v_add_i32_e32 v0, vcc, -1, v2
-; GFX6-NEXT: v_lshl_b64 v[0:1], s[2:3], v0
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
-; GFX6-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
-; GFX6-NEXT: v_cvt_f32_i32_e32 v0, v0
-; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 33, v2
-; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; GFX6-NEXT: s_flbit_i32 s0, s3
+; GFX6-NEXT: s_xor_b32 s1, s2, s3
+; GFX6-NEXT: s_add_i32 s0, s0, -1
+; GFX6-NEXT: s_ashr_i32 s1, s1, 31
+; GFX6-NEXT: s_add_i32 s1, s1, 32
+; GFX6-NEXT: s_min_u32 s8, s0, s1
+; GFX6-NEXT: s_lshl_b64 s[0:1], s[2:3], s8
+; GFX6-NEXT: s_min_u32 s0, s0, 1
+; GFX6-NEXT: s_or_b32 s0, s1, s0
+; GFX6-NEXT: v_cvt_f32_i32_e32 v0, s0
+; GFX6-NEXT: s_sub_i32 s0, 32, s8
+; GFX6-NEXT: v_ldexp_f32_e64 v0, v0, s0
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
@@ -164,20 +151,18 @@ define amdgpu_kernel void @s_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8-NEXT: s_xor_b32 s4, s2, s3
-; GFX8-NEXT: s_cmp_gt_i32 s4, -1
-; GFX8-NEXT: s_flbit_i32 s5, s3
-; GFX8-NEXT: s_cselect_b32 s4, 33, 32
-; GFX8-NEXT: s_cmp_lg_u32 s5, -1
-; GFX8-NEXT: s_cselect_b32 s6, s5, s4
-; GFX8-NEXT: s_add_i32 s4, s6, -1
+; GFX8-NEXT: s_xor_b32 s5, s2, s3
+; GFX8-NEXT: s_flbit_i32 s4, s3
+; GFX8-NEXT: s_ashr_i32 s5, s5, 31
+; GFX8-NEXT: s_add_i32 s4, s4, -1
+; GFX8-NEXT: s_add_i32 s5, s5, 32
+; GFX8-NEXT: s_min_u32 s4, s4, s5
; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], s4
-; GFX8-NEXT: v_cmp_ne_u32_e64 s[4:5], s2, 0
-; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
-; GFX8-NEXT: v_or_b32_e32 v0, s3, v0
-; GFX8-NEXT: v_cvt_f32_i32_e32 v2, v0
+; GFX8-NEXT: s_min_u32 s2, s2, 1
+; GFX8-NEXT: s_or_b32 s2, s3, s2
+; GFX8-NEXT: v_cvt_f32_i32_e32 v2, s2
; GFX8-NEXT: v_mov_b32_e32 v0, s0
-; GFX8-NEXT: s_sub_i32 s0, 33, s6
+; GFX8-NEXT: s_sub_i32 s0, 32, s4
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_ldexp_f32 v2, v2, s0
; GFX8-NEXT: flat_store_dword v[0:1], v2
@@ -203,17 +188,15 @@ define amdgpu_kernel void @v_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_xor_b32_e32 v0, v3, v4
; GFX6-NEXT: v_ffbh_i32_e32 v5, v4
-; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, -1, v0
-; GFX6-NEXT: v_cndmask_b32_e64 v0, 32, 33, vcc
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, -1, v5
-; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
-; GFX6-NEXT: v_add_i32_e32 v5, vcc, -1, v0
-; GFX6-NEXT: v_lshl_b64 v[3:4], v[3:4], v5
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
-; GFX6-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
+; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v0
+; GFX6-NEXT: v_add_i32_e32 v5, vcc, -1, v5
+; GFX6-NEXT: v_add_i32_e32 v0, vcc, 32, v0
+; GFX6-NEXT: v_min_u32_e32 v0, v5, v0
+; GFX6-NEXT: v_lshl_b64 v[3:4], v[3:4], v0
+; GFX6-NEXT: v_min_u32_e32 v3, 1, v3
; GFX6-NEXT: v_or_b32_e32 v3, v4, v3
; GFX6-NEXT: v_cvt_f32_i32_e32 v3, v3
-; GFX6-NEXT: v_sub_i32_e32 v0, vcc, 33, v0
+; GFX6-NEXT: v_sub_i32_e32 v0, vcc, 32, v0
; GFX6-NEXT: v_ldexp_f32_e32 v0, v3, v0
; GFX6-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GFX6-NEXT: s_endpgm
@@ -222,31 +205,28 @@ define amdgpu_kernel void @v_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 3, v0
-; GFX8-NEXT: v_mov_b32_e32 v3, 0
-; GFX8-NEXT: v_lshlrev_b32_e32 v4, 2, v0
+; GFX8-NEXT: v_lshlrev_b32_e32 v3, 2, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, s3
; GFX8-NEXT: v_add_u32_e32 v1, vcc, s2, v1
-; GFX8-NEXT: v_addc_u32_e32 v2, vcc, v2, v3, vcc
+; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc
; GFX8-NEXT: flat_load_dwordx2 v[1:2], v[1:2]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_xor_b32_e32 v0, v1, v2
-; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, -1, v0
-; GFX8-NEXT: v_ffbh_i32_e32 v5, v2
-; GFX8-NEXT: v_cndmask_b32_e64 v0, 32, 33, vcc
-; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, -1, v5
-; GFX8-NEXT: v_cndmask_b32_e32 v5, v0, v5, vcc
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, -1, v5
-; GFX8-NEXT: v_lshlrev_b64 v[0:1], v0, v[1:2]
+; GFX8-NEXT: v_ffbh_i32_e32 v4, v2
+; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v0
+; GFX8-NEXT: v_add_u32_e32 v4, vcc, -1, v4
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0
+; GFX8-NEXT: v_min_u32_e32 v4, v4, v0
+; GFX8-NEXT: v_lshlrev_b64 v[0:1], v4, v[1:2]
; GFX8-NEXT: v_mov_b32_e32 v2, s1
-; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
-; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX8-NEXT: v_min_u32_e32 v0, 1, v0
; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
-; GFX8-NEXT: v_cvt_f32_i32_e32 v6, v0
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v4
-; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v2, v3, vcc
-; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 33, v5
-; GFX8-NEXT: v_ldexp_f32 v2, v6, v2
+; GFX8-NEXT: v_cvt_f32_i32_e32 v5, v0
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v3
+; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v2, vcc
+; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 32, v4
+; GFX8-NEXT: v_ldexp_f32 v2, v5, v2
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -266,74 +246,64 @@ define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f32(<2 x float> addrspace(1)*
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
-; GFX6-NEXT: s_xor_b32 s8, s6, s7
-; GFX6-NEXT: s_flbit_i32 s10, s7
+; GFX6-NEXT: s_flbit_i32 s8, s7
+; GFX6-NEXT: s_xor_b32 s9, s6, s7
+; GFX6-NEXT: s_flbit_i32 s10, s5
; GFX6-NEXT: s_xor_b32 s11, s4, s5
-; GFX6-NEXT: s_flbit_i32 s12, s5
-; GFX6-NEXT: v_cmp_gt_i32_e64 s[8:9], s8, -1
-; GFX6-NEXT: v_cndmask_b32_e64 v0, 32, 33, s[8:9]
-; GFX6-NEXT: v_mov_b32_e32 v1, s10
-; GFX6-NEXT: v_cmp_gt_i32_e64 s[8:9], s11, -1
-; GFX6-NEXT: v_cndmask_b32_e64 v2, 32, 33, s[8:9]
-; GFX6-NEXT: v_mov_b32_e32 v3, s12
-; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, s10, -1
-; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, s12, -1
-; GFX6-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
-; GFX6-NEXT: v_add_i32_e32 v2, vcc, -1, v0
-; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 33, v0
-; GFX6-NEXT: v_add_i32_e32 v3, vcc, -1, v1
-; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 33, v1
-; GFX6-NEXT: v_lshl_b64 v[0:1], s[6:7], v2
-; GFX6-NEXT: v_lshl_b64 v[2:3], s[4:5], v3
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
-; GFX6-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
-; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
-; GFX6-NEXT: v_or_b32_e32 v1, v3, v2
-; GFX6-NEXT: v_cvt_f32_i32_e32 v0, v0
-; GFX6-NEXT: v_cvt_f32_i32_e32 v2, v1
-; GFX6-NEXT: v_ldexp_f32_e32 v1, v0, v4
-; GFX6-NEXT: v_ldexp_f32_e32 v0, v2, v5
+; GFX6-NEXT: s_add_i32 s8, s8, -1
+; GFX6-NEXT: s_ashr_i32 s9, s9, 31
+; GFX6-NEXT: s_add_i32 s10, s10, -1
+; GFX6-NEXT: s_ashr_i32 s11, s11, 31
+; GFX6-NEXT: s_add_i32 s9, s9, 32
+; GFX6-NEXT: s_add_i32 s11, s11, 32
+; GFX6-NEXT: s_min_u32 s8, s8, s9
+; GFX6-NEXT: s_min_u32 s9, s10, s11
+; GFX6-NEXT: s_lshl_b64 s[6:7], s[6:7], s8
+; GFX6-NEXT: s_sub_i32 s8, 32, s8
+; GFX6-NEXT: s_lshl_b64 s[4:5], s[4:5], s9
+; GFX6-NEXT: s_sub_i32 s9, 32, s9
+; GFX6-NEXT: s_min_u32 s6, s6, 1
+; GFX6-NEXT: s_min_u32 s4, s4, 1
+; GFX6-NEXT: s_or_b32 s6, s7, s6
+; GFX6-NEXT: s_or_b32 s4, s5, s4
+; GFX6-NEXT: v_cvt_f32_i32_e32 v0, s6
+; GFX6-NEXT: v_cvt_f32_i32_e32 v2, s4
+; GFX6-NEXT: v_ldexp_f32_e64 v1, v0, s8
+; GFX6-NEXT: v_ldexp_f32_e64 v0, v2, s9
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: s_sint_to_fp_v2i64_to_v2f32:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
+; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x34
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8-NEXT: s_xor_b32 s2, s6, s7
-; GFX8-NEXT: s_cmp_gt_i32 s2, -1
-; GFX8-NEXT: s_flbit_i32 s3, s7
-; GFX8-NEXT: s_cselect_b32 s2, 33, 32
-; GFX8-NEXT: s_cmp_lg_u32 s3, -1
-; GFX8-NEXT: s_cselect_b32 s2, s3, s2
-; GFX8-NEXT: s_add_i32 s3, s2, -1
-; GFX8-NEXT: s_sub_i32 s8, 33, s2
-; GFX8-NEXT: s_lshl_b64 s[2:3], s[6:7], s3
-; GFX8-NEXT: v_cmp_ne_u32_e64 s[6:7], s2, 0
-; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[6:7]
-; GFX8-NEXT: s_xor_b32 s2, s4, s5
-; GFX8-NEXT: v_or_b32_e32 v0, s3, v0
-; GFX8-NEXT: s_cmp_gt_i32 s2, -1
-; GFX8-NEXT: s_flbit_i32 s3, s5
-; GFX8-NEXT: s_cselect_b32 s2, 33, 32
-; GFX8-NEXT: s_cmp_lg_u32 s3, -1
-; GFX8-NEXT: s_cselect_b32 s6, s3, s2
-; GFX8-NEXT: s_add_i32 s2, s6, -1
-; GFX8-NEXT: s_lshl_b64 s[2:3], s[4:5], s2
-; GFX8-NEXT: v_cmp_ne_u32_e64 s[4:5], s2, 0
-; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
-; GFX8-NEXT: v_or_b32_e32 v1, s3, v1
-; GFX8-NEXT: v_cvt_f32_i32_e32 v0, v0
-; GFX8-NEXT: v_cvt_f32_i32_e32 v2, v1
-; GFX8-NEXT: s_sub_i32 s2, 33, s6
-; GFX8-NEXT: v_ldexp_f32 v1, v0, s8
-; GFX8-NEXT: v_ldexp_f32 v0, v2, s2
-; GFX8-NEXT: v_mov_b32_e32 v3, s1
-; GFX8-NEXT: v_mov_b32_e32 v2, s0
+; GFX8-NEXT: s_xor_b32 s7, s2, s3
+; GFX8-NEXT: s_flbit_i32 s6, s3
+; GFX8-NEXT: s_ashr_i32 s7, s7, 31
+; GFX8-NEXT: s_add_i32 s6, s6, -1
+; GFX8-NEXT: s_add_i32 s7, s7, 32
+; GFX8-NEXT: s_min_u32 s6, s6, s7
+; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], s6
+; GFX8-NEXT: s_min_u32 s2, s2, 1
+; GFX8-NEXT: s_or_b32 s2, s3, s2
+; GFX8-NEXT: v_cvt_f32_i32_e32 v0, s2
+; GFX8-NEXT: s_xor_b32 s2, s0, s1
+; GFX8-NEXT: s_flbit_i32 s8, s1
+; GFX8-NEXT: s_ashr_i32 s2, s2, 31
+; GFX8-NEXT: s_add_i32 s8, s8, -1
+; GFX8-NEXT: s_add_i32 s2, s2, 32
+; GFX8-NEXT: s_min_u32 s2, s8, s2
+; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s2
+; GFX8-NEXT: s_min_u32 s0, s0, 1
+; GFX8-NEXT: s_or_b32 s0, s1, s0
+; GFX8-NEXT: v_cvt_f32_i32_e32 v2, s0
+; GFX8-NEXT: s_sub_i32 s0, 32, s6
+; GFX8-NEXT: v_ldexp_f32 v1, v0, s0
+; GFX8-NEXT: s_sub_i32 s0, 32, s2
+; GFX8-NEXT: v_ldexp_f32 v0, v2, s0
+; GFX8-NEXT: v_mov_b32_e32 v2, s4
+; GFX8-NEXT: v_mov_b32_e32 v3, s5
; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX8-NEXT: s_endpgm
%result = sitofp <2 x i64> %in to <2 x float>
@@ -366,54 +336,46 @@ define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f32(<4 x float> addrspace(1)*
; GFX6-NEXT: v_ffbh_i32_e32 v15, v8
; GFX6-NEXT: v_xor_b32_e32 v16, v5, v6
; GFX6-NEXT: v_ffbh_i32_e32 v17, v6
-; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, -1, v0
-; GFX6-NEXT: v_cndmask_b32_e64 v0, 32, 33, vcc
-; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, -1, v12
-; GFX6-NEXT: v_cndmask_b32_e64 v12, 32, 33, vcc
-; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, -1, v14
-; GFX6-NEXT: v_cndmask_b32_e64 v14, 32, 33, vcc
-; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, -1, v16
-; GFX6-NEXT: v_cndmask_b32_e64 v16, 32, 33, vcc
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, -1, v9
-; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, -1, v13
-; GFX6-NEXT: v_cndmask_b32_e32 v9, v12, v13, vcc
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, -1, v15
-; GFX6-NEXT: v_cndmask_b32_e32 v12, v14, v15, vcc
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, -1, v17
-; GFX6-NEXT: v_cndmask_b32_e32 v13, v16, v17, vcc
-; GFX6-NEXT: v_add_i32_e32 v14, vcc, -1, v0
-; GFX6-NEXT: v_sub_i32_e32 v15, vcc, 33, v0
-; GFX6-NEXT: v_add_i32_e32 v0, vcc, -1, v9
-; GFX6-NEXT: v_sub_i32_e32 v9, vcc, 33, v9
-; GFX6-NEXT: v_add_i32_e32 v16, vcc, -1, v12
-; GFX6-NEXT: v_sub_i32_e32 v12, vcc, 33, v12
-; GFX6-NEXT: v_add_i32_e32 v17, vcc, -1, v13
-; GFX6-NEXT: v_sub_i32_e32 v13, vcc, 33, v13
-; GFX6-NEXT: v_lshl_b64 v[3:4], v[3:4], v14
-; GFX6-NEXT: v_lshl_b64 v[0:1], v[1:2], v0
-; GFX6-NEXT: v_lshl_b64 v[7:8], v[7:8], v16
-; GFX6-NEXT: v_lshl_b64 v[5:6], v[5:6], v17
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
-; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
-; GFX6-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
-; GFX6-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
-; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX6-NEXT: v_or_b32_e32 v2, v4, v2
+; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v0
+; GFX6-NEXT: v_add_i32_e32 v9, vcc, -1, v9
+; GFX6-NEXT: v_ashrrev_i32_e32 v12, 31, v12
+; GFX6-NEXT: v_add_i32_e32 v13, vcc, -1, v13
+; GFX6-NEXT: v_ashrrev_i32_e32 v14, 31, v14
+; GFX6-NEXT: v_add_i32_e32 v15, vcc, -1, v15
+; GFX6-NEXT: v_ashrrev_i32_e32 v16, 31, v16
+; GFX6-NEXT: v_add_i32_e32 v17, vcc, -1, v17
+; GFX6-NEXT: v_add_i32_e32 v0, vcc, 32, v0
+; GFX6-NEXT: v_add_i32_e32 v12, vcc, 32, v12
+; GFX6-NEXT: v_add_i32_e32 v14, vcc, 32, v14
+; GFX6-NEXT: v_add_i32_e32 v16, vcc, 32, v16
+; GFX6-NEXT: v_min_u32_e32 v0, v9, v0
+; GFX6-NEXT: v_min_u32_e32 v9, v13, v12
+; GFX6-NEXT: v_min_u32_e32 v12, v15, v14
+; GFX6-NEXT: v_min_u32_e32 v13, v17, v16
+; GFX6-NEXT: v_lshl_b64 v[3:4], v[3:4], v0
+; GFX6-NEXT: v_sub_i32_e32 v14, vcc, 32, v0
+; GFX6-NEXT: v_lshl_b64 v[0:1], v[1:2], v9
+; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 32, v9
+; GFX6-NEXT: v_lshl_b64 v[7:8], v[7:8], v12
+; GFX6-NEXT: v_sub_i32_e32 v9, vcc, 32, v12
+; GFX6-NEXT: v_lshl_b64 v[5:6], v[5:6], v13
+; GFX6-NEXT: v_sub_i32_e32 v12, vcc, 32, v13
+; GFX6-NEXT: v_min_u32_e32 v3, 1, v3
+; GFX6-NEXT: v_min_u32_e32 v0, 1, v0
+; GFX6-NEXT: v_min_u32_e32 v7, 1, v7
+; GFX6-NEXT: v_min_u32_e32 v5, 1, v5
+; GFX6-NEXT: v_or_b32_e32 v3, v4, v3
; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
-; GFX6-NEXT: v_or_b32_e32 v1, v8, v3
-; GFX6-NEXT: v_or_b32_e32 v3, v6, v5
-; GFX6-NEXT: v_cvt_f32_i32_e32 v2, v2
+; GFX6-NEXT: v_or_b32_e32 v1, v8, v7
+; GFX6-NEXT: v_or_b32_e32 v4, v6, v5
+; GFX6-NEXT: v_cvt_f32_i32_e32 v3, v3
; GFX6-NEXT: v_cvt_f32_i32_e32 v0, v0
; GFX6-NEXT: v_cvt_f32_i32_e32 v1, v1
-; GFX6-NEXT: v_cvt_f32_i32_e32 v4, v3
-; GFX6-NEXT: v_ldexp_f32_e32 v3, v2, v15
-; GFX6-NEXT: v_ldexp_f32_e32 v2, v0, v9
-; GFX6-NEXT: v_ldexp_f32_e32 v1, v1, v12
-; GFX6-NEXT: v_ldexp_f32_e32 v0, v4, v13
+; GFX6-NEXT: v_cvt_f32_i32_e32 v4, v4
+; GFX6-NEXT: v_ldexp_f32_e32 v3, v3, v14
+; GFX6-NEXT: v_ldexp_f32_e32 v2, v0, v2
+; GFX6-NEXT: v_ldexp_f32_e32 v1, v1, v9
+; GFX6-NEXT: v_ldexp_f32_e32 v0, v4, v12
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[10:11], s[0:3], 0 addr64
; GFX6-NEXT: s_endpgm
;
@@ -421,77 +383,68 @@ define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f32(<4 x float> addrspace(1)*
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 5, v0
-; GFX8-NEXT: v_mov_b32_e32 v10, 0
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, s3
; GFX8-NEXT: v_add_u32_e32 v5, vcc, s2, v1
-; GFX8-NEXT: v_addc_u32_e32 v6, vcc, v2, v10, vcc
+; GFX8-NEXT: v_addc_u32_e32 v6, vcc, 0, v2, vcc
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 16, v5
; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v6, vcc
; GFX8-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
; GFX8-NEXT: flat_load_dwordx4 v[5:8], v[5:6]
; GFX8-NEXT: v_add_u32_e32 v9, vcc, s0, v0
-; GFX8-NEXT: v_mov_b32_e32 v11, s1
-; GFX8-NEXT: v_addc_u32_e32 v10, vcc, v11, v10, vcc
+; GFX8-NEXT: v_mov_b32_e32 v10, s1
+; GFX8-NEXT: v_addc_u32_e32 v10, vcc, 0, v10, vcc
; GFX8-NEXT: s_waitcnt vmcnt(1)
; GFX8-NEXT: v_xor_b32_e32 v14, v3, v4
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_xor_b32_e32 v0, v7, v8
-; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, -1, v0
; GFX8-NEXT: v_xor_b32_e32 v12, v5, v6
-; GFX8-NEXT: v_cndmask_b32_e64 v0, 32, 33, vcc
-; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, -1, v12
-; GFX8-NEXT: v_cndmask_b32_e64 v12, 32, 33, vcc
-; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, -1, v14
; GFX8-NEXT: v_xor_b32_e32 v16, v1, v2
-; GFX8-NEXT: v_cndmask_b32_e64 v14, 32, 33, vcc
-; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, -1, v16
; GFX8-NEXT: v_ffbh_i32_e32 v11, v8
-; GFX8-NEXT: v_cndmask_b32_e64 v16, 32, 33, vcc
-; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, -1, v11
; GFX8-NEXT: v_ffbh_i32_e32 v13, v6
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc
-; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, -1, v13
; GFX8-NEXT: v_ffbh_i32_e32 v15, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v11, v12, v13, vcc
-; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, -1, v15
; GFX8-NEXT: v_ffbh_i32_e32 v17, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v14, v15, vcc
-; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, -1, v17
-; GFX8-NEXT: v_cndmask_b32_e32 v13, v16, v17, vcc
-; GFX8-NEXT: v_add_u32_e32 v14, vcc, -1, v0
-; GFX8-NEXT: v_sub_u32_e32 v15, vcc, 33, v0
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, -1, v11
-; GFX8-NEXT: v_lshlrev_b64 v[7:8], v14, v[7:8]
-; GFX8-NEXT: v_add_u32_e32 v16, vcc, -1, v12
-; GFX8-NEXT: v_add_u32_e32 v17, vcc, -1, v13
-; GFX8-NEXT: v_lshlrev_b64 v[5:6], v0, v[5:6]
-; GFX8-NEXT: v_sub_u32_e32 v11, vcc, 33, v11
-; GFX8-NEXT: v_sub_u32_e32 v12, vcc, 33, v12
-; GFX8-NEXT: v_sub_u32_e32 v13, vcc, 33, v13
-; GFX8-NEXT: v_lshlrev_b64 v[3:4], v16, v[3:4]
-; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
-; GFX8-NEXT: v_lshlrev_b64 v[0:1], v17, v[1:2]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
-; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
-; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
-; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v0
+; GFX8-NEXT: v_ashrrev_i32_e32 v12, 31, v12
+; GFX8-NEXT: v_ashrrev_i32_e32 v14, 31, v14
+; GFX8-NEXT: v_ashrrev_i32_e32 v16, 31, v16
+; GFX8-NEXT: v_add_u32_e32 v11, vcc, -1, v11
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0
+; GFX8-NEXT: v_add_u32_e32 v13, vcc, -1, v13
+; GFX8-NEXT: v_add_u32_e32 v15, vcc, -1, v15
+; GFX8-NEXT: v_add_u32_e32 v17, vcc, -1, v17
+; GFX8-NEXT: v_add_u32_e32 v12, vcc, 32, v12
+; GFX8-NEXT: v_add_u32_e32 v14, vcc, 32, v14
+; GFX8-NEXT: v_add_u32_e32 v16, vcc, 32, v16
+; GFX8-NEXT: v_min_u32_e32 v0, v11, v0
+; GFX8-NEXT: v_min_u32_e32 v11, v13, v12
+; GFX8-NEXT: v_min_u32_e32 v12, v15, v14
+; GFX8-NEXT: v_min_u32_e32 v13, v17, v16
+; GFX8-NEXT: v_lshlrev_b64 v[5:6], v11, v[5:6]
+; GFX8-NEXT: v_lshlrev_b64 v[3:4], v12, v[3:4]
+; GFX8-NEXT: v_lshlrev_b64 v[7:8], v0, v[7:8]
+; GFX8-NEXT: v_sub_u32_e32 v14, vcc, 32, v0
+; GFX8-NEXT: v_lshlrev_b64 v[0:1], v13, v[1:2]
+; GFX8-NEXT: v_min_u32_e32 v7, 1, v7
+; GFX8-NEXT: v_min_u32_e32 v5, 1, v5
+; GFX8-NEXT: v_min_u32_e32 v3, 1, v3
+; GFX8-NEXT: v_min_u32_e32 v0, 1, v0
; GFX8-NEXT: v_or_b32_e32 v3, v4, v3
-; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
-; GFX8-NEXT: v_or_b32_e32 v2, v8, v2
; GFX8-NEXT: v_or_b32_e32 v5, v6, v5
-; GFX8-NEXT: v_cvt_f32_i32_e32 v1, v2
+; GFX8-NEXT: v_or_b32_e32 v7, v8, v7
+; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
+; GFX8-NEXT: v_cvt_f32_i32_e32 v1, v7
+; GFX8-NEXT: v_cvt_f32_i32_e32 v4, v5
; GFX8-NEXT: v_cvt_f32_i32_e32 v3, v3
-; GFX8-NEXT: v_cvt_f32_i32_e32 v2, v5
-; GFX8-NEXT: v_cvt_f32_i32_e32 v4, v0
-; GFX8-NEXT: v_ldexp_f32 v1, v1, v15
+; GFX8-NEXT: v_cvt_f32_i32_e32 v5, v0
+; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 32, v13
+; GFX8-NEXT: v_sub_u32_e32 v11, vcc, 32, v11
+; GFX8-NEXT: v_sub_u32_e32 v12, vcc, 32, v12
+; GFX8-NEXT: v_ldexp_f32 v1, v1, v14
+; GFX8-NEXT: v_ldexp_f32 v0, v4, v11
; GFX8-NEXT: v_ldexp_f32 v3, v3, v12
-; GFX8-NEXT: v_ldexp_f32 v0, v2, v11
-; GFX8-NEXT: v_ldexp_f32 v2, v4, v13
+; GFX8-NEXT: v_ldexp_f32 v2, v5, v2
; GFX8-NEXT: flat_store_dwordx4 v[9:10], v[0:3]
; GFX8-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -511,36 +464,30 @@ define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f16(<2 x half> addrspace(1)*
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
-; GFX6-NEXT: s_xor_b32 s8, s6, s7
-; GFX6-NEXT: s_flbit_i32 s10, s7
+; GFX6-NEXT: s_flbit_i32 s8, s7
+; GFX6-NEXT: s_xor_b32 s9, s6, s7
+; GFX6-NEXT: s_flbit_i32 s10, s5
; GFX6-NEXT: s_xor_b32 s11, s4, s5
-; GFX6-NEXT: s_flbit_i32 s12, s5
-; GFX6-NEXT: v_cmp_gt_i32_e64 s[8:9], s8, -1
-; GFX6-NEXT: v_cndmask_b32_e64 v0, 32, 33, s[8:9]
-; GFX6-NEXT: v_mov_b32_e32 v1, s10
-; GFX6-NEXT: v_cmp_gt_i32_e64 s[8:9], s11, -1
-; GFX6-NEXT: v_cndmask_b32_e64 v2, 32, 33, s[8:9]
-; GFX6-NEXT: v_mov_b32_e32 v3, s12
-; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, s10, -1
-; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, s12, -1
-; GFX6-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
-; GFX6-NEXT: v_add_i32_e32 v2, vcc, -1, v0
-; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 33, v0
-; GFX6-NEXT: v_add_i32_e32 v3, vcc, -1, v1
-; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 33, v1
-; GFX6-NEXT: v_lshl_b64 v[0:1], s[6:7], v2
-; GFX6-NEXT: v_lshl_b64 v[2:3], s[4:5], v3
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
-; GFX6-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
-; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
-; GFX6-NEXT: v_or_b32_e32 v1, v3, v2
-; GFX6-NEXT: v_cvt_f32_i32_e32 v0, v0
-; GFX6-NEXT: v_cvt_f32_i32_e32 v1, v1
-; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v4
-; GFX6-NEXT: v_ldexp_f32_e32 v1, v1, v5
+; GFX6-NEXT: s_add_i32 s8, s8, -1
+; GFX6-NEXT: s_ashr_i32 s9, s9, 31
+; GFX6-NEXT: s_add_i32 s10, s10, -1
+; GFX6-NEXT: s_ashr_i32 s11, s11, 31
+; GFX6-NEXT: s_add_i32 s9, s9, 32
+; GFX6-NEXT: s_add_i32 s11, s11, 32
+; GFX6-NEXT: s_min_u32 s8, s8, s9
+; GFX6-NEXT: s_min_u32 s9, s10, s11
+; GFX6-NEXT: s_lshl_b64 s[6:7], s[6:7], s8
+; GFX6-NEXT: s_sub_i32 s8, 32, s8
+; GFX6-NEXT: s_lshl_b64 s[4:5], s[4:5], s9
+; GFX6-NEXT: s_sub_i32 s9, 32, s9
+; GFX6-NEXT: s_min_u32 s6, s6, 1
+; GFX6-NEXT: s_min_u32 s4, s4, 1
+; GFX6-NEXT: s_or_b32 s6, s7, s6
+; GFX6-NEXT: s_or_b32 s4, s5, s4
+; GFX6-NEXT: v_cvt_f32_i32_e32 v0, s6
+; GFX6-NEXT: v_cvt_f32_i32_e32 v1, s4
+; GFX6-NEXT: v_ldexp_f32_e64 v0, v0, s8
+; GFX6-NEXT: v_ldexp_f32_e64 v1, v1, s9
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
@@ -553,33 +500,29 @@ define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f16(<2 x half> addrspace(1)*
; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8-NEXT: s_xor_b32 s2, s6, s7
-; GFX8-NEXT: s_cmp_gt_i32 s2, -1
-; GFX8-NEXT: s_flbit_i32 s3, s7
-; GFX8-NEXT: s_cselect_b32 s2, 33, 32
-; GFX8-NEXT: s_cmp_lg_u32 s3, -1
-; GFX8-NEXT: s_cselect_b32 s2, s3, s2
-; GFX8-NEXT: s_add_i32 s3, s2, -1
-; GFX8-NEXT: s_sub_i32 s8, 33, s2
-; GFX8-NEXT: s_lshl_b64 s[2:3], s[6:7], s3
-; GFX8-NEXT: v_cmp_ne_u32_e64 s[6:7], s2, 0
-; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[6:7]
+; GFX8-NEXT: s_xor_b32 s3, s6, s7
+; GFX8-NEXT: s_flbit_i32 s2, s7
+; GFX8-NEXT: s_ashr_i32 s3, s3, 31
+; GFX8-NEXT: s_add_i32 s2, s2, -1
+; GFX8-NEXT: s_add_i32 s3, s3, 32
+; GFX8-NEXT: s_min_u32 s9, s2, s3
+; GFX8-NEXT: s_lshl_b64 s[2:3], s[6:7], s9
+; GFX8-NEXT: s_min_u32 s2, s2, 1
+; GFX8-NEXT: s_or_b32 s2, s3, s2
+; GFX8-NEXT: v_cvt_f32_i32_e32 v0, s2
; GFX8-NEXT: s_xor_b32 s2, s4, s5
-; GFX8-NEXT: v_or_b32_e32 v0, s3, v0
-; GFX8-NEXT: s_cmp_gt_i32 s2, -1
-; GFX8-NEXT: s_flbit_i32 s3, s5
-; GFX8-NEXT: s_cselect_b32 s2, 33, 32
-; GFX8-NEXT: s_cmp_lg_u32 s3, -1
-; GFX8-NEXT: s_cselect_b32 s6, s3, s2
-; GFX8-NEXT: s_add_i32 s2, s6, -1
-; GFX8-NEXT: s_lshl_b64 s[2:3], s[4:5], s2
-; GFX8-NEXT: v_cmp_ne_u32_e64 s[4:5], s2, 0
-; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
-; GFX8-NEXT: v_or_b32_e32 v1, s3, v1
-; GFX8-NEXT: v_cvt_f32_i32_e32 v0, v0
-; GFX8-NEXT: v_cvt_f32_i32_e32 v1, v1
-; GFX8-NEXT: s_sub_i32 s2, 33, s6
-; GFX8-NEXT: v_ldexp_f32 v0, v0, s8
+; GFX8-NEXT: s_flbit_i32 s8, s5
+; GFX8-NEXT: s_ashr_i32 s2, s2, 31
+; GFX8-NEXT: s_add_i32 s8, s8, -1
+; GFX8-NEXT: s_add_i32 s2, s2, 32
+; GFX8-NEXT: s_min_u32 s7, s8, s2
+; GFX8-NEXT: s_lshl_b64 s[2:3], s[4:5], s7
+; GFX8-NEXT: s_min_u32 s2, s2, 1
+; GFX8-NEXT: s_or_b32 s2, s3, s2
+; GFX8-NEXT: v_cvt_f32_i32_e32 v1, s2
+; GFX8-NEXT: s_sub_i32 s6, 32, s9
+; GFX8-NEXT: s_sub_i32 s2, 32, s7
+; GFX8-NEXT: v_ldexp_f32 v0, v0, s6
; GFX8-NEXT: v_ldexp_f32 v1, v1, s2
; GFX8-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT: v_cvt_f16_f32_e32 v1, v1
@@ -618,62 +561,54 @@ define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f16(<4 x half> addrspace(1)*
; GFX6-NEXT: v_ffbh_i32_e32 v15, v8
; GFX6-NEXT: v_xor_b32_e32 v16, v5, v6
; GFX6-NEXT: v_ffbh_i32_e32 v17, v6
-; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, -1, v0
-; GFX6-NEXT: v_cndmask_b32_e64 v0, 32, 33, vcc
-; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, -1, v12
-; GFX6-NEXT: v_cndmask_b32_e64 v12, 32, 33, vcc
-; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, -1, v14
-; GFX6-NEXT: v_cndmask_b32_e64 v14, 32, 33, vcc
-; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, -1, v16
-; GFX6-NEXT: v_cndmask_b32_e64 v16, 32, 33, vcc
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, -1, v9
-; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, -1, v13
-; GFX6-NEXT: v_cndmask_b32_e32 v9, v12, v13, vcc
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, -1, v15
-; GFX6-NEXT: v_cndmask_b32_e32 v12, v14, v15, vcc
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, -1, v17
-; GFX6-NEXT: v_cndmask_b32_e32 v13, v16, v17, vcc
-; GFX6-NEXT: v_add_i32_e32 v14, vcc, -1, v0
-; GFX6-NEXT: v_sub_i32_e32 v15, vcc, 33, v0
-; GFX6-NEXT: v_add_i32_e32 v0, vcc, -1, v9
-; GFX6-NEXT: v_sub_i32_e32 v9, vcc, 33, v9
-; GFX6-NEXT: v_add_i32_e32 v16, vcc, -1, v12
-; GFX6-NEXT: v_sub_i32_e32 v12, vcc, 33, v12
-; GFX6-NEXT: v_add_i32_e32 v17, vcc, -1, v13
-; GFX6-NEXT: v_sub_i32_e32 v13, vcc, 33, v13
-; GFX6-NEXT: v_lshl_b64 v[3:4], v[3:4], v14
-; GFX6-NEXT: v_lshl_b64 v[0:1], v[1:2], v0
-; GFX6-NEXT: v_lshl_b64 v[7:8], v[7:8], v16
-; GFX6-NEXT: v_lshl_b64 v[5:6], v[5:6], v17
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
-; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
-; GFX6-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
-; GFX6-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
-; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX6-NEXT: v_or_b32_e32 v2, v4, v2
+; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v0
+; GFX6-NEXT: v_add_i32_e32 v9, vcc, -1, v9
+; GFX6-NEXT: v_ashrrev_i32_e32 v12, 31, v12
+; GFX6-NEXT: v_add_i32_e32 v13, vcc, -1, v13
+; GFX6-NEXT: v_ashrrev_i32_e32 v14, 31, v14
+; GFX6-NEXT: v_add_i32_e32 v15, vcc, -1, v15
+; GFX6-NEXT: v_ashrrev_i32_e32 v16, 31, v16
+; GFX6-NEXT: v_add_i32_e32 v17, vcc, -1, v17
+; GFX6-NEXT: v_add_i32_e32 v0, vcc, 32, v0
+; GFX6-NEXT: v_add_i32_e32 v12, vcc, 32, v12
+; GFX6-NEXT: v_add_i32_e32 v14, vcc, 32, v14
+; GFX6-NEXT: v_add_i32_e32 v16, vcc, 32, v16
+; GFX6-NEXT: v_min_u32_e32 v0, v9, v0
+; GFX6-NEXT: v_min_u32_e32 v9, v13, v12
+; GFX6-NEXT: v_min_u32_e32 v12, v15, v14
+; GFX6-NEXT: v_min_u32_e32 v13, v17, v16
+; GFX6-NEXT: v_lshl_b64 v[3:4], v[3:4], v0
+; GFX6-NEXT: v_sub_i32_e32 v14, vcc, 32, v0
+; GFX6-NEXT: v_lshl_b64 v[0:1], v[1:2], v9
+; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 32, v9
+; GFX6-NEXT: v_lshl_b64 v[7:8], v[7:8], v12
+; GFX6-NEXT: v_sub_i32_e32 v9, vcc, 32, v12
+; GFX6-NEXT: v_lshl_b64 v[5:6], v[5:6], v13
+; GFX6-NEXT: v_sub_i32_e32 v12, vcc, 32, v13
+; GFX6-NEXT: v_min_u32_e32 v3, 1, v3
+; GFX6-NEXT: v_min_u32_e32 v0, 1, v0
+; GFX6-NEXT: v_min_u32_e32 v7, 1, v7
+; GFX6-NEXT: v_min_u32_e32 v5, 1, v5
+; GFX6-NEXT: v_or_b32_e32 v3, v4, v3
; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
-; GFX6-NEXT: v_or_b32_e32 v1, v8, v3
-; GFX6-NEXT: v_or_b32_e32 v3, v6, v5
-; GFX6-NEXT: v_cvt_f32_i32_e32 v2, v2
+; GFX6-NEXT: v_or_b32_e32 v1, v8, v7
+; GFX6-NEXT: v_or_b32_e32 v4, v6, v5
+; GFX6-NEXT: v_cvt_f32_i32_e32 v3, v3
; GFX6-NEXT: v_cvt_f32_i32_e32 v0, v0
; GFX6-NEXT: v_cvt_f32_i32_e32 v1, v1
-; GFX6-NEXT: v_cvt_f32_i32_e32 v3, v3
-; GFX6-NEXT: v_ldexp_f32_e32 v2, v2, v15
-; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v9
-; GFX6-NEXT: v_ldexp_f32_e32 v1, v1, v12
-; GFX6-NEXT: v_ldexp_f32_e32 v3, v3, v13
-; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX6-NEXT: v_cvt_f32_i32_e32 v4, v4
+; GFX6-NEXT: v_ldexp_f32_e32 v3, v3, v14
+; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v2
+; GFX6-NEXT: v_ldexp_f32_e32 v1, v1, v9
+; GFX6-NEXT: v_ldexp_f32_e32 v2, v4, v12
+; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v1
-; GFX6-NEXT: v_or_b32_e32 v1, v0, v2
-; GFX6-NEXT: v_or_b32_e32 v0, v3, v4
+; GFX6-NEXT: v_or_b32_e32 v1, v0, v3
+; GFX6-NEXT: v_or_b32_e32 v0, v2, v4
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], v[10:11], s[0:3], 0 addr64
; GFX6-NEXT: s_endpgm
;
@@ -681,83 +616,74 @@ define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f16(<4 x half> addrspace(1)*
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 5, v0
-; GFX8-NEXT: v_mov_b32_e32 v9, 0
-; GFX8-NEXT: v_lshlrev_b32_e32 v10, 3, v0
+; GFX8-NEXT: v_lshlrev_b32_e32 v9, 3, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, s3
; GFX8-NEXT: v_add_u32_e32 v5, vcc, s2, v1
-; GFX8-NEXT: v_addc_u32_e32 v6, vcc, v2, v9, vcc
+; GFX8-NEXT: v_addc_u32_e32 v6, vcc, 0, v2, vcc
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 16, v5
; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v6, vcc
; GFX8-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
; GFX8-NEXT: flat_load_dwordx4 v[5:8], v[5:6]
-; GFX8-NEXT: v_mov_b32_e32 v11, s1
+; GFX8-NEXT: v_mov_b32_e32 v10, s1
; GFX8-NEXT: s_waitcnt vmcnt(1)
-; GFX8-NEXT: v_xor_b32_e32 v15, v3, v4
+; GFX8-NEXT: v_xor_b32_e32 v14, v3, v4
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_xor_b32_e32 v0, v7, v8
-; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, -1, v0
-; GFX8-NEXT: v_xor_b32_e32 v13, v5, v6
-; GFX8-NEXT: v_cndmask_b32_e64 v0, 32, 33, vcc
-; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, -1, v13
-; GFX8-NEXT: v_cndmask_b32_e64 v13, 32, 33, vcc
-; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, -1, v15
-; GFX8-NEXT: v_xor_b32_e32 v17, v1, v2
-; GFX8-NEXT: v_cndmask_b32_e64 v15, 32, 33, vcc
-; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, -1, v17
-; GFX8-NEXT: v_ffbh_i32_e32 v12, v8
-; GFX8-NEXT: v_cndmask_b32_e64 v17, 32, 33, vcc
-; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, -1, v12
-; GFX8-NEXT: v_ffbh_i32_e32 v14, v6
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc
-; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, -1, v14
-; GFX8-NEXT: v_ffbh_i32_e32 v16, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v12, v13, v14, vcc
-; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, -1, v16
-; GFX8-NEXT: v_ffbh_i32_e32 v18, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v13, v15, v16, vcc
-; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, -1, v18
-; GFX8-NEXT: v_cndmask_b32_e32 v14, v17, v18, vcc
-; GFX8-NEXT: v_add_u32_e32 v15, vcc, -1, v0
-; GFX8-NEXT: v_sub_u32_e32 v16, vcc, 33, v0
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, -1, v12
-; GFX8-NEXT: v_lshlrev_b64 v[7:8], v15, v[7:8]
-; GFX8-NEXT: v_add_u32_e32 v17, vcc, -1, v13
-; GFX8-NEXT: v_add_u32_e32 v18, vcc, -1, v14
-; GFX8-NEXT: v_lshlrev_b64 v[5:6], v0, v[5:6]
-; GFX8-NEXT: v_sub_u32_e32 v12, vcc, 33, v12
-; GFX8-NEXT: v_sub_u32_e32 v13, vcc, 33, v13
-; GFX8-NEXT: v_sub_u32_e32 v14, vcc, 33, v14
-; GFX8-NEXT: v_lshlrev_b64 v[3:4], v17, v[3:4]
-; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
-; GFX8-NEXT: v_lshlrev_b64 v[0:1], v18, v[1:2]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
-; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
-; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
-; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX8-NEXT: v_xor_b32_e32 v12, v5, v6
+; GFX8-NEXT: v_xor_b32_e32 v16, v1, v2
+; GFX8-NEXT: v_ffbh_i32_e32 v11, v8
+; GFX8-NEXT: v_ffbh_i32_e32 v13, v6
+; GFX8-NEXT: v_ffbh_i32_e32 v15, v4
+; GFX8-NEXT: v_ffbh_i32_e32 v17, v2
+; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v0
+; GFX8-NEXT: v_ashrrev_i32_e32 v12, 31, v12
+; GFX8-NEXT: v_ashrrev_i32_e32 v14, 31, v14
+; GFX8-NEXT: v_ashrrev_i32_e32 v16, 31, v16
+; GFX8-NEXT: v_add_u32_e32 v11, vcc, -1, v11
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0
+; GFX8-NEXT: v_add_u32_e32 v13, vcc, -1, v13
+; GFX8-NEXT: v_add_u32_e32 v15, vcc, -1, v15
+; GFX8-NEXT: v_add_u32_e32 v17, vcc, -1, v17
+; GFX8-NEXT: v_add_u32_e32 v12, vcc, 32, v12
+; GFX8-NEXT: v_add_u32_e32 v14, vcc, 32, v14
+; GFX8-NEXT: v_add_u32_e32 v16, vcc, 32, v16
+; GFX8-NEXT: v_min_u32_e32 v0, v11, v0
+; GFX8-NEXT: v_min_u32_e32 v11, v13, v12
+; GFX8-NEXT: v_min_u32_e32 v12, v15, v14
+; GFX8-NEXT: v_min_u32_e32 v13, v17, v16
+; GFX8-NEXT: v_lshlrev_b64 v[5:6], v11, v[5:6]
+; GFX8-NEXT: v_lshlrev_b64 v[3:4], v12, v[3:4]
+; GFX8-NEXT: v_lshlrev_b64 v[7:8], v0, v[7:8]
+; GFX8-NEXT: v_sub_u32_e32 v14, vcc, 32, v0
+; GFX8-NEXT: v_lshlrev_b64 v[0:1], v13, v[1:2]
+; GFX8-NEXT: v_min_u32_e32 v7, 1, v7
+; GFX8-NEXT: v_min_u32_e32 v5, 1, v5
+; GFX8-NEXT: v_min_u32_e32 v3, 1, v3
+; GFX8-NEXT: v_min_u32_e32 v0, 1, v0
; GFX8-NEXT: v_or_b32_e32 v3, v4, v3
; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
-; GFX8-NEXT: v_or_b32_e32 v2, v8, v2
+; GFX8-NEXT: v_or_b32_e32 v7, v8, v7
; GFX8-NEXT: v_or_b32_e32 v5, v6, v5
-; GFX8-NEXT: v_cvt_f32_i32_e32 v1, v2
-; GFX8-NEXT: v_cvt_f32_i32_e32 v2, v5
+; GFX8-NEXT: v_cvt_f32_i32_e32 v1, v7
+; GFX8-NEXT: v_cvt_f32_i32_e32 v4, v5
; GFX8-NEXT: v_cvt_f32_i32_e32 v3, v3
; GFX8-NEXT: v_cvt_f32_i32_e32 v0, v0
-; GFX8-NEXT: v_ldexp_f32 v1, v1, v16
-; GFX8-NEXT: v_ldexp_f32 v2, v2, v12
-; GFX8-NEXT: v_ldexp_f32 v3, v3, v13
-; GFX8-NEXT: v_ldexp_f32 v0, v0, v14
+; GFX8-NEXT: v_sub_u32_e32 v11, vcc, 32, v11
+; GFX8-NEXT: v_sub_u32_e32 v12, vcc, 32, v12
+; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 32, v13
+; GFX8-NEXT: v_ldexp_f32 v4, v4, v11
+; GFX8-NEXT: v_ldexp_f32 v3, v3, v12
+; GFX8-NEXT: v_ldexp_f32 v0, v0, v2
+; GFX8-NEXT: v_ldexp_f32 v1, v1, v14
; GFX8-NEXT: v_cvt_f16_f32_e32 v5, v0
; GFX8-NEXT: v_cvt_f16_f32_sdwa v3, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; GFX8-NEXT: v_cvt_f16_f32_sdwa v4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; GFX8-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v10
-; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v11, v9, vcc
+; GFX8-NEXT: v_cvt_f16_f32_sdwa v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GFX8-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v9
+; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v10, vcc
; GFX8-NEXT: v_or_b32_e32 v3, v5, v3
-; GFX8-NEXT: v_or_b32_e32 v2, v2, v4
+; GFX8-NEXT: v_or_b32_e32 v2, v4, v2
; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; GFX8-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll b/llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll
index 59ef31d06d49..9017a5e0db46 100644
--- a/llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll
@@ -16,10 +16,9 @@ define amdgpu_kernel void @s_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64
; GFX6-NEXT: s_flbit_i32_b32 s0, s3
; GFX6-NEXT: s_min_u32 s8, s0, 32
; GFX6-NEXT: s_lshl_b64 s[0:1], s[2:3], s8
-; GFX6-NEXT: v_cmp_ne_u32_e64 s[2:3], s0, 0
-; GFX6-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[2:3]
-; GFX6-NEXT: v_or_b32_e32 v0, s1, v0
-; GFX6-NEXT: v_cvt_f32_u32_e32 v0, v0
+; GFX6-NEXT: s_min_u32 s0, s0, 1
+; GFX6-NEXT: s_or_b32 s0, s1, s0
+; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s0
; GFX6-NEXT: s_sub_i32 s0, 32, s8
; GFX6-NEXT: v_ldexp_f32_e64 v0, v0, s0
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
@@ -31,13 +30,12 @@ define amdgpu_kernel void @s_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_flbit_i32_b32 s4, s3
-; GFX8-NEXT: s_min_u32 s6, s4, 32
-; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], s6
-; GFX8-NEXT: v_cmp_ne_u32_e64 s[4:5], s2, 0
-; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
-; GFX8-NEXT: v_or_b32_e32 v0, s3, v0
-; GFX8-NEXT: v_cvt_f32_u32_e32 v0, v0
-; GFX8-NEXT: s_sub_i32 s2, 32, s6
+; GFX8-NEXT: s_min_u32 s4, s4, 32
+; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], s4
+; GFX8-NEXT: s_min_u32 s2, s2, 1
+; GFX8-NEXT: s_or_b32 s2, s3, s2
+; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s2
+; GFX8-NEXT: s_sub_i32 s2, 32, s4
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_ldexp_f32 v0, v0, s2
; GFX8-NEXT: v_cvt_f16_f32_e32 v2, v0
@@ -66,8 +64,7 @@ define amdgpu_kernel void @v_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64
; GFX6-NEXT: v_ffbh_u32_e32 v0, v4
; GFX6-NEXT: v_min_u32_e32 v0, 32, v0
; GFX6-NEXT: v_lshl_b64 v[3:4], v[3:4], v0
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
-; GFX6-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
+; GFX6-NEXT: v_min_u32_e32 v3, 1, v3
; GFX6-NEXT: v_or_b32_e32 v3, v4, v3
; GFX6-NEXT: v_cvt_f32_u32_e32 v3, v3
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, 32, v0
@@ -80,28 +77,26 @@ define amdgpu_kernel void @v_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 3, v0
-; GFX8-NEXT: v_mov_b32_e32 v3, 0
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, s3
; GFX8-NEXT: v_add_u32_e32 v1, vcc, s2, v1
-; GFX8-NEXT: v_addc_u32_e32 v2, vcc, v2, v3, vcc
+; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc
; GFX8-NEXT: flat_load_dwordx2 v[1:2], v[1:2]
; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_ffbh_u32_e32 v4, v2
-; GFX8-NEXT: v_min_u32_e32 v4, 32, v4
-; GFX8-NEXT: v_lshlrev_b64 v[1:2], v4, v[1:2]
-; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
-; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX8-NEXT: v_ffbh_u32_e32 v3, v2
+; GFX8-NEXT: v_min_u32_e32 v3, 32, v3
+; GFX8-NEXT: v_lshlrev_b64 v[1:2], v3, v[1:2]
+; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 32, v3
+; GFX8-NEXT: v_min_u32_e32 v1, 1, v1
; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
; GFX8-NEXT: v_cvt_f32_u32_e32 v1, v1
-; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 32, v4
; GFX8-NEXT: v_mov_b32_e32 v2, s1
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v0
-; GFX8-NEXT: v_ldexp_f32 v1, v1, v4
-; GFX8-NEXT: v_cvt_f16_f32_e32 v4, v1
-; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v2, v3, vcc
-; GFX8-NEXT: flat_store_short v[0:1], v4
+; GFX8-NEXT: v_ldexp_f32 v1, v1, v3
+; GFX8-NEXT: v_cvt_f16_f32_e32 v3, v1
+; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v2, vcc
+; GFX8-NEXT: flat_store_short v[0:1], v3
; GFX8-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
@@ -124,10 +119,9 @@ define amdgpu_kernel void @s_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64
; GFX6-NEXT: s_flbit_i32_b32 s0, s3
; GFX6-NEXT: s_min_u32 s8, s0, 32
; GFX6-NEXT: s_lshl_b64 s[0:1], s[2:3], s8
-; GFX6-NEXT: v_cmp_ne_u32_e64 s[2:3], s0, 0
-; GFX6-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[2:3]
-; GFX6-NEXT: v_or_b32_e32 v0, s1, v0
-; GFX6-NEXT: v_cvt_f32_u32_e32 v0, v0
+; GFX6-NEXT: s_min_u32 s0, s0, 1
+; GFX6-NEXT: s_or_b32 s0, s1, s0
+; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s0
; GFX6-NEXT: s_sub_i32 s0, 32, s8
; GFX6-NEXT: v_ldexp_f32_e64 v0, v0, s0
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
@@ -138,14 +132,13 @@ define amdgpu_kernel void @s_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_flbit_i32_b32 s4, s3
-; GFX8-NEXT: s_min_u32 s6, s4, 32
-; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], s6
-; GFX8-NEXT: v_cmp_ne_u32_e64 s[4:5], s2, 0
-; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
-; GFX8-NEXT: v_or_b32_e32 v0, s3, v0
-; GFX8-NEXT: v_cvt_f32_u32_e32 v2, v0
+; GFX8-NEXT: s_min_u32 s4, s4, 32
+; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], s4
+; GFX8-NEXT: s_min_u32 s2, s2, 1
+; GFX8-NEXT: s_or_b32 s2, s3, s2
+; GFX8-NEXT: v_cvt_f32_u32_e32 v2, s2
; GFX8-NEXT: v_mov_b32_e32 v0, s0
-; GFX8-NEXT: s_sub_i32 s0, 32, s6
+; GFX8-NEXT: s_sub_i32 s0, 32, s4
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_ldexp_f32 v2, v2, s0
; GFX8-NEXT: flat_store_dword v[0:1], v2
@@ -172,8 +165,7 @@ define amdgpu_kernel void @v_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64
; GFX6-NEXT: v_ffbh_u32_e32 v0, v4
; GFX6-NEXT: v_min_u32_e32 v0, 32, v0
; GFX6-NEXT: v_lshl_b64 v[3:4], v[3:4], v0
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
-; GFX6-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
+; GFX6-NEXT: v_min_u32_e32 v3, 1, v3
; GFX6-NEXT: v_or_b32_e32 v3, v4, v3
; GFX6-NEXT: v_cvt_f32_u32_e32 v3, v3
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, 32, v0
@@ -185,26 +177,24 @@ define amdgpu_kernel void @v_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 3, v0
-; GFX8-NEXT: v_mov_b32_e32 v3, 0
-; GFX8-NEXT: v_lshlrev_b32_e32 v4, 2, v0
+; GFX8-NEXT: v_lshlrev_b32_e32 v3, 2, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, s3
; GFX8-NEXT: v_add_u32_e32 v1, vcc, s2, v1
-; GFX8-NEXT: v_addc_u32_e32 v2, vcc, v2, v3, vcc
+; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc
; GFX8-NEXT: flat_load_dwordx2 v[1:2], v[1:2]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_ffbh_u32_e32 v0, v2
-; GFX8-NEXT: v_min_u32_e32 v5, 32, v0
-; GFX8-NEXT: v_lshlrev_b64 v[0:1], v5, v[1:2]
+; GFX8-NEXT: v_min_u32_e32 v4, 32, v0
+; GFX8-NEXT: v_lshlrev_b64 v[0:1], v4, v[1:2]
; GFX8-NEXT: v_mov_b32_e32 v2, s1
-; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
-; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX8-NEXT: v_min_u32_e32 v0, 1, v0
; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
-; GFX8-NEXT: v_cvt_f32_u32_e32 v6, v0
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v4
-; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v2, v3, vcc
-; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 32, v5
-; GFX8-NEXT: v_ldexp_f32 v2, v6, v2
+; GFX8-NEXT: v_cvt_f32_u32_e32 v5, v0
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v3
+; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v2, vcc
+; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 32, v4
+; GFX8-NEXT: v_ldexp_f32 v2, v5, v2
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -229,19 +219,17 @@ define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f32(<2 x float> addrspace(1)*
; GFX6-NEXT: s_min_u32 s8, s8, 32
; GFX6-NEXT: s_min_u32 s9, s9, 32
; GFX6-NEXT: s_lshl_b64 s[6:7], s[6:7], s8
-; GFX6-NEXT: s_sub_i32 s10, 32, s8
+; GFX6-NEXT: s_sub_i32 s8, 32, s8
; GFX6-NEXT: s_lshl_b64 s[4:5], s[4:5], s9
-; GFX6-NEXT: s_sub_i32 s11, 32, s9
-; GFX6-NEXT: v_cmp_ne_u32_e64 s[8:9], s6, 0
-; GFX6-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[8:9]
-; GFX6-NEXT: v_cmp_ne_u32_e64 s[8:9], s4, 0
-; GFX6-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[8:9]
-; GFX6-NEXT: v_or_b32_e32 v0, s7, v0
-; GFX6-NEXT: v_or_b32_e32 v1, s5, v1
-; GFX6-NEXT: v_cvt_f32_u32_e32 v0, v0
-; GFX6-NEXT: v_cvt_f32_u32_e32 v2, v1
-; GFX6-NEXT: v_ldexp_f32_e64 v1, v0, s10
-; GFX6-NEXT: v_ldexp_f32_e64 v0, v2, s11
+; GFX6-NEXT: s_sub_i32 s9, 32, s9
+; GFX6-NEXT: s_min_u32 s6, s6, 1
+; GFX6-NEXT: s_min_u32 s4, s4, 1
+; GFX6-NEXT: s_or_b32 s6, s7, s6
+; GFX6-NEXT: s_or_b32 s4, s5, s4
+; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s6
+; GFX6-NEXT: v_cvt_f32_u32_e32 v2, s4
+; GFX6-NEXT: v_ldexp_f32_e64 v1, v0, s8
+; GFX6-NEXT: v_ldexp_f32_e64 v0, v2, s9
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
@@ -251,22 +239,20 @@ define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f32(<2 x float> addrspace(1)*
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x34
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_flbit_i32_b32 s6, s3
-; GFX8-NEXT: s_min_u32 s8, s6, 32
; GFX8-NEXT: s_flbit_i32_b32 s7, s1
-; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], s8
-; GFX8-NEXT: s_min_u32 s9, s7, 32
-; GFX8-NEXT: v_cmp_ne_u32_e64 s[6:7], s2, 0
-; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[6:7]
-; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s9
-; GFX8-NEXT: v_or_b32_e32 v0, s3, v0
-; GFX8-NEXT: v_cmp_ne_u32_e64 s[2:3], s0, 0
-; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[2:3]
-; GFX8-NEXT: v_or_b32_e32 v1, s1, v1
-; GFX8-NEXT: v_cvt_f32_u32_e32 v0, v0
-; GFX8-NEXT: v_cvt_f32_u32_e32 v2, v1
-; GFX8-NEXT: s_sub_i32 s0, 32, s8
+; GFX8-NEXT: s_min_u32 s6, s6, 32
+; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], s6
+; GFX8-NEXT: s_min_u32 s7, s7, 32
+; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s7
+; GFX8-NEXT: s_min_u32 s2, s2, 1
+; GFX8-NEXT: s_min_u32 s0, s0, 1
+; GFX8-NEXT: s_or_b32 s2, s3, s2
+; GFX8-NEXT: s_or_b32 s0, s1, s0
+; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s2
+; GFX8-NEXT: v_cvt_f32_u32_e32 v2, s0
+; GFX8-NEXT: s_sub_i32 s0, 32, s6
; GFX8-NEXT: v_ldexp_f32 v1, v0, s0
-; GFX8-NEXT: s_sub_i32 s0, 32, s9
+; GFX8-NEXT: s_sub_i32 s0, 32, s7
; GFX8-NEXT: v_ldexp_f32 v0, v2, s0
; GFX8-NEXT: v_mov_b32_e32 v2, s4
; GFX8-NEXT: v_mov_b32_e32 v3, s5
@@ -310,14 +296,10 @@ define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f32(<4 x float> addrspace(1)*
; GFX6-NEXT: v_sub_i32_e32 v9, vcc, 32, v12
; GFX6-NEXT: v_lshl_b64 v[5:6], v[5:6], v13
; GFX6-NEXT: v_sub_i32_e32 v12, vcc, 32, v13
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
-; GFX6-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
-; GFX6-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
-; GFX6-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
-; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GFX6-NEXT: v_min_u32_e32 v3, 1, v3
+; GFX6-NEXT: v_min_u32_e32 v0, 1, v0
+; GFX6-NEXT: v_min_u32_e32 v7, 1, v7
+; GFX6-NEXT: v_min_u32_e32 v5, 1, v5
; GFX6-NEXT: v_or_b32_e32 v3, v4, v3
; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
; GFX6-NEXT: v_or_b32_e32 v1, v8, v7
@@ -337,19 +319,18 @@ define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f32(<4 x float> addrspace(1)*
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 5, v0
-; GFX8-NEXT: v_mov_b32_e32 v10, 0
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, s3
; GFX8-NEXT: v_add_u32_e32 v5, vcc, s2, v1
-; GFX8-NEXT: v_addc_u32_e32 v6, vcc, v2, v10, vcc
+; GFX8-NEXT: v_addc_u32_e32 v6, vcc, 0, v2, vcc
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 16, v5
; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v6, vcc
; GFX8-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
; GFX8-NEXT: flat_load_dwordx4 v[5:8], v[5:6]
; GFX8-NEXT: v_add_u32_e32 v9, vcc, s0, v0
-; GFX8-NEXT: v_mov_b32_e32 v11, s1
-; GFX8-NEXT: v_addc_u32_e32 v10, vcc, v11, v10, vcc
+; GFX8-NEXT: v_mov_b32_e32 v10, s1
+; GFX8-NEXT: v_addc_u32_e32 v10, vcc, 0, v10, vcc
; GFX8-NEXT: s_waitcnt vmcnt(1)
; GFX8-NEXT: v_ffbh_u32_e32 v12, v4
; GFX8-NEXT: s_waitcnt vmcnt(0)
@@ -360,33 +341,29 @@ define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f32(<4 x float> addrspace(1)*
; GFX8-NEXT: v_min_u32_e32 v11, 32, v11
; GFX8-NEXT: v_min_u32_e32 v12, 32, v12
; GFX8-NEXT: v_min_u32_e32 v13, 32, v13
-; GFX8-NEXT: v_lshlrev_b64 v[7:8], v0, v[7:8]
-; GFX8-NEXT: v_sub_u32_e32 v14, vcc, 32, v0
; GFX8-NEXT: v_lshlrev_b64 v[5:6], v11, v[5:6]
; GFX8-NEXT: v_lshlrev_b64 v[3:4], v12, v[3:4]
+; GFX8-NEXT: v_lshlrev_b64 v[7:8], v0, v[7:8]
+; GFX8-NEXT: v_sub_u32_e32 v14, vcc, 32, v0
; GFX8-NEXT: v_lshlrev_b64 v[0:1], v13, v[1:2]
-; GFX8-NEXT: v_sub_u32_e32 v11, vcc, 32, v11
-; GFX8-NEXT: v_sub_u32_e32 v12, vcc, 32, v12
-; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 32, v13
-; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
-; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
-; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
-; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
-; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX8-NEXT: v_min_u32_e32 v7, 1, v7
+; GFX8-NEXT: v_min_u32_e32 v5, 1, v5
+; GFX8-NEXT: v_min_u32_e32 v3, 1, v3
+; GFX8-NEXT: v_min_u32_e32 v0, 1, v0
; GFX8-NEXT: v_or_b32_e32 v3, v4, v3
; GFX8-NEXT: v_or_b32_e32 v5, v6, v5
; GFX8-NEXT: v_or_b32_e32 v7, v8, v7
; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: v_cvt_f32_u32_e32 v1, v7
-; GFX8-NEXT: v_cvt_f32_u32_e32 v3, v3
; GFX8-NEXT: v_cvt_f32_u32_e32 v4, v5
+; GFX8-NEXT: v_cvt_f32_u32_e32 v3, v3
; GFX8-NEXT: v_cvt_f32_u32_e32 v5, v0
+; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 32, v13
+; GFX8-NEXT: v_sub_u32_e32 v11, vcc, 32, v11
+; GFX8-NEXT: v_sub_u32_e32 v12, vcc, 32, v12
; GFX8-NEXT: v_ldexp_f32 v1, v1, v14
-; GFX8-NEXT: v_ldexp_f32 v3, v3, v12
; GFX8-NEXT: v_ldexp_f32 v0, v4, v11
+; GFX8-NEXT: v_ldexp_f32 v3, v3, v12
; GFX8-NEXT: v_ldexp_f32 v2, v5, v2
; GFX8-NEXT: flat_store_dwordx4 v[9:10], v[0:3]
; GFX8-NEXT: s_endpgm
@@ -412,19 +389,17 @@ define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f16(<2 x half> addrspace(1)*
; GFX6-NEXT: s_min_u32 s8, s8, 32
; GFX6-NEXT: s_min_u32 s9, s9, 32
; GFX6-NEXT: s_lshl_b64 s[6:7], s[6:7], s8
-; GFX6-NEXT: s_sub_i32 s10, 32, s8
+; GFX6-NEXT: s_sub_i32 s8, 32, s8
; GFX6-NEXT: s_lshl_b64 s[4:5], s[4:5], s9
-; GFX6-NEXT: s_sub_i32 s11, 32, s9
-; GFX6-NEXT: v_cmp_ne_u32_e64 s[8:9], s6, 0
-; GFX6-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[8:9]
-; GFX6-NEXT: v_cmp_ne_u32_e64 s[8:9], s4, 0
-; GFX6-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[8:9]
-; GFX6-NEXT: v_or_b32_e32 v0, s7, v0
-; GFX6-NEXT: v_or_b32_e32 v1, s5, v1
-; GFX6-NEXT: v_cvt_f32_u32_e32 v0, v0
-; GFX6-NEXT: v_cvt_f32_u32_e32 v1, v1
-; GFX6-NEXT: v_ldexp_f32_e64 v0, v0, s10
-; GFX6-NEXT: v_ldexp_f32_e64 v1, v1, s11
+; GFX6-NEXT: s_sub_i32 s9, 32, s9
+; GFX6-NEXT: s_min_u32 s6, s6, 1
+; GFX6-NEXT: s_min_u32 s4, s4, 1
+; GFX6-NEXT: s_or_b32 s6, s7, s6
+; GFX6-NEXT: s_or_b32 s4, s5, s4
+; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s6
+; GFX6-NEXT: v_cvt_f32_u32_e32 v1, s4
+; GFX6-NEXT: v_ldexp_f32_e64 v0, v0, s8
+; GFX6-NEXT: v_ldexp_f32_e64 v1, v1, s9
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
@@ -442,18 +417,16 @@ define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f16(<2 x half> addrspace(1)*
; GFX8-NEXT: s_min_u32 s8, s2, 32
; GFX8-NEXT: s_min_u32 s9, s3, 32
; GFX8-NEXT: s_lshl_b64 s[2:3], s[6:7], s8
-; GFX8-NEXT: v_cmp_ne_u32_e64 s[6:7], s2, 0
-; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[6:7]
-; GFX8-NEXT: v_or_b32_e32 v0, s3, v0
-; GFX8-NEXT: s_lshl_b64 s[2:3], s[4:5], s9
-; GFX8-NEXT: v_cmp_ne_u32_e64 s[4:5], s2, 0
-; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
-; GFX8-NEXT: v_or_b32_e32 v1, s3, v1
-; GFX8-NEXT: v_cvt_f32_u32_e32 v0, v0
-; GFX8-NEXT: v_cvt_f32_u32_e32 v1, v1
-; GFX8-NEXT: s_sub_i32 s8, 32, s8
+; GFX8-NEXT: s_min_u32 s2, s2, 1
+; GFX8-NEXT: s_or_b32 s2, s3, s2
+; GFX8-NEXT: s_lshl_b64 s[4:5], s[4:5], s9
+; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s2
+; GFX8-NEXT: s_min_u32 s2, s4, 1
+; GFX8-NEXT: s_or_b32 s2, s5, s2
+; GFX8-NEXT: v_cvt_f32_u32_e32 v1, s2
+; GFX8-NEXT: s_sub_i32 s6, 32, s8
; GFX8-NEXT: s_sub_i32 s2, 32, s9
-; GFX8-NEXT: v_ldexp_f32 v0, v0, s8
+; GFX8-NEXT: v_ldexp_f32 v0, v0, s6
; GFX8-NEXT: v_ldexp_f32 v1, v1, s2
; GFX8-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT: v_cvt_f16_f32_e32 v1, v1
@@ -500,14 +473,10 @@ define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f16(<4 x half> addrspace(1)*
; GFX6-NEXT: v_sub_i32_e32 v9, vcc, 32, v12
; GFX6-NEXT: v_lshl_b64 v[5:6], v[5:6], v13
; GFX6-NEXT: v_sub_i32_e32 v12, vcc, 32, v13
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
-; GFX6-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
-; GFX6-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
-; GFX6-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
-; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GFX6-NEXT: v_min_u32_e32 v3, 1, v3
+; GFX6-NEXT: v_min_u32_e32 v0, 1, v0
+; GFX6-NEXT: v_min_u32_e32 v7, 1, v7
+; GFX6-NEXT: v_min_u32_e32 v5, 1, v5
; GFX6-NEXT: v_or_b32_e32 v3, v4, v3
; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
; GFX6-NEXT: v_or_b32_e32 v1, v8, v7
@@ -535,43 +504,35 @@ define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f16(<4 x half> addrspace(1)*
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 5, v0
-; GFX8-NEXT: v_mov_b32_e32 v9, 0
-; GFX8-NEXT: v_lshlrev_b32_e32 v10, 3, v0
+; GFX8-NEXT: v_lshlrev_b32_e32 v9, 3, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, s3
; GFX8-NEXT: v_add_u32_e32 v5, vcc, s2, v1
-; GFX8-NEXT: v_addc_u32_e32 v6, vcc, v2, v9, vcc
+; GFX8-NEXT: v_addc_u32_e32 v6, vcc, 0, v2, vcc
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 16, v5
; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v6, vcc
; GFX8-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
; GFX8-NEXT: flat_load_dwordx4 v[5:8], v[5:6]
-; GFX8-NEXT: v_mov_b32_e32 v11, s1
+; GFX8-NEXT: v_mov_b32_e32 v10, s1
; GFX8-NEXT: s_waitcnt vmcnt(1)
-; GFX8-NEXT: v_ffbh_u32_e32 v13, v4
+; GFX8-NEXT: v_ffbh_u32_e32 v12, v4
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_ffbh_u32_e32 v0, v8
-; GFX8-NEXT: v_ffbh_u32_e32 v12, v6
-; GFX8-NEXT: v_ffbh_u32_e32 v14, v2
+; GFX8-NEXT: v_ffbh_u32_e32 v11, v6
+; GFX8-NEXT: v_ffbh_u32_e32 v13, v2
; GFX8-NEXT: v_min_u32_e32 v0, 32, v0
+; GFX8-NEXT: v_min_u32_e32 v11, 32, v11
; GFX8-NEXT: v_min_u32_e32 v12, 32, v12
; GFX8-NEXT: v_min_u32_e32 v13, 32, v13
-; GFX8-NEXT: v_min_u32_e32 v14, 32, v14
+; GFX8-NEXT: v_lshlrev_b64 v[5:6], v11, v[5:6]
+; GFX8-NEXT: v_lshlrev_b64 v[3:4], v12, v[3:4]
; GFX8-NEXT: v_lshlrev_b64 v[7:8], v0, v[7:8]
-; GFX8-NEXT: v_sub_u32_e32 v15, vcc, 32, v0
-; GFX8-NEXT: v_lshlrev_b64 v[5:6], v12, v[5:6]
-; GFX8-NEXT: v_lshlrev_b64 v[3:4], v13, v[3:4]
-; GFX8-NEXT: v_lshlrev_b64 v[0:1], v14, v[1:2]
-; GFX8-NEXT: v_sub_u32_e32 v12, vcc, 32, v12
-; GFX8-NEXT: v_sub_u32_e32 v13, vcc, 32, v13
-; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 32, v14
-; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
-; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
-; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
-; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
-; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX8-NEXT: v_sub_u32_e32 v14, vcc, 32, v0
+; GFX8-NEXT: v_lshlrev_b64 v[0:1], v13, v[1:2]
+; GFX8-NEXT: v_min_u32_e32 v7, 1, v7
+; GFX8-NEXT: v_min_u32_e32 v5, 1, v5
+; GFX8-NEXT: v_min_u32_e32 v3, 1, v3
+; GFX8-NEXT: v_min_u32_e32 v0, 1, v0
; GFX8-NEXT: v_or_b32_e32 v3, v4, v3
; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: v_or_b32_e32 v7, v8, v7
@@ -580,16 +541,19 @@ define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f16(<4 x half> addrspace(1)*
; GFX8-NEXT: v_cvt_f32_u32_e32 v4, v5
; GFX8-NEXT: v_cvt_f32_u32_e32 v3, v3
; GFX8-NEXT: v_cvt_f32_u32_e32 v0, v0
-; GFX8-NEXT: v_ldexp_f32 v1, v1, v15
-; GFX8-NEXT: v_ldexp_f32 v4, v4, v12
-; GFX8-NEXT: v_ldexp_f32 v3, v3, v13
+; GFX8-NEXT: v_sub_u32_e32 v11, vcc, 32, v11
+; GFX8-NEXT: v_sub_u32_e32 v12, vcc, 32, v12
+; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 32, v13
+; GFX8-NEXT: v_ldexp_f32 v4, v4, v11
+; GFX8-NEXT: v_ldexp_f32 v3, v3, v12
; GFX8-NEXT: v_ldexp_f32 v0, v0, v2
+; GFX8-NEXT: v_ldexp_f32 v1, v1, v14
; GFX8-NEXT: v_cvt_f16_f32_e32 v5, v0
; GFX8-NEXT: v_cvt_f16_f32_sdwa v3, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT: v_cvt_f16_f32_sdwa v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT: v_cvt_f16_f32_e32 v4, v4
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v10
-; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v11, v9, vcc
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v9
+; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v10, vcc
; GFX8-NEXT: v_or_b32_e32 v3, v5, v3
; GFX8-NEXT: v_or_b32_e32 v2, v4, v2
; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3]