[llvm] 05783e1 - [amdgpu] Revise the conversion from i64 to f32.

Michael Liao via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 6 14:02:01 PDT 2021


Author: Michael Liao
Date: 2021-08-06T17:01:47-04:00
New Revision: 05783e1cfe40ce20b080c9fce1b148515c027fa4

URL: https://github.com/llvm/llvm-project/commit/05783e1cfe40ce20b080c9fce1b148515c027fa4
DIFF: https://github.com/llvm/llvm-project/commit/05783e1cfe40ce20b080c9fce1b148515c027fa4.diff

LOG: [amdgpu] Revise the conversion from i64 to f32.

- Replace 'cmp+sel' with 'umin' if possible.

Reviewed By: foad

Differential Revision: https://reviews.llvm.org/D107507
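
The idea behind the change: both the rounding ("sticky") bit and the maximal shift amount were previously computed with a compare followed by a select; each folds into a single umin (plus an add for the signed case). A minimal standalone C++ sketch of the two identities follows — the names adjustBit/maxShAmt and the scalar modelling are illustrative only, not part of the patch, and the arithmetic right shift assumes two's-complement integers:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    // Rounding ("sticky") bit for the truncated low half:
    //   (lo != 0) ? 1 : 0   ==   umin(1, lo)
    uint32_t adjustBit(uint32_t lo) { return std::min(1u, lo); }

    // Maximal shift amount on the signed path:
    //   33 if lo and hi have the same sign, 32 otherwise.
    // (lo ^ hi) >> 31 is an arithmetic shift, yielding -1 for opposite
    // signs and 0 otherwise, so the cmp+sel collapses to a single add.
    int32_t maxShAmt(int32_t lo, int32_t hi) { return 33 + ((lo ^ hi) >> 31); }

    int main() {
      assert(adjustBit(0) == 0);
      assert(adjustBit(0x12345u) == 1);
      assert(maxShAmt(5, 9) == 33);   // same sign
      assert(maxShAmt(-5, 9) == 32);  // opposite signs
      return 0;
    }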

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
    llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir
    llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll
    llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index e8a46e050974..9fa3ad64d0be 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2457,10 +2457,6 @@ SDValue AMDGPUTargetLowering::LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG,
   SDLoc SL(Op);
   SDValue Src = Op.getOperand(0);
 
-  EVT SetCCVT =
-      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i32);
-  SDValue ZeroI32 = DAG.getConstant(0, SL, MVT::i32);
-
   SDValue Lo, Hi;
   std::tie(Lo, Hi) = split64BitValue(Src, DAG);
   SDValue Sign;
@@ -2468,25 +2464,38 @@ SDValue AMDGPUTargetLowering::LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG,
   if (Signed && Subtarget->isGCN()) {
     // We also need to consider the sign bit in Lo if Hi has just sign bits,
     // i.e. Hi is 0 or -1. However, that only needs to take the MSB into
-    // account.
-    SDValue HasSameSign =
-        DAG.getSetCC(SL, SetCCVT, DAG.getNode(ISD::XOR, SL, MVT::i32, Lo, Hi),
-                     ZeroI32, ISD::SETGE);
-    SDValue MaxShAmt = DAG.getSelect(SL, MVT::i32, HasSameSign,
-                                     DAG.getConstant(33, SL, MVT::i32),
-                                     DAG.getConstant(32, SL, MVT::i32));
+    // account. That is, the maximal shift is
+    // - 32 if Lo and Hi have opposite signs;
+    // - 33 if Lo and Hi have the same sign.
+    //
+    // Or, MaxShAmt = 33 + OppositeSign, where
+    //
+    // OppositeSign is defined as ((Lo ^ Hi) >> 31), which is
+    // - -1 if Lo and Hi have opposite signs; and
+    // -  0 otherwise.
+    //
+    // All in all, ShAmt is calculated as
+    //
+    //  umin(sffbh(Hi), 33 + (Lo^Hi)>>31) - 1.
+    //
+    // or
+    //
+    //  umin(sffbh(Hi) - 1, 32 + (Lo^Hi)>>31).
+    //
+    // to reduce the critical path.
+    SDValue OppositeSign = DAG.getNode(
+        ISD::SRA, SL, MVT::i32, DAG.getNode(ISD::XOR, SL, MVT::i32, Lo, Hi),
+        DAG.getConstant(31, SL, MVT::i32));
+    SDValue MaxShAmt =
+        DAG.getNode(ISD::ADD, SL, MVT::i32, DAG.getConstant(32, SL, MVT::i32),
+                    OppositeSign);
     // Count the leading sign bits.
     ShAmt = DAG.getNode(AMDGPUISD::FFBH_I32, SL, MVT::i32, Hi);
-    ShAmt = DAG.getSelect(SL, MVT::i32,
-                          DAG.getSetCC(SL, SetCCVT, ShAmt,
-                                       DAG.getAllOnesConstant(SL, MVT::i32),
-                                       ISD::SETNE),
-                          ShAmt, MaxShAmt);
-    // The shift amount for signed integers is [1, 33].
     // Different from unsigned conversion, the shift should be one bit less to
     // preserve the sign bit.
     ShAmt = DAG.getNode(ISD::SUB, SL, MVT::i32, ShAmt,
                         DAG.getConstant(1, SL, MVT::i32));
+    ShAmt = DAG.getNode(ISD::UMIN, SL, MVT::i32, ShAmt, MaxShAmt);
   } else {
     if (Signed) {
       // Without 'ffbh_i32', only leading zeros could be counted. Take the
@@ -2507,9 +2516,9 @@ SDValue AMDGPUTargetLowering::LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG,
   // Split it again.
   std::tie(Lo, Hi) = split64BitValue(Norm, DAG);
   // Calculate the adjust bit for rounding.
-  SDValue Adjust = DAG.getSelect(
-      SL, MVT::i32, DAG.getSetCC(SL, SetCCVT, Lo, ZeroI32, ISD::SETNE),
-      DAG.getConstant(1, SL, MVT::i32), ZeroI32);
+  // (lo != 0) ? 1 : 0 => (lo >= 1) ? 1 : 0 => umin(1, lo)
+  SDValue Adjust = DAG.getNode(ISD::UMIN, SL, MVT::i32,
+                               DAG.getConstant(1, SL, MVT::i32), Lo);
   // Get the 32-bit normalized integer.
   Norm = DAG.getNode(ISD::OR, SL, MVT::i32, Hi, Adjust);
   // Convert the normalized 32-bit integer into f32.

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 5493bce79850..5a8c14234bb7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -2065,7 +2065,6 @@ bool AMDGPULegalizerInfo::legalizeITOFP(
 
   const LLT S64 = LLT::scalar(64);
   const LLT S32 = LLT::scalar(32);
-  const LLT S1 = LLT::scalar(1);
 
   assert(MRI.getType(Src) == S64);
 
@@ -2089,29 +2088,24 @@ bool AMDGPULegalizerInfo::legalizeITOFP(
 
   assert(MRI.getType(Dst) == S32);
 
-  auto Zero = B.buildConstant(S32, 0);
   auto One = B.buildConstant(S32, 1);
-  auto AllOnes = B.buildConstant(S32, -1);
 
   MachineInstrBuilder ShAmt;
   if (Signed) {
-    auto ThirtyThree = B.buildConstant(S32, 33);
+    auto ThirtyOne = B.buildConstant(S32, 31);
     auto X = B.buildXor(S32, Unmerge.getReg(0), Unmerge.getReg(1));
-    auto HasSameSign = B.buildICmp(CmpInst::ICMP_SGE, S1, X, Zero);
-    auto MaxShAmt = B.buildSelect(S32, HasSameSign, ThirtyThree, ThirtyTwo);
+    auto OppositeSign = B.buildAShr(S32, X, ThirtyOne);
+    auto MaxShAmt = B.buildAdd(S32, ThirtyTwo, OppositeSign);
     auto LS = B.buildIntrinsic(Intrinsic::amdgcn_sffbh, {S32},
                                /*HasSideEffects=*/false)
                   .addUse(Unmerge.getReg(1));
-    auto NotAllSameBits = B.buildICmp(CmpInst::ICMP_NE, S1, LS, AllOnes);
-    auto LS2 = B.buildSelect(S32, NotAllSameBits, LS, MaxShAmt);
-    ShAmt = B.buildSub(S32, LS2, One);
+    auto LS2 = B.buildSub(S32, LS, One);
+    ShAmt = B.buildUMin(S32, LS2, MaxShAmt);
   } else
     ShAmt = B.buildCTLZ(S32, Unmerge.getReg(1));
   auto Norm = B.buildShl(S64, Src, ShAmt);
   auto Unmerge2 = B.buildUnmerge({S32, S32}, Norm);
-  auto NotAllZeros =
-      B.buildICmp(CmpInst::ICMP_NE, S1, Unmerge2.getReg(0), Zero);
-  auto Adjust = B.buildSelect(S32, NotAllZeros, One, Zero);
+  auto Adjust = B.buildUMin(S32, One, Unmerge2.getReg(0));
   auto Norm2 = B.buildOr(S32, Unmerge2.getReg(1), Adjust);
   auto FVal = Signed ? B.buildSITOFP(S32, Norm2) : B.buildUITOFP(S32, Norm2);
   auto Scale = B.buildSub(S32, ThirtyTwo, ShAmt);

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
index e9ca0bddc5a2..2135d94bc0fa 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
@@ -1082,15 +1082,15 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) {
 ; SI-LABEL: v_test_sitofp_i64_byte_to_f32:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT:    v_ffbh_i32_e32 v2, 0
-; SI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
-; SI-NEXT:    v_cndmask_b32_e32 v2, 33, v2, vcc
 ; SI-NEXT:    v_and_b32_e32 v0, 0xff, v0
+; SI-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; SI-NEXT:    v_ffbh_i32_e32 v3, 0
+; SI-NEXT:    v_add_i32_e32 v2, vcc, 32, v2
+; SI-NEXT:    v_subrev_i32_e32 v3, vcc, 1, v3
 ; SI-NEXT:    v_mov_b32_e32 v1, 0
-; SI-NEXT:    v_subrev_i32_e32 v2, vcc, 1, v2
+; SI-NEXT:    v_min_u32_e32 v2, v3, v2
 ; SI-NEXT:    v_lshl_b64 v[0:1], v[0:1], v2
-; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
-; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-NEXT:    v_min_u32_e32 v0, 1, v0
 ; SI-NEXT:    v_or_b32_e32 v0, v1, v0
 ; SI-NEXT:    v_cvt_f32_i32_e32 v0, v0
 ; SI-NEXT:    v_sub_i32_e32 v1, vcc, 32, v2
@@ -1100,15 +1100,15 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) {
 ; VI-LABEL: v_test_sitofp_i64_byte_to_f32:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT:    v_ffbh_i32_e32 v2, 0
-; VI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
-; VI-NEXT:    v_cndmask_b32_e32 v2, 33, v2, vcc
 ; VI-NEXT:    v_and_b32_e32 v0, 0xff, v0
+; VI-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; VI-NEXT:    v_ffbh_i32_e32 v3, 0
+; VI-NEXT:    v_add_u32_e32 v2, vcc, 32, v2
+; VI-NEXT:    v_subrev_u32_e32 v3, vcc, 1, v3
 ; VI-NEXT:    v_mov_b32_e32 v1, 0
-; VI-NEXT:    v_subrev_u32_e32 v2, vcc, 1, v2
+; VI-NEXT:    v_min_u32_e32 v2, v3, v2
 ; VI-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
-; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
-; VI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-NEXT:    v_min_u32_e32 v0, 1, v0
 ; VI-NEXT:    v_or_b32_e32 v0, v1, v0
 ; VI-NEXT:    v_cvt_f32_i32_e32 v0, v0
 ; VI-NEXT:    v_sub_u32_e32 v1, vcc, 32, v2
@@ -1128,8 +1128,7 @@ define float @v_test_uitofp_i64_byte_to_f32(i64 %arg0) {
 ; SI-NEXT:    v_mov_b32_e32 v1, 0
 ; SI-NEXT:    v_min_u32_e32 v2, 32, v2
 ; SI-NEXT:    v_lshl_b64 v[0:1], v[0:1], v2
-; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
-; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-NEXT:    v_min_u32_e32 v0, 1, v0
 ; SI-NEXT:    v_or_b32_e32 v0, v1, v0
 ; SI-NEXT:    v_cvt_f32_u32_e32 v0, v0
 ; SI-NEXT:    v_sub_i32_e32 v1, vcc, 32, v2
@@ -1144,8 +1143,7 @@ define float @v_test_uitofp_i64_byte_to_f32(i64 %arg0) {
 ; VI-NEXT:    v_mov_b32_e32 v1, 0
 ; VI-NEXT:    v_min_u32_e32 v2, 32, v2
 ; VI-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
-; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
-; VI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-NEXT:    v_min_u32_e32 v0, 1, v0
 ; VI-NEXT:    v_or_b32_e32 v0, v1, v0
 ; VI-NEXT:    v_cvt_f32_u32_e32 v0, v0
 ; VI-NEXT:    v_sub_u32_e32 v1, vcc, 32, v2

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir
index ccbf30096c85..4a0310c4b229 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir
@@ -100,48 +100,40 @@ body: |
     ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
     ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
     ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
-    ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 33
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV1]]
-    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sge), [[XOR]](s32), [[C1]]
-    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C4]], [[C]]
+    ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32)
+    ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]]
     ; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](s32)
-    ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[INT]](s32), [[C3]]
-    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[INT]], [[SELECT]]
-    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SELECT1]], [[C2]]
-    ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[SUB]](s32)
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]]
+    ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]]
+    ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32)
     ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
-    ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C1]]
-    ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C1]]
-    ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT2]]
+    ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV2]]
+    ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
     ; GFX6: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32)
-    ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SUB]]
+    ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
     ; GFX6: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s32), [[SUB1]](s32)
     ; GFX6: $vgpr0 = COPY [[INT1]](s32)
     ; GFX8-LABEL: name: test_sitofp_s64_to_s32
     ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
     ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
     ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
-    ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 33
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV1]]
-    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sge), [[XOR]](s32), [[C1]]
-    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C4]], [[C]]
+    ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32)
+    ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]]
     ; GFX8: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](s32)
-    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[INT]](s32), [[C3]]
-    ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[INT]], [[SELECT]]
-    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SELECT1]], [[C2]]
-    ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[SUB]](s32)
+    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]]
+    ; GFX8: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]]
+    ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32)
     ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
-    ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C1]]
-    ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C1]]
-    ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT2]]
+    ; GFX8: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV2]]
+    ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
     ; GFX8: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32)
-    ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SUB]]
+    ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
     ; GFX8: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s32), [[SUB1]](s32)
     ; GFX8: $vgpr0 = COPY [[INT1]](s32)
     %0:_(s64) = COPY $vgpr0_vgpr1
@@ -420,24 +412,20 @@ body: |
     ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 33
     ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64)
     ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
-    ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 33
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV1]]
-    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sge), [[XOR]](s32), [[C1]]
-    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C4]], [[C]]
+    ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32)
+    ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]]
     ; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](s32)
-    ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[INT]](s32), [[C3]]
-    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[INT]], [[SELECT]]
-    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SELECT1]], [[C2]]
-    ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT_INREG]], [[SUB]](s32)
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]]
+    ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]]
+    ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT_INREG]], [[UMIN]](s32)
     ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
-    ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C1]]
-    ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C1]]
-    ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT2]]
+    ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV2]]
+    ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
     ; GFX6: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32)
-    ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SUB]]
+    ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
     ; GFX6: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s32), [[SUB1]](s32)
     ; GFX6: $vgpr0 = COPY [[INT1]](s32)
     ; GFX8-LABEL: name: test_sitofp_s33_to_s32
@@ -446,24 +434,20 @@ body: |
     ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 33
     ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64)
     ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
-    ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 33
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV1]]
-    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sge), [[XOR]](s32), [[C1]]
-    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C4]], [[C]]
+    ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32)
+    ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]]
     ; GFX8: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](s32)
-    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[INT]](s32), [[C3]]
-    ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[INT]], [[SELECT]]
-    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SELECT1]], [[C2]]
-    ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT_INREG]], [[SUB]](s32)
+    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]]
+    ; GFX8: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]]
+    ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT_INREG]], [[UMIN]](s32)
     ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
-    ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C1]]
-    ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C1]]
-    ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT2]]
+    ; GFX8: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV2]]
+    ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
     ; GFX8: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32)
-    ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SUB]]
+    ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
     ; GFX8: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s32), [[SUB1]](s32)
     ; GFX8: $vgpr0 = COPY [[INT1]](s32)
     %0:_(s64) = COPY $vgpr0_vgpr1
@@ -482,24 +466,20 @@ body: |
     ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
     ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
     ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
-    ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 33
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV1]]
-    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sge), [[XOR]](s32), [[C1]]
-    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C4]], [[C]]
+    ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32)
+    ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]]
     ; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](s32)
-    ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[INT]](s32), [[C3]]
-    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[INT]], [[SELECT]]
-    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SELECT1]], [[C2]]
-    ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[SUB]](s32)
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]]
+    ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]]
+    ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32)
     ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
-    ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C1]]
-    ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C1]]
-    ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT2]]
+    ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV2]]
+    ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
     ; GFX6: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32)
-    ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SUB]]
+    ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
     ; GFX6: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s32), [[SUB1]](s32)
     ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32)
     ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
@@ -508,24 +488,20 @@ body: |
     ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
     ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
     ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
-    ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 33
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV1]]
-    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sge), [[XOR]](s32), [[C1]]
-    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C4]], [[C]]
+    ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32)
+    ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]]
     ; GFX8: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](s32)
-    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[INT]](s32), [[C3]]
-    ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[INT]], [[SELECT]]
-    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SELECT1]], [[C2]]
-    ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[SUB]](s32)
+    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]]
+    ; GFX8: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]]
+    ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32)
     ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
-    ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C1]]
-    ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C1]]
-    ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT2]]
+    ; GFX8: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV2]]
+    ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
     ; GFX8: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32)
-    ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SUB]]
+    ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
     ; GFX8: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s32), [[SUB1]](s32)
     ; GFX8: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32)
     ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
@@ -547,47 +523,41 @@ body: |
     ; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
     ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
     ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
-    ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 33
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV2]], [[UV3]]
-    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sge), [[XOR]](s32), [[C1]]
-    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C4]], [[C]]
+    ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32)
+    ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]]
     ; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV3]](s32)
-    ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[INT]](s32), [[C3]]
-    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[INT]], [[SELECT]]
-    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SELECT1]], [[C2]]
-    ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[SUB]](s32)
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]]
+    ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]]
+    ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UMIN]](s32)
     ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
-    ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV4]](s32), [[C1]]
-    ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C1]]
-    ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV5]], [[SELECT2]]
+    ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV4]]
+    ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV5]], [[UMIN1]]
     ; GFX6: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32)
-    ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SUB]]
+    ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
     ; GFX6: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s32), [[SUB1]](s32)
     ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32)
     ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
     ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV6]], [[UV7]]
-    ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sge), [[XOR1]](s32), [[C1]]
-    ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[C4]], [[C]]
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[XOR1]], [[C2]](s32)
+    ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR1]]
     ; GFX6: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV7]](s32)
-    ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[INT2]](s32), [[C3]]
-    ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s1), [[INT2]], [[SELECT3]]
-    ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SELECT4]], [[C2]]
-    ; GFX6: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB2]](s32)
+    ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[INT2]], [[C1]]
+    ; GFX6: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[SUB2]], [[ADD1]]
+    ; GFX6: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UMIN2]](s32)
     ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL1]](s64)
-    ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV8]](s32), [[C1]]
-    ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[C2]], [[C1]]
-    ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV9]], [[SELECT5]]
+    ; GFX6: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV8]]
+    ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV9]], [[UMIN3]]
     ; GFX6: [[SITOFP1:%[0-9]+]]:_(s32) = G_SITOFP [[OR1]](s32)
-    ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SUB2]]
+    ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN2]]
     ; GFX6: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP1]](s32), [[SUB3]](s32)
     ; GFX6: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT3]](s32)
     ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
     ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
-    ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
     ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
     ; GFX6: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
     ; GFX6: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
@@ -596,47 +566,41 @@ body: |
     ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
     ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
     ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
-    ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 33
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV2]], [[UV3]]
-    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sge), [[XOR]](s32), [[C1]]
-    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C4]], [[C]]
+    ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32)
+    ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]]
     ; GFX8: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV3]](s32)
-    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[INT]](s32), [[C3]]
-    ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[INT]], [[SELECT]]
-    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SELECT1]], [[C2]]
-    ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[SUB]](s32)
+    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]]
+    ; GFX8: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]]
+    ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UMIN]](s32)
     ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
-    ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV4]](s32), [[C1]]
-    ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C1]]
-    ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV5]], [[SELECT2]]
+    ; GFX8: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV4]]
+    ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV5]], [[UMIN1]]
     ; GFX8: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[OR]](s32)
-    ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SUB]]
+    ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
     ; GFX8: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s32), [[SUB1]](s32)
     ; GFX8: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32)
     ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
     ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV6]], [[UV7]]
-    ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sge), [[XOR1]](s32), [[C1]]
-    ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[C4]], [[C]]
+    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[XOR1]], [[C2]](s32)
+    ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR1]]
     ; GFX8: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV7]](s32)
-    ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[INT2]](s32), [[C3]]
-    ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s1), [[INT2]], [[SELECT3]]
-    ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SELECT4]], [[C2]]
-    ; GFX8: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB2]](s32)
+    ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[INT2]], [[C1]]
+    ; GFX8: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[SUB2]], [[ADD1]]
+    ; GFX8: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UMIN2]](s32)
     ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL1]](s64)
-    ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV8]](s32), [[C1]]
-    ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[C2]], [[C1]]
-    ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV9]], [[SELECT5]]
+    ; GFX8: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV8]]
+    ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV9]], [[UMIN3]]
     ; GFX8: [[SITOFP1:%[0-9]+]]:_(s32) = G_SITOFP [[OR1]](s32)
-    ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SUB2]]
+    ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN2]]
     ; GFX8: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP1]](s32), [[SUB3]](s32)
     ; GFX8: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT3]](s32)
     ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
     ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
-    ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+    ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
     ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
     ; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
     ; GFX8: $vgpr0 = COPY [[BITCAST]](<2 x s16>)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir
index d3db5f8bb847..747bd70082f5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir
@@ -75,15 +75,13 @@ body: |
     ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
     ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
     ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; GFX6: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV1]](s32)
     ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]]
     ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32)
     ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
-    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C1]]
-    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C1]]
-    ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT]]
+    ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV2]]
+    ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
     ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32)
     ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
     ; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32)
@@ -92,15 +90,13 @@ body: |
     ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
     ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
     ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; GFX8: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV1]](s32)
     ; GFX8: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]]
     ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32)
     ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
-    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C1]]
-    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C1]]
-    ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT]]
+    ; GFX8: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV2]]
+    ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
     ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32)
     ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
     ; GFX8: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32)
@@ -393,15 +389,13 @@ body: |
     ; GFX6: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
     ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
     ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; GFX6: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV1]](s32)
     ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C1]]
     ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND]], [[UMIN]](s32)
     ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
-    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C2]]
-    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[C2]]
-    ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT]]
+    ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C2]], [[UV2]]
+    ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
     ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32)
     ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UMIN]]
     ; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32)
@@ -413,15 +407,13 @@ body: |
     ; GFX8: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
     ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
     ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; GFX8: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV1]](s32)
     ; GFX8: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C1]]
     ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND]], [[UMIN]](s32)
     ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
-    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C2]]
-    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[C2]]
-    ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT]]
+    ; GFX8: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C2]], [[UV2]]
+    ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
     ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32)
     ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UMIN]]
     ; GFX8: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32)
@@ -442,15 +434,13 @@ body: |
     ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
     ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
     ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; GFX6: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV1]](s32)
     ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]]
     ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32)
     ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
-    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C1]]
-    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C1]]
-    ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT]]
+    ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV2]]
+    ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
     ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32)
     ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
     ; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32)
@@ -461,15 +451,13 @@ body: |
     ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
     ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
     ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; GFX8: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV1]](s32)
     ; GFX8: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]]
     ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32)
     ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
-    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C1]]
-    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C1]]
-    ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT]]
+    ; GFX8: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV2]]
+    ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UMIN1]]
     ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32)
     ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
     ; GFX8: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32)
@@ -493,35 +481,32 @@ body: |
     ; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
     ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
     ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; GFX6: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV3]](s32)
     ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]]
     ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UMIN]](s32)
     ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
-    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV4]](s32), [[C1]]
-    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C1]]
-    ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV5]], [[SELECT]]
+    ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV4]]
+    ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV5]], [[UMIN1]]
     ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32)
     ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
     ; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32)
     ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32)
     ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
     ; GFX6: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV7]](s32)
-    ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_1]], [[C]]
-    ; GFX6: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UMIN1]](s32)
+    ; GFX6: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_1]], [[C]]
+    ; GFX6: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UMIN2]](s32)
     ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL1]](s64)
-    ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV8]](s32), [[C1]]
-    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C2]], [[C1]]
-    ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV9]], [[SELECT1]]
+    ; GFX6: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV8]]
+    ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV9]], [[UMIN3]]
     ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[OR1]](s32)
-    ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN1]]
+    ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN2]]
     ; GFX6: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP1]](s32), [[SUB1]](s32)
     ; GFX6: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32)
     ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
     ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
-    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
     ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
     ; GFX6: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
     ; GFX6: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
@@ -530,35 +515,32 @@ body: |
     ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
     ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
     ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; GFX8: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV3]](s32)
     ; GFX8: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]]
     ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UMIN]](s32)
     ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
-    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV4]](s32), [[C1]]
-    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C1]]
-    ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV5]], [[SELECT]]
+    ; GFX8: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV4]]
+    ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV5]], [[UMIN1]]
     ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32)
     ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
     ; GFX8: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32)
     ; GFX8: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32)
     ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
     ; GFX8: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV7]](s32)
-    ; GFX8: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_1]], [[C]]
-    ; GFX8: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UMIN1]](s32)
+    ; GFX8: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_1]], [[C]]
+    ; GFX8: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UMIN2]](s32)
     ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL1]](s64)
-    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV8]](s32), [[C1]]
-    ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C2]], [[C1]]
-    ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV9]], [[SELECT1]]
+    ; GFX8: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[C1]], [[UV8]]
+    ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV9]], [[UMIN3]]
     ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[OR1]](s32)
-    ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN1]]
+    ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN2]]
     ; GFX8: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP1]](s32), [[SUB1]](s32)
     ; GFX8: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32)
     ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
     ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
-    ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
     ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
     ; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
     ; GFX8: $vgpr0 = COPY [[BITCAST]](<2 x s16>)

diff  --git a/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll b/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll
index 06c678028a1c..7ea3f9249b10 100644
--- a/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll
@@ -13,21 +13,18 @@ define amdgpu_kernel void @s_sint_to_fp_i64_to_f16(half addrspace(1)* %out, i64
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s4, s0
 ; GFX6-NEXT:    s_mov_b32 s5, s1
-; GFX6-NEXT:    s_xor_b32 s0, s2, s3
-; GFX6-NEXT:    s_flbit_i32 s8, s3
-; GFX6-NEXT:    v_cmp_gt_i32_e64 s[0:1], s0, -1
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, 32, 33, s[0:1]
-; GFX6-NEXT:    v_mov_b32_e32 v1, s8
-; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, s8, -1
-; GFX6-NEXT:    v_cndmask_b32_e32 v2, v0, v1, vcc
-; GFX6-NEXT:    v_add_i32_e32 v0, vcc, -1, v2
-; GFX6-NEXT:    v_lshl_b64 v[0:1], s[2:3], v0
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX6-NEXT:    v_or_b32_e32 v0, v1, v0
-; GFX6-NEXT:    v_cvt_f32_i32_e32 v0, v0
-; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 33, v2
-; GFX6-NEXT:    v_ldexp_f32_e32 v0, v0, v1
+; GFX6-NEXT:    s_flbit_i32 s0, s3
+; GFX6-NEXT:    s_xor_b32 s1, s2, s3
+; GFX6-NEXT:    s_add_i32 s0, s0, -1
+; GFX6-NEXT:    s_ashr_i32 s1, s1, 31
+; GFX6-NEXT:    s_add_i32 s1, s1, 32
+; GFX6-NEXT:    s_min_u32 s8, s0, s1
+; GFX6-NEXT:    s_lshl_b64 s[0:1], s[2:3], s8
+; GFX6-NEXT:    s_min_u32 s0, s0, 1
+; GFX6-NEXT:    s_or_b32 s0, s1, s0
+; GFX6-NEXT:    v_cvt_f32_i32_e32 v0, s0
+; GFX6-NEXT:    s_sub_i32 s0, 32, s8
+; GFX6-NEXT:    v_ldexp_f32_e64 v0, v0, s0
 ; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX6-NEXT:    buffer_store_short v0, off, s[4:7], 0
 ; GFX6-NEXT:    s_endpgm
@@ -36,19 +33,17 @@ define amdgpu_kernel void @s_sint_to_fp_i64_to_f16(half addrspace(1)* %out, i64
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    s_xor_b32 s4, s2, s3
-; GFX8-NEXT:    s_cmp_gt_i32 s4, -1
-; GFX8-NEXT:    s_flbit_i32 s5, s3
-; GFX8-NEXT:    s_cselect_b32 s4, 33, 32
-; GFX8-NEXT:    s_cmp_lg_u32 s5, -1
-; GFX8-NEXT:    s_cselect_b32 s6, s5, s4
-; GFX8-NEXT:    s_add_i32 s4, s6, -1
+; GFX8-NEXT:    s_xor_b32 s5, s2, s3
+; GFX8-NEXT:    s_flbit_i32 s4, s3
+; GFX8-NEXT:    s_ashr_i32 s5, s5, 31
+; GFX8-NEXT:    s_add_i32 s4, s4, -1
+; GFX8-NEXT:    s_add_i32 s5, s5, 32
+; GFX8-NEXT:    s_min_u32 s4, s4, s5
 ; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], s4
-; GFX8-NEXT:    v_cmp_ne_u32_e64 s[4:5], s2, 0
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
-; GFX8-NEXT:    v_or_b32_e32 v0, s3, v0
-; GFX8-NEXT:    v_cvt_f32_i32_e32 v0, v0
-; GFX8-NEXT:    s_sub_i32 s2, 33, s6
+; GFX8-NEXT:    s_min_u32 s2, s2, 1
+; GFX8-NEXT:    s_or_b32 s2, s3, s2
+; GFX8-NEXT:    v_cvt_f32_i32_e32 v0, s2
+; GFX8-NEXT:    s_sub_i32 s2, 32, s4
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX8-NEXT:    v_ldexp_f32 v0, v0, s2
 ; GFX8-NEXT:    v_cvt_f16_f32_e32 v2, v0
@@ -76,17 +71,15 @@ define amdgpu_kernel void @v_sint_to_fp_i64_to_f16(half addrspace(1)* %out, i64
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    v_xor_b32_e32 v0, v3, v4
 ; GFX6-NEXT:    v_ffbh_i32_e32 v5, v4
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v0
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, 32, 33, vcc
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v5
-; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
-; GFX6-NEXT:    v_add_i32_e32 v5, vcc, -1, v0
-; GFX6-NEXT:    v_lshl_b64 v[3:4], v[3:4], v5
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
-; GFX6-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v0
+; GFX6-NEXT:    v_add_i32_e32 v5, vcc, -1, v5
+; GFX6-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
+; GFX6-NEXT:    v_min_u32_e32 v0, v5, v0
+; GFX6-NEXT:    v_lshl_b64 v[3:4], v[3:4], v0
+; GFX6-NEXT:    v_min_u32_e32 v3, 1, v3
 ; GFX6-NEXT:    v_or_b32_e32 v3, v4, v3
 ; GFX6-NEXT:    v_cvt_f32_i32_e32 v3, v3
-; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, 33, v0
+; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, 32, v0
 ; GFX6-NEXT:    v_ldexp_f32_e32 v0, v3, v0
 ; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX6-NEXT:    buffer_store_short v0, v[1:2], s[0:3], 0 addr64
@@ -96,33 +89,30 @@ define amdgpu_kernel void @v_sint_to_fp_i64_to_f16(half addrspace(1)* %out, i64
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 3, v0
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s3
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, s2, v1
-; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, v2, v3, vcc
+; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
 ; GFX8-NEXT:    flat_load_dwordx2 v[1:2], v[1:2]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_xor_b32_e32 v4, v1, v2
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v4
-; GFX8-NEXT:    v_ffbh_i32_e32 v5, v2
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, 32, 33, vcc
-; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v5
-; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
-; GFX8-NEXT:    v_add_u32_e32 v5, vcc, -1, v4
-; GFX8-NEXT:    v_lshlrev_b64 v[1:2], v5, v[1:2]
-; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
-; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX8-NEXT:    v_xor_b32_e32 v3, v1, v2
+; GFX8-NEXT:    v_ffbh_i32_e32 v4, v2
+; GFX8-NEXT:    v_ashrrev_i32_e32 v3, 31, v3
+; GFX8-NEXT:    v_add_u32_e32 v4, vcc, -1, v4
+; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 32, v3
+; GFX8-NEXT:    v_min_u32_e32 v3, v4, v3
+; GFX8-NEXT:    v_lshlrev_b64 v[1:2], v3, v[1:2]
+; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, 32, v3
+; GFX8-NEXT:    v_min_u32_e32 v1, 1, v1
 ; GFX8-NEXT:    v_or_b32_e32 v1, v2, v1
 ; GFX8-NEXT:    v_cvt_f32_i32_e32 v1, v1
-; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 33, v4
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s1
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; GFX8-NEXT:    v_ldexp_f32 v1, v1, v4
-; GFX8-NEXT:    v_cvt_f16_f32_e32 v4, v1
-; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v2, v3, vcc
-; GFX8-NEXT:    flat_store_short v[0:1], v4
+; GFX8-NEXT:    v_ldexp_f32 v1, v1, v3
+; GFX8-NEXT:    v_cvt_f16_f32_e32 v3, v1
+; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v2, vcc
+; GFX8-NEXT:    flat_store_short v[0:1], v3
 ; GFX8-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
@@ -142,21 +132,18 @@ define amdgpu_kernel void @s_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s4, s0
 ; GFX6-NEXT:    s_mov_b32 s5, s1
-; GFX6-NEXT:    s_xor_b32 s0, s2, s3
-; GFX6-NEXT:    s_flbit_i32 s8, s3
-; GFX6-NEXT:    v_cmp_gt_i32_e64 s[0:1], s0, -1
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, 32, 33, s[0:1]
-; GFX6-NEXT:    v_mov_b32_e32 v1, s8
-; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, s8, -1
-; GFX6-NEXT:    v_cndmask_b32_e32 v2, v0, v1, vcc
-; GFX6-NEXT:    v_add_i32_e32 v0, vcc, -1, v2
-; GFX6-NEXT:    v_lshl_b64 v[0:1], s[2:3], v0
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX6-NEXT:    v_or_b32_e32 v0, v1, v0
-; GFX6-NEXT:    v_cvt_f32_i32_e32 v0, v0
-; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 33, v2
-; GFX6-NEXT:    v_ldexp_f32_e32 v0, v0, v1
+; GFX6-NEXT:    s_flbit_i32 s0, s3
+; GFX6-NEXT:    s_xor_b32 s1, s2, s3
+; GFX6-NEXT:    s_add_i32 s0, s0, -1
+; GFX6-NEXT:    s_ashr_i32 s1, s1, 31
+; GFX6-NEXT:    s_add_i32 s1, s1, 32
+; GFX6-NEXT:    s_min_u32 s8, s0, s1
+; GFX6-NEXT:    s_lshl_b64 s[0:1], s[2:3], s8
+; GFX6-NEXT:    s_min_u32 s0, s0, 1
+; GFX6-NEXT:    s_or_b32 s0, s1, s0
+; GFX6-NEXT:    v_cvt_f32_i32_e32 v0, s0
+; GFX6-NEXT:    s_sub_i32 s0, 32, s8
+; GFX6-NEXT:    v_ldexp_f32_e64 v0, v0, s0
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -164,20 +151,18 @@ define amdgpu_kernel void @s_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    s_xor_b32 s4, s2, s3
-; GFX8-NEXT:    s_cmp_gt_i32 s4, -1
-; GFX8-NEXT:    s_flbit_i32 s5, s3
-; GFX8-NEXT:    s_cselect_b32 s4, 33, 32
-; GFX8-NEXT:    s_cmp_lg_u32 s5, -1
-; GFX8-NEXT:    s_cselect_b32 s6, s5, s4
-; GFX8-NEXT:    s_add_i32 s4, s6, -1
+; GFX8-NEXT:    s_xor_b32 s5, s2, s3
+; GFX8-NEXT:    s_flbit_i32 s4, s3
+; GFX8-NEXT:    s_ashr_i32 s5, s5, 31
+; GFX8-NEXT:    s_add_i32 s4, s4, -1
+; GFX8-NEXT:    s_add_i32 s5, s5, 32
+; GFX8-NEXT:    s_min_u32 s4, s4, s5
 ; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], s4
-; GFX8-NEXT:    v_cmp_ne_u32_e64 s[4:5], s2, 0
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
-; GFX8-NEXT:    v_or_b32_e32 v0, s3, v0
-; GFX8-NEXT:    v_cvt_f32_i32_e32 v2, v0
+; GFX8-NEXT:    s_min_u32 s2, s2, 1
+; GFX8-NEXT:    s_or_b32 s2, s3, s2
+; GFX8-NEXT:    v_cvt_f32_i32_e32 v2, s2
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8-NEXT:    s_sub_i32 s0, 33, s6
+; GFX8-NEXT:    s_sub_i32 s0, 32, s4
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX8-NEXT:    v_ldexp_f32 v2, v2, s0
 ; GFX8-NEXT:    flat_store_dword v[0:1], v2
@@ -203,17 +188,15 @@ define amdgpu_kernel void @v_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    v_xor_b32_e32 v0, v3, v4
 ; GFX6-NEXT:    v_ffbh_i32_e32 v5, v4
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v0
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, 32, 33, vcc
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v5
-; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
-; GFX6-NEXT:    v_add_i32_e32 v5, vcc, -1, v0
-; GFX6-NEXT:    v_lshl_b64 v[3:4], v[3:4], v5
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
-; GFX6-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v0
+; GFX6-NEXT:    v_add_i32_e32 v5, vcc, -1, v5
+; GFX6-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
+; GFX6-NEXT:    v_min_u32_e32 v0, v5, v0
+; GFX6-NEXT:    v_lshl_b64 v[3:4], v[3:4], v0
+; GFX6-NEXT:    v_min_u32_e32 v3, 1, v3
 ; GFX6-NEXT:    v_or_b32_e32 v3, v4, v3
 ; GFX6-NEXT:    v_cvt_f32_i32_e32 v3, v3
-; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, 33, v0
+; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, 32, v0
 ; GFX6-NEXT:    v_ldexp_f32_e32 v0, v3, v0
 ; GFX6-NEXT:    buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
 ; GFX6-NEXT:    s_endpgm
@@ -222,31 +205,28 @@ define amdgpu_kernel void @v_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 3, v0
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0
-; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 2, v0
+; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 2, v0
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s3
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, s2, v1
-; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, v2, v3, vcc
+; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
 ; GFX8-NEXT:    flat_load_dwordx2 v[1:2], v[1:2]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    v_xor_b32_e32 v0, v1, v2
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v0
-; GFX8-NEXT:    v_ffbh_i32_e32 v5, v2
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, 32, 33, vcc
-; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v5
-; GFX8-NEXT:    v_cndmask_b32_e32 v5, v0, v5, vcc
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, -1, v5
-; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v0, v[1:2]
+; GFX8-NEXT:    v_ffbh_i32_e32 v4, v2
+; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v0
+; GFX8-NEXT:    v_add_u32_e32 v4, vcc, -1, v4
+; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
+; GFX8-NEXT:    v_min_u32_e32 v4, v4, v0
+; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v4, v[1:2]
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s1
-; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX8-NEXT:    v_min_u32_e32 v0, 1, v0
 ; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
-; GFX8-NEXT:    v_cvt_f32_i32_e32 v6, v0
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s0, v4
-; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v2, v3, vcc
-; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 33, v5
-; GFX8-NEXT:    v_ldexp_f32 v2, v6, v2
+; GFX8-NEXT:    v_cvt_f32_i32_e32 v5, v0
+; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s0, v3
+; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v2, vcc
+; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 32, v4
+; GFX8-NEXT:    v_ldexp_f32 v2, v5, v2
 ; GFX8-NEXT:    flat_store_dword v[0:1], v2
 ; GFX8-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -266,74 +246,64 @@ define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f32(<2 x float> addrspace(1)*
 ; GFX6-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_xor_b32 s8, s6, s7
-; GFX6-NEXT:    s_flbit_i32 s10, s7
+; GFX6-NEXT:    s_flbit_i32 s8, s7
+; GFX6-NEXT:    s_xor_b32 s9, s6, s7
+; GFX6-NEXT:    s_flbit_i32 s10, s5
 ; GFX6-NEXT:    s_xor_b32 s11, s4, s5
-; GFX6-NEXT:    s_flbit_i32 s12, s5
-; GFX6-NEXT:    v_cmp_gt_i32_e64 s[8:9], s8, -1
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, 32, 33, s[8:9]
-; GFX6-NEXT:    v_mov_b32_e32 v1, s10
-; GFX6-NEXT:    v_cmp_gt_i32_e64 s[8:9], s11, -1
-; GFX6-NEXT:    v_cndmask_b32_e64 v2, 32, 33, s[8:9]
-; GFX6-NEXT:    v_mov_b32_e32 v3, s12
-; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, s10, -1
-; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, s12, -1
-; GFX6-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, -1, v0
-; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 33, v0
-; GFX6-NEXT:    v_add_i32_e32 v3, vcc, -1, v1
-; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, 33, v1
-; GFX6-NEXT:    v_lshl_b64 v[0:1], s[6:7], v2
-; GFX6-NEXT:    v_lshl_b64 v[2:3], s[4:5], v3
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
-; GFX6-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX6-NEXT:    v_or_b32_e32 v0, v1, v0
-; GFX6-NEXT:    v_or_b32_e32 v1, v3, v2
-; GFX6-NEXT:    v_cvt_f32_i32_e32 v0, v0
-; GFX6-NEXT:    v_cvt_f32_i32_e32 v2, v1
-; GFX6-NEXT:    v_ldexp_f32_e32 v1, v0, v4
-; GFX6-NEXT:    v_ldexp_f32_e32 v0, v2, v5
+; GFX6-NEXT:    s_add_i32 s8, s8, -1
+; GFX6-NEXT:    s_ashr_i32 s9, s9, 31
+; GFX6-NEXT:    s_add_i32 s10, s10, -1
+; GFX6-NEXT:    s_ashr_i32 s11, s11, 31
+; GFX6-NEXT:    s_add_i32 s9, s9, 32
+; GFX6-NEXT:    s_add_i32 s11, s11, 32
+; GFX6-NEXT:    s_min_u32 s8, s8, s9
+; GFX6-NEXT:    s_min_u32 s9, s10, s11
+; GFX6-NEXT:    s_lshl_b64 s[6:7], s[6:7], s8
+; GFX6-NEXT:    s_sub_i32 s8, 32, s8
+; GFX6-NEXT:    s_lshl_b64 s[4:5], s[4:5], s9
+; GFX6-NEXT:    s_sub_i32 s9, 32, s9
+; GFX6-NEXT:    s_min_u32 s6, s6, 1
+; GFX6-NEXT:    s_min_u32 s4, s4, 1
+; GFX6-NEXT:    s_or_b32 s6, s7, s6
+; GFX6-NEXT:    s_or_b32 s4, s5, s4
+; GFX6-NEXT:    v_cvt_f32_i32_e32 v0, s6
+; GFX6-NEXT:    v_cvt_f32_i32_e32 v2, s4
+; GFX6-NEXT:    v_ldexp_f32_e64 v1, v0, s8
+; GFX6-NEXT:    v_ldexp_f32_e64 v0, v2, s9
 ; GFX6-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX8-LABEL: s_sint_to_fp_v2i64_to_v2f32:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX8-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
+; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x34
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    s_xor_b32 s2, s6, s7
-; GFX8-NEXT:    s_cmp_gt_i32 s2, -1
-; GFX8-NEXT:    s_flbit_i32 s3, s7
-; GFX8-NEXT:    s_cselect_b32 s2, 33, 32
-; GFX8-NEXT:    s_cmp_lg_u32 s3, -1
-; GFX8-NEXT:    s_cselect_b32 s2, s3, s2
-; GFX8-NEXT:    s_add_i32 s3, s2, -1
-; GFX8-NEXT:    s_sub_i32 s8, 33, s2
-; GFX8-NEXT:    s_lshl_b64 s[2:3], s[6:7], s3
-; GFX8-NEXT:    v_cmp_ne_u32_e64 s[6:7], s2, 0
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[6:7]
-; GFX8-NEXT:    s_xor_b32 s2, s4, s5
-; GFX8-NEXT:    v_or_b32_e32 v0, s3, v0
-; GFX8-NEXT:    s_cmp_gt_i32 s2, -1
-; GFX8-NEXT:    s_flbit_i32 s3, s5
-; GFX8-NEXT:    s_cselect_b32 s2, 33, 32
-; GFX8-NEXT:    s_cmp_lg_u32 s3, -1
-; GFX8-NEXT:    s_cselect_b32 s6, s3, s2
-; GFX8-NEXT:    s_add_i32 s2, s6, -1
-; GFX8-NEXT:    s_lshl_b64 s[2:3], s[4:5], s2
-; GFX8-NEXT:    v_cmp_ne_u32_e64 s[4:5], s2, 0
-; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[4:5]
-; GFX8-NEXT:    v_or_b32_e32 v1, s3, v1
-; GFX8-NEXT:    v_cvt_f32_i32_e32 v0, v0
-; GFX8-NEXT:    v_cvt_f32_i32_e32 v2, v1
-; GFX8-NEXT:    s_sub_i32 s2, 33, s6
-; GFX8-NEXT:    v_ldexp_f32 v1, v0, s8
-; GFX8-NEXT:    v_ldexp_f32 v0, v2, s2
-; GFX8-NEXT:    v_mov_b32_e32 v3, s1
-; GFX8-NEXT:    v_mov_b32_e32 v2, s0
+; GFX8-NEXT:    s_xor_b32 s7, s2, s3
+; GFX8-NEXT:    s_flbit_i32 s6, s3
+; GFX8-NEXT:    s_ashr_i32 s7, s7, 31
+; GFX8-NEXT:    s_add_i32 s6, s6, -1
+; GFX8-NEXT:    s_add_i32 s7, s7, 32
+; GFX8-NEXT:    s_min_u32 s6, s6, s7
+; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], s6
+; GFX8-NEXT:    s_min_u32 s2, s2, 1
+; GFX8-NEXT:    s_or_b32 s2, s3, s2
+; GFX8-NEXT:    v_cvt_f32_i32_e32 v0, s2
+; GFX8-NEXT:    s_xor_b32 s2, s0, s1
+; GFX8-NEXT:    s_flbit_i32 s8, s1
+; GFX8-NEXT:    s_ashr_i32 s2, s2, 31
+; GFX8-NEXT:    s_add_i32 s8, s8, -1
+; GFX8-NEXT:    s_add_i32 s2, s2, 32
+; GFX8-NEXT:    s_min_u32 s2, s8, s2
+; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], s2
+; GFX8-NEXT:    s_min_u32 s0, s0, 1
+; GFX8-NEXT:    s_or_b32 s0, s1, s0
+; GFX8-NEXT:    v_cvt_f32_i32_e32 v2, s0
+; GFX8-NEXT:    s_sub_i32 s0, 32, s6
+; GFX8-NEXT:    v_ldexp_f32 v1, v0, s0
+; GFX8-NEXT:    s_sub_i32 s0, 32, s2
+; GFX8-NEXT:    v_ldexp_f32 v0, v2, s0
+; GFX8-NEXT:    v_mov_b32_e32 v2, s4
+; GFX8-NEXT:    v_mov_b32_e32 v3, s5
 ; GFX8-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
 ; GFX8-NEXT:    s_endpgm
   %result = sitofp <2 x i64> %in to <2 x float>
@@ -366,54 +336,46 @@ define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f32(<4 x float> addrspace(1)*
 ; GFX6-NEXT:    v_ffbh_i32_e32 v15, v8
 ; GFX6-NEXT:    v_xor_b32_e32 v16, v5, v6
 ; GFX6-NEXT:    v_ffbh_i32_e32 v17, v6
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v0
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, 32, 33, vcc
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v12
-; GFX6-NEXT:    v_cndmask_b32_e64 v12, 32, 33, vcc
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v14
-; GFX6-NEXT:    v_cndmask_b32_e64 v14, 32, 33, vcc
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v16
-; GFX6-NEXT:    v_cndmask_b32_e64 v16, 32, 33, vcc
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v9
-; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v13
-; GFX6-NEXT:    v_cndmask_b32_e32 v9, v12, v13, vcc
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v15
-; GFX6-NEXT:    v_cndmask_b32_e32 v12, v14, v15, vcc
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v17
-; GFX6-NEXT:    v_cndmask_b32_e32 v13, v16, v17, vcc
-; GFX6-NEXT:    v_add_i32_e32 v14, vcc, -1, v0
-; GFX6-NEXT:    v_sub_i32_e32 v15, vcc, 33, v0
-; GFX6-NEXT:    v_add_i32_e32 v0, vcc, -1, v9
-; GFX6-NEXT:    v_sub_i32_e32 v9, vcc, 33, v9
-; GFX6-NEXT:    v_add_i32_e32 v16, vcc, -1, v12
-; GFX6-NEXT:    v_sub_i32_e32 v12, vcc, 33, v12
-; GFX6-NEXT:    v_add_i32_e32 v17, vcc, -1, v13
-; GFX6-NEXT:    v_sub_i32_e32 v13, vcc, 33, v13
-; GFX6-NEXT:    v_lshl_b64 v[3:4], v[3:4], v14
-; GFX6-NEXT:    v_lshl_b64 v[0:1], v[1:2], v0
-; GFX6-NEXT:    v_lshl_b64 v[7:8], v[7:8], v16
-; GFX6-NEXT:    v_lshl_b64 v[5:6], v[5:6], v17
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
-; GFX6-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
-; GFX6-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
-; GFX6-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX6-NEXT:    v_or_b32_e32 v2, v4, v2
+; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v0
+; GFX6-NEXT:    v_add_i32_e32 v9, vcc, -1, v9
+; GFX6-NEXT:    v_ashrrev_i32_e32 v12, 31, v12
+; GFX6-NEXT:    v_add_i32_e32 v13, vcc, -1, v13
+; GFX6-NEXT:    v_ashrrev_i32_e32 v14, 31, v14
+; GFX6-NEXT:    v_add_i32_e32 v15, vcc, -1, v15
+; GFX6-NEXT:    v_ashrrev_i32_e32 v16, 31, v16
+; GFX6-NEXT:    v_add_i32_e32 v17, vcc, -1, v17
+; GFX6-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
+; GFX6-NEXT:    v_add_i32_e32 v12, vcc, 32, v12
+; GFX6-NEXT:    v_add_i32_e32 v14, vcc, 32, v14
+; GFX6-NEXT:    v_add_i32_e32 v16, vcc, 32, v16
+; GFX6-NEXT:    v_min_u32_e32 v0, v9, v0
+; GFX6-NEXT:    v_min_u32_e32 v9, v13, v12
+; GFX6-NEXT:    v_min_u32_e32 v12, v15, v14
+; GFX6-NEXT:    v_min_u32_e32 v13, v17, v16
+; GFX6-NEXT:    v_lshl_b64 v[3:4], v[3:4], v0
+; GFX6-NEXT:    v_sub_i32_e32 v14, vcc, 32, v0
+; GFX6-NEXT:    v_lshl_b64 v[0:1], v[1:2], v9
+; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 32, v9
+; GFX6-NEXT:    v_lshl_b64 v[7:8], v[7:8], v12
+; GFX6-NEXT:    v_sub_i32_e32 v9, vcc, 32, v12
+; GFX6-NEXT:    v_lshl_b64 v[5:6], v[5:6], v13
+; GFX6-NEXT:    v_sub_i32_e32 v12, vcc, 32, v13
+; GFX6-NEXT:    v_min_u32_e32 v3, 1, v3
+; GFX6-NEXT:    v_min_u32_e32 v0, 1, v0
+; GFX6-NEXT:    v_min_u32_e32 v7, 1, v7
+; GFX6-NEXT:    v_min_u32_e32 v5, 1, v5
+; GFX6-NEXT:    v_or_b32_e32 v3, v4, v3
 ; GFX6-NEXT:    v_or_b32_e32 v0, v1, v0
-; GFX6-NEXT:    v_or_b32_e32 v1, v8, v3
-; GFX6-NEXT:    v_or_b32_e32 v3, v6, v5
-; GFX6-NEXT:    v_cvt_f32_i32_e32 v2, v2
+; GFX6-NEXT:    v_or_b32_e32 v1, v8, v7
+; GFX6-NEXT:    v_or_b32_e32 v4, v6, v5
+; GFX6-NEXT:    v_cvt_f32_i32_e32 v3, v3
 ; GFX6-NEXT:    v_cvt_f32_i32_e32 v0, v0
 ; GFX6-NEXT:    v_cvt_f32_i32_e32 v1, v1
-; GFX6-NEXT:    v_cvt_f32_i32_e32 v4, v3
-; GFX6-NEXT:    v_ldexp_f32_e32 v3, v2, v15
-; GFX6-NEXT:    v_ldexp_f32_e32 v2, v0, v9
-; GFX6-NEXT:    v_ldexp_f32_e32 v1, v1, v12
-; GFX6-NEXT:    v_ldexp_f32_e32 v0, v4, v13
+; GFX6-NEXT:    v_cvt_f32_i32_e32 v4, v4
+; GFX6-NEXT:    v_ldexp_f32_e32 v3, v3, v14
+; GFX6-NEXT:    v_ldexp_f32_e32 v2, v0, v2
+; GFX6-NEXT:    v_ldexp_f32_e32 v1, v1, v9
+; GFX6-NEXT:    v_ldexp_f32_e32 v0, v4, v12
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], v[10:11], s[0:3], 0 addr64
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -421,77 +383,68 @@ define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f32(<4 x float> addrspace(1)*
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 5, v0
-; GFX8-NEXT:    v_mov_b32_e32 v10, 0
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s3
 ; GFX8-NEXT:    v_add_u32_e32 v5, vcc, s2, v1
-; GFX8-NEXT:    v_addc_u32_e32 v6, vcc, v2, v10, vcc
+; GFX8-NEXT:    v_addc_u32_e32 v6, vcc, 0, v2, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 16, v5
 ; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, 0, v6, vcc
 ; GFX8-NEXT:    flat_load_dwordx4 v[1:4], v[1:2]
 ; GFX8-NEXT:    flat_load_dwordx4 v[5:8], v[5:6]
 ; GFX8-NEXT:    v_add_u32_e32 v9, vcc, s0, v0
-; GFX8-NEXT:    v_mov_b32_e32 v11, s1
-; GFX8-NEXT:    v_addc_u32_e32 v10, vcc, v11, v10, vcc
+; GFX8-NEXT:    v_mov_b32_e32 v10, s1
+; GFX8-NEXT:    v_addc_u32_e32 v10, vcc, 0, v10, vcc
 ; GFX8-NEXT:    s_waitcnt vmcnt(1)
 ; GFX8-NEXT:    v_xor_b32_e32 v14, v3, v4
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    v_xor_b32_e32 v0, v7, v8
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v0
 ; GFX8-NEXT:    v_xor_b32_e32 v12, v5, v6
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, 32, 33, vcc
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v12
-; GFX8-NEXT:    v_cndmask_b32_e64 v12, 32, 33, vcc
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v14
 ; GFX8-NEXT:    v_xor_b32_e32 v16, v1, v2
-; GFX8-NEXT:    v_cndmask_b32_e64 v14, 32, 33, vcc
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v16
 ; GFX8-NEXT:    v_ffbh_i32_e32 v11, v8
-; GFX8-NEXT:    v_cndmask_b32_e64 v16, 32, 33, vcc
-; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v11
 ; GFX8-NEXT:    v_ffbh_i32_e32 v13, v6
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc
-; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v13
 ; GFX8-NEXT:    v_ffbh_i32_e32 v15, v4
-; GFX8-NEXT:    v_cndmask_b32_e32 v11, v12, v13, vcc
-; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v15
 ; GFX8-NEXT:    v_ffbh_i32_e32 v17, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v12, v14, v15, vcc
-; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v17
-; GFX8-NEXT:    v_cndmask_b32_e32 v13, v16, v17, vcc
-; GFX8-NEXT:    v_add_u32_e32 v14, vcc, -1, v0
-; GFX8-NEXT:    v_sub_u32_e32 v15, vcc, 33, v0
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, -1, v11
-; GFX8-NEXT:    v_lshlrev_b64 v[7:8], v14, v[7:8]
-; GFX8-NEXT:    v_add_u32_e32 v16, vcc, -1, v12
-; GFX8-NEXT:    v_add_u32_e32 v17, vcc, -1, v13
-; GFX8-NEXT:    v_lshlrev_b64 v[5:6], v0, v[5:6]
-; GFX8-NEXT:    v_sub_u32_e32 v11, vcc, 33, v11
-; GFX8-NEXT:    v_sub_u32_e32 v12, vcc, 33, v12
-; GFX8-NEXT:    v_sub_u32_e32 v13, vcc, 33, v13
-; GFX8-NEXT:    v_lshlrev_b64 v[3:4], v16, v[3:4]
-; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
-; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v17, v[1:2]
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v0
+; GFX8-NEXT:    v_ashrrev_i32_e32 v12, 31, v12
+; GFX8-NEXT:    v_ashrrev_i32_e32 v14, 31, v14
+; GFX8-NEXT:    v_ashrrev_i32_e32 v16, 31, v16
+; GFX8-NEXT:    v_add_u32_e32 v11, vcc, -1, v11
+; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
+; GFX8-NEXT:    v_add_u32_e32 v13, vcc, -1, v13
+; GFX8-NEXT:    v_add_u32_e32 v15, vcc, -1, v15
+; GFX8-NEXT:    v_add_u32_e32 v17, vcc, -1, v17
+; GFX8-NEXT:    v_add_u32_e32 v12, vcc, 32, v12
+; GFX8-NEXT:    v_add_u32_e32 v14, vcc, 32, v14
+; GFX8-NEXT:    v_add_u32_e32 v16, vcc, 32, v16
+; GFX8-NEXT:    v_min_u32_e32 v0, v11, v0
+; GFX8-NEXT:    v_min_u32_e32 v11, v13, v12
+; GFX8-NEXT:    v_min_u32_e32 v12, v15, v14
+; GFX8-NEXT:    v_min_u32_e32 v13, v17, v16
+; GFX8-NEXT:    v_lshlrev_b64 v[5:6], v11, v[5:6]
+; GFX8-NEXT:    v_lshlrev_b64 v[3:4], v12, v[3:4]
+; GFX8-NEXT:    v_lshlrev_b64 v[7:8], v0, v[7:8]
+; GFX8-NEXT:    v_sub_u32_e32 v14, vcc, 32, v0
+; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v13, v[1:2]
+; GFX8-NEXT:    v_min_u32_e32 v7, 1, v7
+; GFX8-NEXT:    v_min_u32_e32 v5, 1, v5
+; GFX8-NEXT:    v_min_u32_e32 v3, 1, v3
+; GFX8-NEXT:    v_min_u32_e32 v0, 1, v0
 ; GFX8-NEXT:    v_or_b32_e32 v3, v4, v3
-; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
-; GFX8-NEXT:    v_or_b32_e32 v2, v8, v2
 ; GFX8-NEXT:    v_or_b32_e32 v5, v6, v5
-; GFX8-NEXT:    v_cvt_f32_i32_e32 v1, v2
+; GFX8-NEXT:    v_or_b32_e32 v7, v8, v7
+; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
+; GFX8-NEXT:    v_cvt_f32_i32_e32 v1, v7
+; GFX8-NEXT:    v_cvt_f32_i32_e32 v4, v5
 ; GFX8-NEXT:    v_cvt_f32_i32_e32 v3, v3
-; GFX8-NEXT:    v_cvt_f32_i32_e32 v2, v5
-; GFX8-NEXT:    v_cvt_f32_i32_e32 v4, v0
-; GFX8-NEXT:    v_ldexp_f32 v1, v1, v15
+; GFX8-NEXT:    v_cvt_f32_i32_e32 v5, v0
+; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 32, v13
+; GFX8-NEXT:    v_sub_u32_e32 v11, vcc, 32, v11
+; GFX8-NEXT:    v_sub_u32_e32 v12, vcc, 32, v12
+; GFX8-NEXT:    v_ldexp_f32 v1, v1, v14
+; GFX8-NEXT:    v_ldexp_f32 v0, v4, v11
 ; GFX8-NEXT:    v_ldexp_f32 v3, v3, v12
-; GFX8-NEXT:    v_ldexp_f32 v0, v2, v11
-; GFX8-NEXT:    v_ldexp_f32 v2, v4, v13
+; GFX8-NEXT:    v_ldexp_f32 v2, v5, v2
 ; GFX8-NEXT:    flat_store_dwordx4 v[9:10], v[0:3]
 ; GFX8-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -511,36 +464,30 @@ define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f16(<2 x half> addrspace(1)*
 ; GFX6-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_xor_b32 s8, s6, s7
-; GFX6-NEXT:    s_flbit_i32 s10, s7
+; GFX6-NEXT:    s_flbit_i32 s8, s7
+; GFX6-NEXT:    s_xor_b32 s9, s6, s7
+; GFX6-NEXT:    s_flbit_i32 s10, s5
 ; GFX6-NEXT:    s_xor_b32 s11, s4, s5
-; GFX6-NEXT:    s_flbit_i32 s12, s5
-; GFX6-NEXT:    v_cmp_gt_i32_e64 s[8:9], s8, -1
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, 32, 33, s[8:9]
-; GFX6-NEXT:    v_mov_b32_e32 v1, s10
-; GFX6-NEXT:    v_cmp_gt_i32_e64 s[8:9], s11, -1
-; GFX6-NEXT:    v_cndmask_b32_e64 v2, 32, 33, s[8:9]
-; GFX6-NEXT:    v_mov_b32_e32 v3, s12
-; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, s10, -1
-; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, s12, -1
-; GFX6-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, -1, v0
-; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 33, v0
-; GFX6-NEXT:    v_add_i32_e32 v3, vcc, -1, v1
-; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, 33, v1
-; GFX6-NEXT:    v_lshl_b64 v[0:1], s[6:7], v2
-; GFX6-NEXT:    v_lshl_b64 v[2:3], s[4:5], v3
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
-; GFX6-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX6-NEXT:    v_or_b32_e32 v0, v1, v0
-; GFX6-NEXT:    v_or_b32_e32 v1, v3, v2
-; GFX6-NEXT:    v_cvt_f32_i32_e32 v0, v0
-; GFX6-NEXT:    v_cvt_f32_i32_e32 v1, v1
-; GFX6-NEXT:    v_ldexp_f32_e32 v0, v0, v4
-; GFX6-NEXT:    v_ldexp_f32_e32 v1, v1, v5
+; GFX6-NEXT:    s_add_i32 s8, s8, -1
+; GFX6-NEXT:    s_ashr_i32 s9, s9, 31
+; GFX6-NEXT:    s_add_i32 s10, s10, -1
+; GFX6-NEXT:    s_ashr_i32 s11, s11, 31
+; GFX6-NEXT:    s_add_i32 s9, s9, 32
+; GFX6-NEXT:    s_add_i32 s11, s11, 32
+; GFX6-NEXT:    s_min_u32 s8, s8, s9
+; GFX6-NEXT:    s_min_u32 s9, s10, s11
+; GFX6-NEXT:    s_lshl_b64 s[6:7], s[6:7], s8
+; GFX6-NEXT:    s_sub_i32 s8, 32, s8
+; GFX6-NEXT:    s_lshl_b64 s[4:5], s[4:5], s9
+; GFX6-NEXT:    s_sub_i32 s9, 32, s9
+; GFX6-NEXT:    s_min_u32 s6, s6, 1
+; GFX6-NEXT:    s_min_u32 s4, s4, 1
+; GFX6-NEXT:    s_or_b32 s6, s7, s6
+; GFX6-NEXT:    s_or_b32 s4, s5, s4
+; GFX6-NEXT:    v_cvt_f32_i32_e32 v0, s6
+; GFX6-NEXT:    v_cvt_f32_i32_e32 v1, s4
+; GFX6-NEXT:    v_ldexp_f32_e64 v0, v0, s8
+; GFX6-NEXT:    v_ldexp_f32_e64 v1, v1, s9
 ; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
@@ -553,33 +500,29 @@ define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f16(<2 x half> addrspace(1)*
 ; GFX8-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
 ; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    s_xor_b32 s2, s6, s7
-; GFX8-NEXT:    s_cmp_gt_i32 s2, -1
-; GFX8-NEXT:    s_flbit_i32 s3, s7
-; GFX8-NEXT:    s_cselect_b32 s2, 33, 32
-; GFX8-NEXT:    s_cmp_lg_u32 s3, -1
-; GFX8-NEXT:    s_cselect_b32 s2, s3, s2
-; GFX8-NEXT:    s_add_i32 s3, s2, -1
-; GFX8-NEXT:    s_sub_i32 s8, 33, s2
-; GFX8-NEXT:    s_lshl_b64 s[2:3], s[6:7], s3
-; GFX8-NEXT:    v_cmp_ne_u32_e64 s[6:7], s2, 0
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[6:7]
+; GFX8-NEXT:    s_xor_b32 s3, s6, s7
+; GFX8-NEXT:    s_flbit_i32 s2, s7
+; GFX8-NEXT:    s_ashr_i32 s3, s3, 31
+; GFX8-NEXT:    s_add_i32 s2, s2, -1
+; GFX8-NEXT:    s_add_i32 s3, s3, 32
+; GFX8-NEXT:    s_min_u32 s9, s2, s3
+; GFX8-NEXT:    s_lshl_b64 s[2:3], s[6:7], s9
+; GFX8-NEXT:    s_min_u32 s2, s2, 1
+; GFX8-NEXT:    s_or_b32 s2, s3, s2
+; GFX8-NEXT:    v_cvt_f32_i32_e32 v0, s2
 ; GFX8-NEXT:    s_xor_b32 s2, s4, s5
-; GFX8-NEXT:    v_or_b32_e32 v0, s3, v0
-; GFX8-NEXT:    s_cmp_gt_i32 s2, -1
-; GFX8-NEXT:    s_flbit_i32 s3, s5
-; GFX8-NEXT:    s_cselect_b32 s2, 33, 32
-; GFX8-NEXT:    s_cmp_lg_u32 s3, -1
-; GFX8-NEXT:    s_cselect_b32 s6, s3, s2
-; GFX8-NEXT:    s_add_i32 s2, s6, -1
-; GFX8-NEXT:    s_lshl_b64 s[2:3], s[4:5], s2
-; GFX8-NEXT:    v_cmp_ne_u32_e64 s[4:5], s2, 0
-; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[4:5]
-; GFX8-NEXT:    v_or_b32_e32 v1, s3, v1
-; GFX8-NEXT:    v_cvt_f32_i32_e32 v0, v0
-; GFX8-NEXT:    v_cvt_f32_i32_e32 v1, v1
-; GFX8-NEXT:    s_sub_i32 s2, 33, s6
-; GFX8-NEXT:    v_ldexp_f32 v0, v0, s8
+; GFX8-NEXT:    s_flbit_i32 s8, s5
+; GFX8-NEXT:    s_ashr_i32 s2, s2, 31
+; GFX8-NEXT:    s_add_i32 s8, s8, -1
+; GFX8-NEXT:    s_add_i32 s2, s2, 32
+; GFX8-NEXT:    s_min_u32 s7, s8, s2
+; GFX8-NEXT:    s_lshl_b64 s[2:3], s[4:5], s7
+; GFX8-NEXT:    s_min_u32 s2, s2, 1
+; GFX8-NEXT:    s_or_b32 s2, s3, s2
+; GFX8-NEXT:    v_cvt_f32_i32_e32 v1, s2
+; GFX8-NEXT:    s_sub_i32 s6, 32, s9
+; GFX8-NEXT:    s_sub_i32 s2, 32, s7
+; GFX8-NEXT:    v_ldexp_f32 v0, v0, s6
 ; GFX8-NEXT:    v_ldexp_f32 v1, v1, s2
 ; GFX8-NEXT:    v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
 ; GFX8-NEXT:    v_cvt_f16_f32_e32 v1, v1
@@ -618,62 +561,54 @@ define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f16(<4 x half> addrspace(1)*
 ; GFX6-NEXT:    v_ffbh_i32_e32 v15, v8
 ; GFX6-NEXT:    v_xor_b32_e32 v16, v5, v6
 ; GFX6-NEXT:    v_ffbh_i32_e32 v17, v6
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v0
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, 32, 33, vcc
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v12
-; GFX6-NEXT:    v_cndmask_b32_e64 v12, 32, 33, vcc
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v14
-; GFX6-NEXT:    v_cndmask_b32_e64 v14, 32, 33, vcc
-; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v16
-; GFX6-NEXT:    v_cndmask_b32_e64 v16, 32, 33, vcc
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v9
-; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v13
-; GFX6-NEXT:    v_cndmask_b32_e32 v9, v12, v13, vcc
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v15
-; GFX6-NEXT:    v_cndmask_b32_e32 v12, v14, v15, vcc
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v17
-; GFX6-NEXT:    v_cndmask_b32_e32 v13, v16, v17, vcc
-; GFX6-NEXT:    v_add_i32_e32 v14, vcc, -1, v0
-; GFX6-NEXT:    v_sub_i32_e32 v15, vcc, 33, v0
-; GFX6-NEXT:    v_add_i32_e32 v0, vcc, -1, v9
-; GFX6-NEXT:    v_sub_i32_e32 v9, vcc, 33, v9
-; GFX6-NEXT:    v_add_i32_e32 v16, vcc, -1, v12
-; GFX6-NEXT:    v_sub_i32_e32 v12, vcc, 33, v12
-; GFX6-NEXT:    v_add_i32_e32 v17, vcc, -1, v13
-; GFX6-NEXT:    v_sub_i32_e32 v13, vcc, 33, v13
-; GFX6-NEXT:    v_lshl_b64 v[3:4], v[3:4], v14
-; GFX6-NEXT:    v_lshl_b64 v[0:1], v[1:2], v0
-; GFX6-NEXT:    v_lshl_b64 v[7:8], v[7:8], v16
-; GFX6-NEXT:    v_lshl_b64 v[5:6], v[5:6], v17
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
-; GFX6-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
-; GFX6-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
-; GFX6-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX6-NEXT:    v_or_b32_e32 v2, v4, v2
+; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v0
+; GFX6-NEXT:    v_add_i32_e32 v9, vcc, -1, v9
+; GFX6-NEXT:    v_ashrrev_i32_e32 v12, 31, v12
+; GFX6-NEXT:    v_add_i32_e32 v13, vcc, -1, v13
+; GFX6-NEXT:    v_ashrrev_i32_e32 v14, 31, v14
+; GFX6-NEXT:    v_add_i32_e32 v15, vcc, -1, v15
+; GFX6-NEXT:    v_ashrrev_i32_e32 v16, 31, v16
+; GFX6-NEXT:    v_add_i32_e32 v17, vcc, -1, v17
+; GFX6-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
+; GFX6-NEXT:    v_add_i32_e32 v12, vcc, 32, v12
+; GFX6-NEXT:    v_add_i32_e32 v14, vcc, 32, v14
+; GFX6-NEXT:    v_add_i32_e32 v16, vcc, 32, v16
+; GFX6-NEXT:    v_min_u32_e32 v0, v9, v0
+; GFX6-NEXT:    v_min_u32_e32 v9, v13, v12
+; GFX6-NEXT:    v_min_u32_e32 v12, v15, v14
+; GFX6-NEXT:    v_min_u32_e32 v13, v17, v16
+; GFX6-NEXT:    v_lshl_b64 v[3:4], v[3:4], v0
+; GFX6-NEXT:    v_sub_i32_e32 v14, vcc, 32, v0
+; GFX6-NEXT:    v_lshl_b64 v[0:1], v[1:2], v9
+; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 32, v9
+; GFX6-NEXT:    v_lshl_b64 v[7:8], v[7:8], v12
+; GFX6-NEXT:    v_sub_i32_e32 v9, vcc, 32, v12
+; GFX6-NEXT:    v_lshl_b64 v[5:6], v[5:6], v13
+; GFX6-NEXT:    v_sub_i32_e32 v12, vcc, 32, v13
+; GFX6-NEXT:    v_min_u32_e32 v3, 1, v3
+; GFX6-NEXT:    v_min_u32_e32 v0, 1, v0
+; GFX6-NEXT:    v_min_u32_e32 v7, 1, v7
+; GFX6-NEXT:    v_min_u32_e32 v5, 1, v5
+; GFX6-NEXT:    v_or_b32_e32 v3, v4, v3
 ; GFX6-NEXT:    v_or_b32_e32 v0, v1, v0
-; GFX6-NEXT:    v_or_b32_e32 v1, v8, v3
-; GFX6-NEXT:    v_or_b32_e32 v3, v6, v5
-; GFX6-NEXT:    v_cvt_f32_i32_e32 v2, v2
+; GFX6-NEXT:    v_or_b32_e32 v1, v8, v7
+; GFX6-NEXT:    v_or_b32_e32 v4, v6, v5
+; GFX6-NEXT:    v_cvt_f32_i32_e32 v3, v3
 ; GFX6-NEXT:    v_cvt_f32_i32_e32 v0, v0
 ; GFX6-NEXT:    v_cvt_f32_i32_e32 v1, v1
-; GFX6-NEXT:    v_cvt_f32_i32_e32 v3, v3
-; GFX6-NEXT:    v_ldexp_f32_e32 v2, v2, v15
-; GFX6-NEXT:    v_ldexp_f32_e32 v0, v0, v9
-; GFX6-NEXT:    v_ldexp_f32_e32 v1, v1, v12
-; GFX6-NEXT:    v_ldexp_f32_e32 v3, v3, v13
-; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
+; GFX6-NEXT:    v_cvt_f32_i32_e32 v4, v4
+; GFX6-NEXT:    v_ldexp_f32_e32 v3, v3, v14
+; GFX6-NEXT:    v_ldexp_f32_e32 v0, v0, v2
+; GFX6-NEXT:    v_ldexp_f32_e32 v1, v1, v9
+; GFX6-NEXT:    v_ldexp_f32_e32 v2, v4, v12
+; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
 ; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
-; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
+; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 16, v1
-; GFX6-NEXT:    v_or_b32_e32 v1, v0, v2
-; GFX6-NEXT:    v_or_b32_e32 v0, v3, v4
+; GFX6-NEXT:    v_or_b32_e32 v1, v0, v3
+; GFX6-NEXT:    v_or_b32_e32 v0, v2, v4
 ; GFX6-NEXT:    buffer_store_dwordx2 v[0:1], v[10:11], s[0:3], 0 addr64
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -681,83 +616,74 @@ define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f16(<4 x half> addrspace(1)*
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 5, v0
-; GFX8-NEXT:    v_mov_b32_e32 v9, 0
-; GFX8-NEXT:    v_lshlrev_b32_e32 v10, 3, v0
+; GFX8-NEXT:    v_lshlrev_b32_e32 v9, 3, v0
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s3
 ; GFX8-NEXT:    v_add_u32_e32 v5, vcc, s2, v1
-; GFX8-NEXT:    v_addc_u32_e32 v6, vcc, v2, v9, vcc
+; GFX8-NEXT:    v_addc_u32_e32 v6, vcc, 0, v2, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 16, v5
 ; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, 0, v6, vcc
 ; GFX8-NEXT:    flat_load_dwordx4 v[1:4], v[1:2]
 ; GFX8-NEXT:    flat_load_dwordx4 v[5:8], v[5:6]
-; GFX8-NEXT:    v_mov_b32_e32 v11, s1
+; GFX8-NEXT:    v_mov_b32_e32 v10, s1
 ; GFX8-NEXT:    s_waitcnt vmcnt(1)
-; GFX8-NEXT:    v_xor_b32_e32 v15, v3, v4
+; GFX8-NEXT:    v_xor_b32_e32 v14, v3, v4
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    v_xor_b32_e32 v0, v7, v8
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v0
-; GFX8-NEXT:    v_xor_b32_e32 v13, v5, v6
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, 32, 33, vcc
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v13
-; GFX8-NEXT:    v_cndmask_b32_e64 v13, 32, 33, vcc
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v15
-; GFX8-NEXT:    v_xor_b32_e32 v17, v1, v2
-; GFX8-NEXT:    v_cndmask_b32_e64 v15, 32, 33, vcc
-; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v17
-; GFX8-NEXT:    v_ffbh_i32_e32 v12, v8
-; GFX8-NEXT:    v_cndmask_b32_e64 v17, 32, 33, vcc
-; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v12
-; GFX8-NEXT:    v_ffbh_i32_e32 v14, v6
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
-; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v14
-; GFX8-NEXT:    v_ffbh_i32_e32 v16, v4
-; GFX8-NEXT:    v_cndmask_b32_e32 v12, v13, v14, vcc
-; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v16
-; GFX8-NEXT:    v_ffbh_i32_e32 v18, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v13, v15, v16, vcc
-; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v18
-; GFX8-NEXT:    v_cndmask_b32_e32 v14, v17, v18, vcc
-; GFX8-NEXT:    v_add_u32_e32 v15, vcc, -1, v0
-; GFX8-NEXT:    v_sub_u32_e32 v16, vcc, 33, v0
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, -1, v12
-; GFX8-NEXT:    v_lshlrev_b64 v[7:8], v15, v[7:8]
-; GFX8-NEXT:    v_add_u32_e32 v17, vcc, -1, v13
-; GFX8-NEXT:    v_add_u32_e32 v18, vcc, -1, v14
-; GFX8-NEXT:    v_lshlrev_b64 v[5:6], v0, v[5:6]
-; GFX8-NEXT:    v_sub_u32_e32 v12, vcc, 33, v12
-; GFX8-NEXT:    v_sub_u32_e32 v13, vcc, 33, v13
-; GFX8-NEXT:    v_sub_u32_e32 v14, vcc, 33, v14
-; GFX8-NEXT:    v_lshlrev_b64 v[3:4], v17, v[3:4]
-; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
-; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v18, v[1:2]
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX8-NEXT:    v_xor_b32_e32 v12, v5, v6
+; GFX8-NEXT:    v_xor_b32_e32 v16, v1, v2
+; GFX8-NEXT:    v_ffbh_i32_e32 v11, v8
+; GFX8-NEXT:    v_ffbh_i32_e32 v13, v6
+; GFX8-NEXT:    v_ffbh_i32_e32 v15, v4
+; GFX8-NEXT:    v_ffbh_i32_e32 v17, v2
+; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v0
+; GFX8-NEXT:    v_ashrrev_i32_e32 v12, 31, v12
+; GFX8-NEXT:    v_ashrrev_i32_e32 v14, 31, v14
+; GFX8-NEXT:    v_ashrrev_i32_e32 v16, 31, v16
+; GFX8-NEXT:    v_add_u32_e32 v11, vcc, -1, v11
+; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
+; GFX8-NEXT:    v_add_u32_e32 v13, vcc, -1, v13
+; GFX8-NEXT:    v_add_u32_e32 v15, vcc, -1, v15
+; GFX8-NEXT:    v_add_u32_e32 v17, vcc, -1, v17
+; GFX8-NEXT:    v_add_u32_e32 v12, vcc, 32, v12
+; GFX8-NEXT:    v_add_u32_e32 v14, vcc, 32, v14
+; GFX8-NEXT:    v_add_u32_e32 v16, vcc, 32, v16
+; GFX8-NEXT:    v_min_u32_e32 v0, v11, v0
+; GFX8-NEXT:    v_min_u32_e32 v11, v13, v12
+; GFX8-NEXT:    v_min_u32_e32 v12, v15, v14
+; GFX8-NEXT:    v_min_u32_e32 v13, v17, v16
+; GFX8-NEXT:    v_lshlrev_b64 v[5:6], v11, v[5:6]
+; GFX8-NEXT:    v_lshlrev_b64 v[3:4], v12, v[3:4]
+; GFX8-NEXT:    v_lshlrev_b64 v[7:8], v0, v[7:8]
+; GFX8-NEXT:    v_sub_u32_e32 v14, vcc, 32, v0
+; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v13, v[1:2]
+; GFX8-NEXT:    v_min_u32_e32 v7, 1, v7
+; GFX8-NEXT:    v_min_u32_e32 v5, 1, v5
+; GFX8-NEXT:    v_min_u32_e32 v3, 1, v3
+; GFX8-NEXT:    v_min_u32_e32 v0, 1, v0
 ; GFX8-NEXT:    v_or_b32_e32 v3, v4, v3
 ; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
-; GFX8-NEXT:    v_or_b32_e32 v2, v8, v2
+; GFX8-NEXT:    v_or_b32_e32 v7, v8, v7
 ; GFX8-NEXT:    v_or_b32_e32 v5, v6, v5
-; GFX8-NEXT:    v_cvt_f32_i32_e32 v1, v2
-; GFX8-NEXT:    v_cvt_f32_i32_e32 v2, v5
+; GFX8-NEXT:    v_cvt_f32_i32_e32 v1, v7
+; GFX8-NEXT:    v_cvt_f32_i32_e32 v4, v5
 ; GFX8-NEXT:    v_cvt_f32_i32_e32 v3, v3
 ; GFX8-NEXT:    v_cvt_f32_i32_e32 v0, v0
-; GFX8-NEXT:    v_ldexp_f32 v1, v1, v16
-; GFX8-NEXT:    v_ldexp_f32 v2, v2, v12
-; GFX8-NEXT:    v_ldexp_f32 v3, v3, v13
-; GFX8-NEXT:    v_ldexp_f32 v0, v0, v14
+; GFX8-NEXT:    v_sub_u32_e32 v11, vcc, 32, v11
+; GFX8-NEXT:    v_sub_u32_e32 v12, vcc, 32, v12
+; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 32, v13
+; GFX8-NEXT:    v_ldexp_f32 v4, v4, v11
+; GFX8-NEXT:    v_ldexp_f32 v3, v3, v12
+; GFX8-NEXT:    v_ldexp_f32 v0, v0, v2
+; GFX8-NEXT:    v_ldexp_f32 v1, v1, v14
 ; GFX8-NEXT:    v_cvt_f16_f32_e32 v5, v0
 ; GFX8-NEXT:    v_cvt_f16_f32_sdwa v3, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; GFX8-NEXT:    v_cvt_f16_f32_sdwa v4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; GFX8-NEXT:    v_cvt_f16_f32_e32 v2, v2
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s0, v10
-; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v11, v9, vcc
+; GFX8-NEXT:    v_cvt_f16_f32_sdwa v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GFX8-NEXT:    v_cvt_f16_f32_e32 v4, v4
+; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s0, v9
+; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v10, vcc
 ; GFX8-NEXT:    v_or_b32_e32 v3, v5, v3
-; GFX8-NEXT:    v_or_b32_e32 v2, v2, v4
+; GFX8-NEXT:    v_or_b32_e32 v2, v4, v2
 ; GFX8-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
 ; GFX8-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()

diff  --git a/llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll b/llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll
index 59ef31d06d49..9017a5e0db46 100644
--- a/llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll
@@ -16,10 +16,9 @@ define amdgpu_kernel void @s_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64
 ; GFX6-NEXT:    s_flbit_i32_b32 s0, s3
 ; GFX6-NEXT:    s_min_u32 s8, s0, 32
 ; GFX6-NEXT:    s_lshl_b64 s[0:1], s[2:3], s8
-; GFX6-NEXT:    v_cmp_ne_u32_e64 s[2:3], s0, 0
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[2:3]
-; GFX6-NEXT:    v_or_b32_e32 v0, s1, v0
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v0, v0
+; GFX6-NEXT:    s_min_u32 s0, s0, 1
+; GFX6-NEXT:    s_or_b32 s0, s1, s0
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v0, s0
 ; GFX6-NEXT:    s_sub_i32 s0, 32, s8
 ; GFX6-NEXT:    v_ldexp_f32_e64 v0, v0, s0
 ; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
@@ -31,13 +30,12 @@ define amdgpu_kernel void @s_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64
 ; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    s_flbit_i32_b32 s4, s3
-; GFX8-NEXT:    s_min_u32 s6, s4, 32
-; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], s6
-; GFX8-NEXT:    v_cmp_ne_u32_e64 s[4:5], s2, 0
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
-; GFX8-NEXT:    v_or_b32_e32 v0, s3, v0
-; GFX8-NEXT:    v_cvt_f32_u32_e32 v0, v0
-; GFX8-NEXT:    s_sub_i32 s2, 32, s6
+; GFX8-NEXT:    s_min_u32 s4, s4, 32
+; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], s4
+; GFX8-NEXT:    s_min_u32 s2, s2, 1
+; GFX8-NEXT:    s_or_b32 s2, s3, s2
+; GFX8-NEXT:    v_cvt_f32_u32_e32 v0, s2
+; GFX8-NEXT:    s_sub_i32 s2, 32, s4
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX8-NEXT:    v_ldexp_f32 v0, v0, s2
 ; GFX8-NEXT:    v_cvt_f16_f32_e32 v2, v0
@@ -66,8 +64,7 @@ define amdgpu_kernel void @v_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64
 ; GFX6-NEXT:    v_ffbh_u32_e32 v0, v4
 ; GFX6-NEXT:    v_min_u32_e32 v0, 32, v0
 ; GFX6-NEXT:    v_lshl_b64 v[3:4], v[3:4], v0
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
-; GFX6-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; GFX6-NEXT:    v_min_u32_e32 v3, 1, v3
 ; GFX6-NEXT:    v_or_b32_e32 v3, v4, v3
 ; GFX6-NEXT:    v_cvt_f32_u32_e32 v3, v3
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, 32, v0
@@ -80,28 +77,26 @@ define amdgpu_kernel void @v_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 3, v0
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s3
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, s2, v1
-; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, v2, v3, vcc
+; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
 ; GFX8-NEXT:    flat_load_dwordx2 v[1:2], v[1:2]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_ffbh_u32_e32 v4, v2
-; GFX8-NEXT:    v_min_u32_e32 v4, 32, v4
-; GFX8-NEXT:    v_lshlrev_b64 v[1:2], v4, v[1:2]
-; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
-; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX8-NEXT:    v_ffbh_u32_e32 v3, v2
+; GFX8-NEXT:    v_min_u32_e32 v3, 32, v3
+; GFX8-NEXT:    v_lshlrev_b64 v[1:2], v3, v[1:2]
+; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, 32, v3
+; GFX8-NEXT:    v_min_u32_e32 v1, 1, v1
 ; GFX8-NEXT:    v_or_b32_e32 v1, v2, v1
 ; GFX8-NEXT:    v_cvt_f32_u32_e32 v1, v1
-; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 32, v4
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s1
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
-; GFX8-NEXT:    v_ldexp_f32 v1, v1, v4
-; GFX8-NEXT:    v_cvt_f16_f32_e32 v4, v1
-; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v2, v3, vcc
-; GFX8-NEXT:    flat_store_short v[0:1], v4
+; GFX8-NEXT:    v_ldexp_f32 v1, v1, v3
+; GFX8-NEXT:    v_cvt_f16_f32_e32 v3, v1
+; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v2, vcc
+; GFX8-NEXT:    flat_store_short v[0:1], v3
 ; GFX8-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
@@ -124,10 +119,9 @@ define amdgpu_kernel void @s_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64
 ; GFX6-NEXT:    s_flbit_i32_b32 s0, s3
 ; GFX6-NEXT:    s_min_u32 s8, s0, 32
 ; GFX6-NEXT:    s_lshl_b64 s[0:1], s[2:3], s8
-; GFX6-NEXT:    v_cmp_ne_u32_e64 s[2:3], s0, 0
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[2:3]
-; GFX6-NEXT:    v_or_b32_e32 v0, s1, v0
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v0, v0
+; GFX6-NEXT:    s_min_u32 s0, s0, 1
+; GFX6-NEXT:    s_or_b32 s0, s1, s0
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v0, s0
 ; GFX6-NEXT:    s_sub_i32 s0, 32, s8
 ; GFX6-NEXT:    v_ldexp_f32_e64 v0, v0, s0
 ; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
@@ -138,14 +132,13 @@ define amdgpu_kernel void @s_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64
 ; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    s_flbit_i32_b32 s4, s3
-; GFX8-NEXT:    s_min_u32 s6, s4, 32
-; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], s6
-; GFX8-NEXT:    v_cmp_ne_u32_e64 s[4:5], s2, 0
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
-; GFX8-NEXT:    v_or_b32_e32 v0, s3, v0
-; GFX8-NEXT:    v_cvt_f32_u32_e32 v2, v0
+; GFX8-NEXT:    s_min_u32 s4, s4, 32
+; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], s4
+; GFX8-NEXT:    s_min_u32 s2, s2, 1
+; GFX8-NEXT:    s_or_b32 s2, s3, s2
+; GFX8-NEXT:    v_cvt_f32_u32_e32 v2, s2
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8-NEXT:    s_sub_i32 s0, 32, s6
+; GFX8-NEXT:    s_sub_i32 s0, 32, s4
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX8-NEXT:    v_ldexp_f32 v2, v2, s0
 ; GFX8-NEXT:    flat_store_dword v[0:1], v2
@@ -172,8 +165,7 @@ define amdgpu_kernel void @v_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64
 ; GFX6-NEXT:    v_ffbh_u32_e32 v0, v4
 ; GFX6-NEXT:    v_min_u32_e32 v0, 32, v0
 ; GFX6-NEXT:    v_lshl_b64 v[3:4], v[3:4], v0
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
-; GFX6-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; GFX6-NEXT:    v_min_u32_e32 v3, 1, v3
 ; GFX6-NEXT:    v_or_b32_e32 v3, v4, v3
 ; GFX6-NEXT:    v_cvt_f32_u32_e32 v3, v3
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, 32, v0
@@ -185,26 +177,24 @@ define amdgpu_kernel void @v_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 3, v0
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0
-; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 2, v0
+; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 2, v0
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s3
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, s2, v1
-; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, v2, v3, vcc
+; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
 ; GFX8-NEXT:    flat_load_dwordx2 v[1:2], v[1:2]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    v_ffbh_u32_e32 v0, v2
-; GFX8-NEXT:    v_min_u32_e32 v5, 32, v0
-; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v5, v[1:2]
+; GFX8-NEXT:    v_min_u32_e32 v4, 32, v0
+; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v4, v[1:2]
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s1
-; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX8-NEXT:    v_min_u32_e32 v0, 1, v0
 ; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
-; GFX8-NEXT:    v_cvt_f32_u32_e32 v6, v0
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s0, v4
-; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v2, v3, vcc
-; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 32, v5
-; GFX8-NEXT:    v_ldexp_f32 v2, v6, v2
+; GFX8-NEXT:    v_cvt_f32_u32_e32 v5, v0
+; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s0, v3
+; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v2, vcc
+; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 32, v4
+; GFX8-NEXT:    v_ldexp_f32 v2, v5, v2
 ; GFX8-NEXT:    flat_store_dword v[0:1], v2
 ; GFX8-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -229,19 +219,17 @@ define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f32(<2 x float> addrspace(1)*
 ; GFX6-NEXT:    s_min_u32 s8, s8, 32
 ; GFX6-NEXT:    s_min_u32 s9, s9, 32
 ; GFX6-NEXT:    s_lshl_b64 s[6:7], s[6:7], s8
-; GFX6-NEXT:    s_sub_i32 s10, 32, s8
+; GFX6-NEXT:    s_sub_i32 s8, 32, s8
 ; GFX6-NEXT:    s_lshl_b64 s[4:5], s[4:5], s9
-; GFX6-NEXT:    s_sub_i32 s11, 32, s9
-; GFX6-NEXT:    v_cmp_ne_u32_e64 s[8:9], s6, 0
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[8:9]
-; GFX6-NEXT:    v_cmp_ne_u32_e64 s[8:9], s4, 0
-; GFX6-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[8:9]
-; GFX6-NEXT:    v_or_b32_e32 v0, s7, v0
-; GFX6-NEXT:    v_or_b32_e32 v1, s5, v1
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v0, v0
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v2, v1
-; GFX6-NEXT:    v_ldexp_f32_e64 v1, v0, s10
-; GFX6-NEXT:    v_ldexp_f32_e64 v0, v2, s11
+; GFX6-NEXT:    s_sub_i32 s9, 32, s9
+; GFX6-NEXT:    s_min_u32 s6, s6, 1
+; GFX6-NEXT:    s_min_u32 s4, s4, 1
+; GFX6-NEXT:    s_or_b32 s6, s7, s6
+; GFX6-NEXT:    s_or_b32 s4, s5, s4
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v0, s6
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v2, s4
+; GFX6-NEXT:    v_ldexp_f32_e64 v1, v0, s8
+; GFX6-NEXT:    v_ldexp_f32_e64 v0, v2, s9
 ; GFX6-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -251,22 +239,20 @@ define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f32(<2 x float> addrspace(1)*
 ; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x34
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    s_flbit_i32_b32 s6, s3
-; GFX8-NEXT:    s_min_u32 s8, s6, 32
 ; GFX8-NEXT:    s_flbit_i32_b32 s7, s1
-; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], s8
-; GFX8-NEXT:    s_min_u32 s9, s7, 32
-; GFX8-NEXT:    v_cmp_ne_u32_e64 s[6:7], s2, 0
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[6:7]
-; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], s9
-; GFX8-NEXT:    v_or_b32_e32 v0, s3, v0
-; GFX8-NEXT:    v_cmp_ne_u32_e64 s[2:3], s0, 0
-; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[2:3]
-; GFX8-NEXT:    v_or_b32_e32 v1, s1, v1
-; GFX8-NEXT:    v_cvt_f32_u32_e32 v0, v0
-; GFX8-NEXT:    v_cvt_f32_u32_e32 v2, v1
-; GFX8-NEXT:    s_sub_i32 s0, 32, s8
+; GFX8-NEXT:    s_min_u32 s6, s6, 32
+; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], s6
+; GFX8-NEXT:    s_min_u32 s7, s7, 32
+; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], s7
+; GFX8-NEXT:    s_min_u32 s2, s2, 1
+; GFX8-NEXT:    s_min_u32 s0, s0, 1
+; GFX8-NEXT:    s_or_b32 s2, s3, s2
+; GFX8-NEXT:    s_or_b32 s0, s1, s0
+; GFX8-NEXT:    v_cvt_f32_u32_e32 v0, s2
+; GFX8-NEXT:    v_cvt_f32_u32_e32 v2, s0
+; GFX8-NEXT:    s_sub_i32 s0, 32, s6
 ; GFX8-NEXT:    v_ldexp_f32 v1, v0, s0
-; GFX8-NEXT:    s_sub_i32 s0, 32, s9
+; GFX8-NEXT:    s_sub_i32 s0, 32, s7
 ; GFX8-NEXT:    v_ldexp_f32 v0, v2, s0
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX8-NEXT:    v_mov_b32_e32 v3, s5
@@ -310,14 +296,10 @@ define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f32(<4 x float> addrspace(1)*
 ; GFX6-NEXT:    v_sub_i32_e32 v9, vcc, 32, v12
 ; GFX6-NEXT:    v_lshl_b64 v[5:6], v[5:6], v13
 ; GFX6-NEXT:    v_sub_i32_e32 v12, vcc, 32, v13
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
-; GFX6-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
-; GFX6-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
-; GFX6-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; GFX6-NEXT:    v_min_u32_e32 v3, 1, v3
+; GFX6-NEXT:    v_min_u32_e32 v0, 1, v0
+; GFX6-NEXT:    v_min_u32_e32 v7, 1, v7
+; GFX6-NEXT:    v_min_u32_e32 v5, 1, v5
 ; GFX6-NEXT:    v_or_b32_e32 v3, v4, v3
 ; GFX6-NEXT:    v_or_b32_e32 v0, v1, v0
 ; GFX6-NEXT:    v_or_b32_e32 v1, v8, v7
@@ -337,19 +319,18 @@ define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f32(<4 x float> addrspace(1)*
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 5, v0
-; GFX8-NEXT:    v_mov_b32_e32 v10, 0
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s3
 ; GFX8-NEXT:    v_add_u32_e32 v5, vcc, s2, v1
-; GFX8-NEXT:    v_addc_u32_e32 v6, vcc, v2, v10, vcc
+; GFX8-NEXT:    v_addc_u32_e32 v6, vcc, 0, v2, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 16, v5
 ; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, 0, v6, vcc
 ; GFX8-NEXT:    flat_load_dwordx4 v[1:4], v[1:2]
 ; GFX8-NEXT:    flat_load_dwordx4 v[5:8], v[5:6]
 ; GFX8-NEXT:    v_add_u32_e32 v9, vcc, s0, v0
-; GFX8-NEXT:    v_mov_b32_e32 v11, s1
-; GFX8-NEXT:    v_addc_u32_e32 v10, vcc, v11, v10, vcc
+; GFX8-NEXT:    v_mov_b32_e32 v10, s1
+; GFX8-NEXT:    v_addc_u32_e32 v10, vcc, 0, v10, vcc
 ; GFX8-NEXT:    s_waitcnt vmcnt(1)
 ; GFX8-NEXT:    v_ffbh_u32_e32 v12, v4
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
@@ -360,33 +341,29 @@ define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f32(<4 x float> addrspace(1)*
 ; GFX8-NEXT:    v_min_u32_e32 v11, 32, v11
 ; GFX8-NEXT:    v_min_u32_e32 v12, 32, v12
 ; GFX8-NEXT:    v_min_u32_e32 v13, 32, v13
-; GFX8-NEXT:    v_lshlrev_b64 v[7:8], v0, v[7:8]
-; GFX8-NEXT:    v_sub_u32_e32 v14, vcc, 32, v0
 ; GFX8-NEXT:    v_lshlrev_b64 v[5:6], v11, v[5:6]
 ; GFX8-NEXT:    v_lshlrev_b64 v[3:4], v12, v[3:4]
+; GFX8-NEXT:    v_lshlrev_b64 v[7:8], v0, v[7:8]
+; GFX8-NEXT:    v_sub_u32_e32 v14, vcc, 32, v0
 ; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v13, v[1:2]
-; GFX8-NEXT:    v_sub_u32_e32 v11, vcc, 32, v11
-; GFX8-NEXT:    v_sub_u32_e32 v12, vcc, 32, v12
-; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 32, v13
-; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
-; GFX8-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX8-NEXT:    v_min_u32_e32 v7, 1, v7
+; GFX8-NEXT:    v_min_u32_e32 v5, 1, v5
+; GFX8-NEXT:    v_min_u32_e32 v3, 1, v3
+; GFX8-NEXT:    v_min_u32_e32 v0, 1, v0
 ; GFX8-NEXT:    v_or_b32_e32 v3, v4, v3
 ; GFX8-NEXT:    v_or_b32_e32 v5, v6, v5
 ; GFX8-NEXT:    v_or_b32_e32 v7, v8, v7
 ; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
 ; GFX8-NEXT:    v_cvt_f32_u32_e32 v1, v7
-; GFX8-NEXT:    v_cvt_f32_u32_e32 v3, v3
 ; GFX8-NEXT:    v_cvt_f32_u32_e32 v4, v5
+; GFX8-NEXT:    v_cvt_f32_u32_e32 v3, v3
 ; GFX8-NEXT:    v_cvt_f32_u32_e32 v5, v0
+; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 32, v13
+; GFX8-NEXT:    v_sub_u32_e32 v11, vcc, 32, v11
+; GFX8-NEXT:    v_sub_u32_e32 v12, vcc, 32, v12
 ; GFX8-NEXT:    v_ldexp_f32 v1, v1, v14
-; GFX8-NEXT:    v_ldexp_f32 v3, v3, v12
 ; GFX8-NEXT:    v_ldexp_f32 v0, v4, v11
+; GFX8-NEXT:    v_ldexp_f32 v3, v3, v12
 ; GFX8-NEXT:    v_ldexp_f32 v2, v5, v2
 ; GFX8-NEXT:    flat_store_dwordx4 v[9:10], v[0:3]
 ; GFX8-NEXT:    s_endpgm
@@ -412,19 +389,17 @@ define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f16(<2 x half> addrspace(1)*
 ; GFX6-NEXT:    s_min_u32 s8, s8, 32
 ; GFX6-NEXT:    s_min_u32 s9, s9, 32
 ; GFX6-NEXT:    s_lshl_b64 s[6:7], s[6:7], s8
-; GFX6-NEXT:    s_sub_i32 s10, 32, s8
+; GFX6-NEXT:    s_sub_i32 s8, 32, s8
 ; GFX6-NEXT:    s_lshl_b64 s[4:5], s[4:5], s9
-; GFX6-NEXT:    s_sub_i32 s11, 32, s9
-; GFX6-NEXT:    v_cmp_ne_u32_e64 s[8:9], s6, 0
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[8:9]
-; GFX6-NEXT:    v_cmp_ne_u32_e64 s[8:9], s4, 0
-; GFX6-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[8:9]
-; GFX6-NEXT:    v_or_b32_e32 v0, s7, v0
-; GFX6-NEXT:    v_or_b32_e32 v1, s5, v1
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v0, v0
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v1, v1
-; GFX6-NEXT:    v_ldexp_f32_e64 v0, v0, s10
-; GFX6-NEXT:    v_ldexp_f32_e64 v1, v1, s11
+; GFX6-NEXT:    s_sub_i32 s9, 32, s9
+; GFX6-NEXT:    s_min_u32 s6, s6, 1
+; GFX6-NEXT:    s_min_u32 s4, s4, 1
+; GFX6-NEXT:    s_or_b32 s6, s7, s6
+; GFX6-NEXT:    s_or_b32 s4, s5, s4
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v0, s6
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v1, s4
+; GFX6-NEXT:    v_ldexp_f32_e64 v0, v0, s8
+; GFX6-NEXT:    v_ldexp_f32_e64 v1, v1, s9
 ; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
@@ -442,18 +417,16 @@ define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f16(<2 x half> addrspace(1)*
 ; GFX8-NEXT:    s_min_u32 s8, s2, 32
 ; GFX8-NEXT:    s_min_u32 s9, s3, 32
 ; GFX8-NEXT:    s_lshl_b64 s[2:3], s[6:7], s8
-; GFX8-NEXT:    v_cmp_ne_u32_e64 s[6:7], s2, 0
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[6:7]
-; GFX8-NEXT:    v_or_b32_e32 v0, s3, v0
-; GFX8-NEXT:    s_lshl_b64 s[2:3], s[4:5], s9
-; GFX8-NEXT:    v_cmp_ne_u32_e64 s[4:5], s2, 0
-; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[4:5]
-; GFX8-NEXT:    v_or_b32_e32 v1, s3, v1
-; GFX8-NEXT:    v_cvt_f32_u32_e32 v0, v0
-; GFX8-NEXT:    v_cvt_f32_u32_e32 v1, v1
-; GFX8-NEXT:    s_sub_i32 s8, 32, s8
+; GFX8-NEXT:    s_min_u32 s2, s2, 1
+; GFX8-NEXT:    s_or_b32 s2, s3, s2
+; GFX8-NEXT:    s_lshl_b64 s[4:5], s[4:5], s9
+; GFX8-NEXT:    v_cvt_f32_u32_e32 v0, s2
+; GFX8-NEXT:    s_min_u32 s2, s4, 1
+; GFX8-NEXT:    s_or_b32 s2, s5, s2
+; GFX8-NEXT:    v_cvt_f32_u32_e32 v1, s2
+; GFX8-NEXT:    s_sub_i32 s6, 32, s8
 ; GFX8-NEXT:    s_sub_i32 s2, 32, s9
-; GFX8-NEXT:    v_ldexp_f32 v0, v0, s8
+; GFX8-NEXT:    v_ldexp_f32 v0, v0, s6
 ; GFX8-NEXT:    v_ldexp_f32 v1, v1, s2
 ; GFX8-NEXT:    v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
 ; GFX8-NEXT:    v_cvt_f16_f32_e32 v1, v1
@@ -500,14 +473,10 @@ define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f16(<4 x half> addrspace(1)*
 ; GFX6-NEXT:    v_sub_i32_e32 v9, vcc, 32, v12
 ; GFX6-NEXT:    v_lshl_b64 v[5:6], v[5:6], v13
 ; GFX6-NEXT:    v_sub_i32_e32 v12, vcc, 32, v13
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
-; GFX6-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
-; GFX6-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
-; GFX6-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; GFX6-NEXT:    v_min_u32_e32 v3, 1, v3
+; GFX6-NEXT:    v_min_u32_e32 v0, 1, v0
+; GFX6-NEXT:    v_min_u32_e32 v7, 1, v7
+; GFX6-NEXT:    v_min_u32_e32 v5, 1, v5
 ; GFX6-NEXT:    v_or_b32_e32 v3, v4, v3
 ; GFX6-NEXT:    v_or_b32_e32 v0, v1, v0
 ; GFX6-NEXT:    v_or_b32_e32 v1, v8, v7
@@ -535,43 +504,35 @@ define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f16(<4 x half> addrspace(1)*
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 5, v0
-; GFX8-NEXT:    v_mov_b32_e32 v9, 0
-; GFX8-NEXT:    v_lshlrev_b32_e32 v10, 3, v0
+; GFX8-NEXT:    v_lshlrev_b32_e32 v9, 3, v0
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s3
 ; GFX8-NEXT:    v_add_u32_e32 v5, vcc, s2, v1
-; GFX8-NEXT:    v_addc_u32_e32 v6, vcc, v2, v9, vcc
+; GFX8-NEXT:    v_addc_u32_e32 v6, vcc, 0, v2, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 16, v5
 ; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, 0, v6, vcc
 ; GFX8-NEXT:    flat_load_dwordx4 v[1:4], v[1:2]
 ; GFX8-NEXT:    flat_load_dwordx4 v[5:8], v[5:6]
-; GFX8-NEXT:    v_mov_b32_e32 v11, s1
+; GFX8-NEXT:    v_mov_b32_e32 v10, s1
 ; GFX8-NEXT:    s_waitcnt vmcnt(1)
-; GFX8-NEXT:    v_ffbh_u32_e32 v13, v4
+; GFX8-NEXT:    v_ffbh_u32_e32 v12, v4
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    v_ffbh_u32_e32 v0, v8
-; GFX8-NEXT:    v_ffbh_u32_e32 v12, v6
-; GFX8-NEXT:    v_ffbh_u32_e32 v14, v2
+; GFX8-NEXT:    v_ffbh_u32_e32 v11, v6
+; GFX8-NEXT:    v_ffbh_u32_e32 v13, v2
 ; GFX8-NEXT:    v_min_u32_e32 v0, 32, v0
+; GFX8-NEXT:    v_min_u32_e32 v11, 32, v11
 ; GFX8-NEXT:    v_min_u32_e32 v12, 32, v12
 ; GFX8-NEXT:    v_min_u32_e32 v13, 32, v13
-; GFX8-NEXT:    v_min_u32_e32 v14, 32, v14
+; GFX8-NEXT:    v_lshlrev_b64 v[5:6], v11, v[5:6]
+; GFX8-NEXT:    v_lshlrev_b64 v[3:4], v12, v[3:4]
 ; GFX8-NEXT:    v_lshlrev_b64 v[7:8], v0, v[7:8]
-; GFX8-NEXT:    v_sub_u32_e32 v15, vcc, 32, v0
-; GFX8-NEXT:    v_lshlrev_b64 v[5:6], v12, v[5:6]
-; GFX8-NEXT:    v_lshlrev_b64 v[3:4], v13, v[3:4]
-; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v14, v[1:2]
-; GFX8-NEXT:    v_sub_u32_e32 v12, vcc, 32, v12
-; GFX8-NEXT:    v_sub_u32_e32 v13, vcc, 32, v13
-; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 32, v14
-; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
-; GFX8-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX8-NEXT:    v_sub_u32_e32 v14, vcc, 32, v0
+; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v13, v[1:2]
+; GFX8-NEXT:    v_min_u32_e32 v7, 1, v7
+; GFX8-NEXT:    v_min_u32_e32 v5, 1, v5
+; GFX8-NEXT:    v_min_u32_e32 v3, 1, v3
+; GFX8-NEXT:    v_min_u32_e32 v0, 1, v0
 ; GFX8-NEXT:    v_or_b32_e32 v3, v4, v3
 ; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
 ; GFX8-NEXT:    v_or_b32_e32 v7, v8, v7
@@ -580,16 +541,19 @@ define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f16(<4 x half> addrspace(1)*
 ; GFX8-NEXT:    v_cvt_f32_u32_e32 v4, v5
 ; GFX8-NEXT:    v_cvt_f32_u32_e32 v3, v3
 ; GFX8-NEXT:    v_cvt_f32_u32_e32 v0, v0
-; GFX8-NEXT:    v_ldexp_f32 v1, v1, v15
-; GFX8-NEXT:    v_ldexp_f32 v4, v4, v12
-; GFX8-NEXT:    v_ldexp_f32 v3, v3, v13
+; GFX8-NEXT:    v_sub_u32_e32 v11, vcc, 32, v11
+; GFX8-NEXT:    v_sub_u32_e32 v12, vcc, 32, v12
+; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 32, v13
+; GFX8-NEXT:    v_ldexp_f32 v4, v4, v11
+; GFX8-NEXT:    v_ldexp_f32 v3, v3, v12
 ; GFX8-NEXT:    v_ldexp_f32 v0, v0, v2
+; GFX8-NEXT:    v_ldexp_f32 v1, v1, v14
 ; GFX8-NEXT:    v_cvt_f16_f32_e32 v5, v0
 ; GFX8-NEXT:    v_cvt_f16_f32_sdwa v3, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
 ; GFX8-NEXT:    v_cvt_f16_f32_sdwa v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
 ; GFX8-NEXT:    v_cvt_f16_f32_e32 v4, v4
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s0, v10
-; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v11, v9, vcc
+; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s0, v9
+; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v10, vcc
 ; GFX8-NEXT:    v_or_b32_e32 v3, v5, v3
 ; GFX8-NEXT:    v_or_b32_e32 v2, v4, v2
 ; GFX8-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]

