[llvm] [ExpandIRInst] Support expanding fptoi to smaller type (PR #178690)

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 29 08:05:15 PST 2026


https://github.com/nikic created https://github.com/llvm/llvm-project/pull/178690

In order to support expanding fptoi where the target type is smaller, make most of the code work on the float-as-integer type, rather than the target type of the cast. We only need to cast to the target type when producing the final result, or just before performing a left shift.

This not only allows us to handle casts to a smaller type, but also avoids performing intermediate calculations on unnecessarily large types.

This also matches how compiler-rt handles this:
https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fp_fixint_impl.inc

Proof: https://alive2.llvm.org/ce/z/3pJ9pE

(Note that there is a pre-existing issue that we produce the same code for fptosi and fptoui.)

>From a90ba0ce9ee8d3f81983d6d812155475b39cf0db Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Thu, 29 Jan 2026 16:55:04 +0100
Subject: [PATCH] [ExpandIRInst] Support expanding fptoi to smaller type

In order to support expanding fptoi where the target type is
smaller, make most of the code work on the float-as-integer type,
rather than the target type of the cast. We only need to cast the
final result to the target type, or prior to performing a left
shift.

This not only allows us to handle casts to a smaller type, but also
avoids performing intermediate calculations on unnecessarily large
types.

This also matches how compiler-rt handles this:
https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fp_fixint_impl.inc
---
 llvm/lib/CodeGen/ExpandIRInsts.cpp            |   42 +-
 llvm/test/CodeGen/AMDGPU/fptoi.i128.ll        | 1530 ++++++++---------
 .../X86/expand-fp-convert-small.ll            |  140 +-
 .../X86/expand-large-fp-convert-fptosi129.ll  |  156 +-
 .../X86/expand-large-fp-convert-fptoui129.ll  |  156 +-
 5 files changed, 985 insertions(+), 1039 deletions(-)

diff --git a/llvm/lib/CodeGen/ExpandIRInsts.cpp b/llvm/lib/CodeGen/ExpandIRInsts.cpp
index 2f0f8e6a79b9a..241271f4f0035 100644
--- a/llvm/lib/CodeGen/ExpandIRInsts.cpp
+++ b/llvm/lib/CodeGen/ExpandIRInsts.cpp
@@ -528,10 +528,11 @@ static void expandFPToI(Instruction *FPToI) {
       PowerOf2Ceil(FloatVal->getType()->getScalarSizeInBits());
   unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;
   unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1;
-  Value *ImplicitBit =
-      ConstantInt::get(IntTy, APInt::getOneBitSet(BitWidth, FPMantissaWidth));
-  Value *SignificandMask =
-      ConstantInt::get(IntTy, APInt::getLowBitsSet(BitWidth, FPMantissaWidth));
+  IntegerType *FloatIntTy = Builder.getIntNTy(FloatWidth);
+  Value *ImplicitBit = ConstantInt::get(
+      FloatIntTy, APInt::getOneBitSet(FloatWidth, FPMantissaWidth));
+  Value *SignificandMask = ConstantInt::get(
+      FloatIntTy, APInt::getLowBitsSet(FloatWidth, FPMantissaWidth));
 
   BasicBlock *Entry = Builder.GetInsertBlock();
   Function *F = Entry->getParent();
@@ -559,30 +560,30 @@ static void expandFPToI(Instruction *FPToI) {
   if (FloatVal->getType()->isX86_FP80Ty())
     FloatVal0 =
         Builder.CreateFPExt(FloatVal, Type::getFP128Ty(Builder.getContext()));
-  Value *ARep0 =
-      Builder.CreateBitCast(FloatVal0, Builder.getIntNTy(FloatWidth));
-  Value *ARep = Builder.CreateZExt(ARep0, FPToI->getType());
+  Value *ARep = Builder.CreateBitCast(FloatVal0, FloatIntTy);
   Value *PosOrNeg = Builder.CreateICmpSGT(
-      ARep0, ConstantInt::getSigned(Builder.getIntNTy(FloatWidth), -1));
+      ARep, ConstantInt::getSigned(Builder.getIntNTy(FloatWidth), -1));
   Value *Sign = Builder.CreateSelect(PosOrNeg, ConstantInt::getSigned(IntTy, 1),
                                      ConstantInt::getSigned(IntTy, -1), "sign");
   Value *And =
-      Builder.CreateLShr(ARep, Builder.getIntN(BitWidth, FPMantissaWidth));
+      Builder.CreateLShr(ARep, Builder.getIntN(FloatWidth, FPMantissaWidth));
   Value *BiasedExp = Builder.CreateAnd(
-      And, Builder.getIntN(BitWidth, (1 << ExponentWidth) - 1), "biased.exp");
+      And, Builder.getIntN(FloatWidth, (1 << ExponentWidth) - 1), "biased.exp");
   Value *Abs = Builder.CreateAnd(ARep, SignificandMask);
   Value *Significand = Builder.CreateOr(Abs, ImplicitBit, "significand");
   Value *ExpIsNegative = Builder.CreateICmpULT(
-      BiasedExp, Builder.getIntN(BitWidth, ExponentBias), "exp.is.negative");
+      BiasedExp, Builder.getIntN(FloatWidth, ExponentBias), "exp.is.negative");
   Builder.CreateCondBr(ExpIsNegative, End, CheckSaturateBB);
 
   // check.saturate:
   Builder.SetInsertPoint(CheckSaturateBB);
   Value *Add1 = Builder.CreateAdd(
-      BiasedExp, ConstantInt::getSigned(
-                     IntTy, -static_cast<int64_t>(ExponentBias + BitWidth)));
+      BiasedExp,
+      ConstantInt::getSigned(FloatIntTy,
+                             -static_cast<int64_t>(ExponentBias + BitWidth)));
   Value *Cmp3 = Builder.CreateICmpULT(
-      Add1, ConstantInt::getSigned(IntTy, -static_cast<int64_t>(BitWidth)));
+      Add1,
+      ConstantInt::getSigned(FloatIntTy, -static_cast<int64_t>(BitWidth)));
   Builder.CreateCondBr(Cmp3, SaturateBB, CheckExpSizeBB);
 
   // saturate:
@@ -598,15 +599,16 @@ static void expandFPToI(Instruction *FPToI) {
   // if.end9:
   Builder.SetInsertPoint(CheckExpSizeBB);
   Value *ExpSmallerMantissaWidth = Builder.CreateICmpULT(
-      BiasedExp, Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth),
+      BiasedExp, Builder.getIntN(FloatWidth, ExponentBias + FPMantissaWidth),
       "exp.smaller.mantissa.width");
   Builder.CreateCondBr(ExpSmallerMantissaWidth, ExpSmallBB, ExpLargeBB);
 
   // exp.small:
   Builder.SetInsertPoint(ExpSmallBB);
   Value *Sub13 = Builder.CreateSub(
-      Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth), BiasedExp);
-  Value *Shr14 = Builder.CreateLShr(Significand, Sub13);
+      Builder.getIntN(FloatWidth, ExponentBias + FPMantissaWidth), BiasedExp);
+  Value *Shr14 =
+      Builder.CreateZExtOrTrunc(Builder.CreateLShr(Significand, Sub13), IntTy);
   Value *Mul = Builder.CreateMul(Shr14, Sign);
   Builder.CreateBr(End);
 
@@ -615,8 +617,10 @@ static void expandFPToI(Instruction *FPToI) {
   Value *Sub15 = Builder.CreateAdd(
       BiasedExp,
       ConstantInt::getSigned(
-          IntTy, -static_cast<int64_t>(ExponentBias + FPMantissaWidth)));
-  Value *Shl = Builder.CreateShl(Significand, Sub15);
+          FloatIntTy, -static_cast<int64_t>(ExponentBias + FPMantissaWidth)));
+  Value *SignificandCast = Builder.CreateZExtOrTrunc(Significand, IntTy);
+  Value *Shl = Builder.CreateShl(SignificandCast,
+                                 Builder.CreateZExtOrTrunc(Sub15, IntTy));
   Value *Mul16 = Builder.CreateMul(Shl, Sign);
   Builder.CreateBr(End);
 
diff --git a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
index df905f4e816a5..200fbf5d220b4 100644
--- a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
@@ -11,8 +11,8 @@ define i128 @fptosi_f64_to_i128(double %x) {
 ; SDAG-NEXT:    v_mov_b32_e32 v7, 0
 ; SDAG-NEXT:    s_mov_b64 s[4:5], 0x3fe
 ; SDAG-NEXT:    v_mov_b32_e32 v4, v0
-; SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; SDAG-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[6:7]
+; SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v2, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v1, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v3, 0
@@ -20,33 +20,28 @@ define i128 @fptosi_f64_to_i128(double %x) {
 ; SDAG-NEXT:    s_cbranch_execz .LBB0_10
 ; SDAG-NEXT:  ; %bb.1: ; %fp-to-i-if-check.saturate
 ; SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffffb81, v6
-; SDAG-NEXT:    v_mov_b32_e32 v1, -1
-; SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
-; SDAG-NEXT:    v_addc_co_u32_e32 v2, vcc, -1, v7, vcc
+; SDAG-NEXT:    v_addc_co_u32_e64 v1, s[6:7], 0, -1, vcc
 ; SDAG-NEXT:    s_movk_i32 s6, 0xff7f
-; SDAG-NEXT:    v_addc_co_u32_e32 v3, vcc, -1, v7, vcc
 ; SDAG-NEXT:    s_mov_b32 s7, -1
-; SDAG-NEXT:    v_cmp_eq_u64_e64 s[4:5], -1, v[2:3]
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[6:7], s[6:7], v[0:1]
-; SDAG-NEXT:    v_cmp_lt_i64_e32 vcc, -1, v[4:5]
-; SDAG-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
+; SDAG-NEXT:    v_cmp_lt_i64_e64 s[4:5], -1, v[4:5]
+; SDAG-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1]
 ; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], vcc
 ; SDAG-NEXT:    s_xor_b64 s[10:11], exec, s[6:7]
 ; SDAG-NEXT:    s_cbranch_execz .LBB0_7
 ; SDAG-NEXT:  ; %bb.2: ; %fp-to-i-if-check.exp.size
-; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SDAG-NEXT:    v_add_co_u32_e64 v9, s[4:5], -1, v0
-; SDAG-NEXT:    s_mov_b64 s[4:5], 0x432
+; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; SDAG-NEXT:    v_add_co_u32_e32 v9, vcc, -1, v0
+; SDAG-NEXT:    s_mov_b64 s[6:7], 0x432
 ; SDAG-NEXT:    v_and_b32_e32 v0, 0xfffff, v5
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[4:5], v[6:7]
-; SDAG-NEXT:    v_cndmask_b32_e64 v8, -1, 0, vcc
-; SDAG-NEXT:    v_cndmask_b32_e64 v10, -1, 1, vcc
+; SDAG-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[6:7]
+; SDAG-NEXT:    v_cndmask_b32_e64 v8, -1, 0, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e64 v10, -1, 1, s[4:5]
 ; SDAG-NEXT:    v_or_b32_e32 v5, 0x100000, v0
 ; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], vcc
 ; SDAG-NEXT:    s_xor_b64 s[12:13], exec, s[6:7]
 ; SDAG-NEXT:    s_cbranch_execz .LBB0_4
 ; SDAG-NEXT:  ; %bb.3: ; %fp-to-i-if-exp.large
@@ -55,75 +50,69 @@ define i128 @fptosi_f64_to_i128(double %x) {
 ; SDAG-NEXT:    v_add_u32_e32 v3, 0xfffffbcd, v6
 ; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v0, v[4:5]
 ; SDAG-NEXT:    v_lshlrev_b64 v[6:7], v2, v[4:5]
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v3
+; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v3
 ; SDAG-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v3
 ; SDAG-NEXT:    v_lshlrev_b64 v[3:4], v3, v[4:5]
-; SDAG-NEXT:    v_cndmask_b32_e64 v1, v7, v1, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
 ; SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, v1, s[6:7]
-; SDAG-NEXT:    v_cndmask_b32_e64 v0, v6, v0, s[4:5]
-; SDAG-NEXT:    v_cndmask_b32_e64 v7, 0, v3, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v7, 0, v3, vcc
 ; SDAG-NEXT:    v_cndmask_b32_e64 v5, 0, v0, s[6:7]
 ; SDAG-NEXT:    v_mul_lo_u32 v12, v10, v1
 ; SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v7, v10, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v2, 0
-; SDAG-NEXT:    v_cndmask_b32_e64 v13, 0, v4, s[4:5]
-; SDAG-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v13, v10, v[1:2]
+; SDAG-NEXT:    v_cndmask_b32_e32 v13, 0, v4, vcc
+; SDAG-NEXT:    v_mad_u64_u32 v[3:4], s[6:7], v13, v10, v[1:2]
 ; SDAG-NEXT:    v_mul_lo_u32 v11, v8, v5
 ; SDAG-NEXT:    v_mad_u64_u32 v[5:6], s[6:7], v10, v5, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v1, v3
-; SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v7, v8, v[1:2]
+; SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[6:7], v7, v8, v[1:2]
 ; SDAG-NEXT:    v_add3_u32 v6, v6, v12, v11
-; SDAG-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v9, v7, v[5:6]
-; SDAG-NEXT:    v_add_co_u32_e64 v2, s[4:5], v4, v2
-; SDAG-NEXT:    v_addc_co_u32_e64 v3, s[4:5], 0, 0, s[4:5]
+; SDAG-NEXT:    v_mad_u64_u32 v[5:6], s[6:7], v9, v7, v[5:6]
+; SDAG-NEXT:    v_add_co_u32_e32 v2, vcc, v4, v2
+; SDAG-NEXT:    v_addc_co_u32_e64 v3, s[6:7], 0, 0, vcc
 ; SDAG-NEXT:    v_mul_lo_u32 v10, v9, v13
 ; SDAG-NEXT:    v_mul_lo_u32 v7, v9, v7
-; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v13, v8, v[2:3]
+; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[6:7], v13, v8, v[2:3]
 ; SDAG-NEXT:    ; implicit-def: $vgpr8
 ; SDAG-NEXT:    ; implicit-def: $vgpr9
 ; SDAG-NEXT:    v_add3_u32 v4, v7, v6, v10
-; SDAG-NEXT:    v_add_co_u32_e64 v2, s[4:5], v2, v5
-; SDAG-NEXT:    v_addc_co_u32_e64 v3, s[4:5], v3, v4, s[4:5]
+; SDAG-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v5
+; SDAG-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v4, vcc
 ; SDAG-NEXT:    ; implicit-def: $vgpr6_vgpr7
 ; SDAG-NEXT:    ; implicit-def: $vgpr4_vgpr5
 ; SDAG-NEXT:    ; implicit-def: $vgpr10
 ; SDAG-NEXT:  .LBB0_4: ; %Flow
-; SDAG-NEXT:    s_andn2_saveexec_b64 s[12:13], s[12:13]
+; SDAG-NEXT:    s_andn2_saveexec_b64 s[6:7], s[12:13]
 ; SDAG-NEXT:    s_cbranch_execz .LBB0_6
 ; SDAG-NEXT:  ; %bb.5: ; %fp-to-i-if-exp.small
-; SDAG-NEXT:    v_sub_u32_e32 v2, 0x433, v6
-; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v2, v[4:5]
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v2
-; SDAG-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v2
-; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, v0, s[4:5]
-; SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, v1, s[4:5]
-; SDAG-NEXT:    v_cndmask_b32_e64 v6, v0, v4, s[6:7]
-; SDAG-NEXT:    v_cndmask_b32_e64 v5, v1, v5, s[6:7]
-; SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v10, 0
+; SDAG-NEXT:    v_sub_u32_e32 v0, 0x433, v6
+; SDAG-NEXT:    v_lshrrev_b64 v[4:5], v0, v[4:5]
 ; SDAG-NEXT:    v_mov_b32_e32 v2, 0
-; SDAG-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v5, v10, v[1:2]
-; SDAG-NEXT:    v_mov_b32_e32 v1, v3
-; SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v6, v8, v[1:2]
-; SDAG-NEXT:    v_add_co_u32_e64 v2, s[4:5], v4, v2
-; SDAG-NEXT:    v_addc_co_u32_e64 v3, s[4:5], 0, 0, s[4:5]
-; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v5, v8, v[2:3]
-; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v9, v6, v[2:3]
-; SDAG-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v9, v6, v[3:4]
+; SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[12:13], v4, v10, 0
+; SDAG-NEXT:    v_mad_u64_u32 v[6:7], s[12:13], v5, v10, v[1:2]
+; SDAG-NEXT:    v_mov_b32_e32 v1, v6
+; SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[12:13], v4, v8, v[1:2]
+; SDAG-NEXT:    v_add_co_u32_e32 v2, vcc, v7, v2
+; SDAG-NEXT:    v_addc_co_u32_e64 v3, s[12:13], 0, 0, vcc
+; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[12:13], v5, v8, v[2:3]
+; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[12:13], v9, v4, v[2:3]
+; SDAG-NEXT:    v_mad_u64_u32 v[3:4], s[12:13], v9, v4, v[3:4]
 ; SDAG-NEXT:    v_mad_i32_i24 v3, v9, v5, v3
 ; SDAG-NEXT:  .LBB0_6: ; %Flow1
-; SDAG-NEXT:    s_or_b64 exec, exec, s[12:13]
+; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; SDAG-NEXT:  .LBB0_7: ; %Flow2
-; SDAG-NEXT:    s_andn2_saveexec_b64 s[4:5], s[10:11]
+; SDAG-NEXT:    s_andn2_saveexec_b64 s[6:7], s[10:11]
 ; SDAG-NEXT:  ; %bb.8: ; %fp-to-i-if-saturate
 ; SDAG-NEXT:    v_bfrev_b32_e32 v0, 1
 ; SDAG-NEXT:    v_bfrev_b32_e32 v1, -2
-; SDAG-NEXT:    v_cndmask_b32_e32 v2, v0, v1, vcc
-; SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc
+; SDAG-NEXT:    v_cndmask_b32_e64 v2, v0, v1, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[4:5]
 ; SDAG-NEXT:    v_mov_b32_e32 v3, v2
 ; SDAG-NEXT:    v_mov_b32_e32 v0, v1
 ; SDAG-NEXT:    v_mov_b32_e32 v2, v1
 ; SDAG-NEXT:  ; %bb.9: ; %Flow3
-; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
+; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; SDAG-NEXT:  .LBB0_10: ; %fp-to-i-cleanup
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -131,15 +120,15 @@ define i128 @fptosi_f64_to_i128(double %x) {
 ; GISEL-LABEL: fptosi_f64_to_i128:
 ; GISEL:       ; %bb.0: ; %fp-to-i-entry
 ; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    v_mov_b32_e32 v5, v1
 ; GISEL-NEXT:    v_mov_b32_e32 v4, v0
-; GISEL-NEXT:    v_lshrrev_b32_e32 v2, 20, v5
-; GISEL-NEXT:    v_mov_b32_e32 v0, 0x3ff
-; GISEL-NEXT:    s_mov_b64 s[4:5], 0
+; GISEL-NEXT:    v_mov_b32_e32 v5, v1
+; GISEL-NEXT:    v_lshrrev_b64 v[0:1], 52, v[4:5]
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0x3ff
 ; GISEL-NEXT:    v_mov_b32_e32 v7, 0
-; GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GISEL-NEXT:    v_and_b32_e32 v6, 0x7ff, v2
-; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[6:7], v[0:1]
+; GISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GISEL-NEXT:    v_bfe_u32 v6, v0, 0, 11
+; GISEL-NEXT:    s_mov_b64 s[4:5], 0
+; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[6:7], v[1:2]
 ; GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
 ; GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GISEL-NEXT:    v_mov_b32_e32 v1, s5
@@ -150,19 +139,10 @@ define i128 @fptosi_f64_to_i128(double %x) {
 ; GISEL-NEXT:  ; %bb.1: ; %fp-to-i-if-check.saturate
 ; GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffffb81, v6
 ; GISEL-NEXT:    v_mov_b32_e32 v2, 0xffffff80
-; GISEL-NEXT:    v_addc_co_u32_e64 v1, s[6:7], 0, -1, vcc
 ; GISEL-NEXT:    v_mov_b32_e32 v3, -1
-; GISEL-NEXT:    v_addc_co_u32_e64 v8, s[6:7], 0, -1, s[6:7]
-; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3]
-; GISEL-NEXT:    v_addc_co_u32_e64 v9, s[6:7], 0, -1, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_cmp_le_u64_e32 vcc, -1, v[8:9]
+; GISEL-NEXT:    v_addc_co_u32_e64 v1, s[6:7], 0, -1, vcc
 ; GISEL-NEXT:    v_cmp_lt_i64_e64 s[4:5], -1, v[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, -1, v[8:9]
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
-; GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3]
 ; GISEL-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
 ; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
 ; GISEL-NEXT:    s_xor_b64 s[14:15], exec, s[6:7]
@@ -267,22 +247,16 @@ define i128 @fptosi_f64_to_i128(double %x) {
 ; GISEL-NEXT:    s_andn2_saveexec_b64 s[8:9], s[16:17]
 ; GISEL-NEXT:    s_cbranch_execz .LBB0_6
 ; GISEL-NEXT:  ; %bb.5: ; %fp-to-i-if-exp.small
-; GISEL-NEXT:    v_sub_co_u32_e32 v2, vcc, 0x433, v6
-; GISEL-NEXT:    v_lshrrev_b64 v[0:1], v2, v[4:5]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v6, v0, v4, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v7, v1, v5, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[6:7], v6, v9, 0
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v6, v8, 0
-; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[6:7], v7, v9, v[4:5]
-; GISEL-NEXT:    v_mul_lo_u32 v10, v7, v9
-; GISEL-NEXT:    v_mad_u64_u32 v[4:5], vcc, v6, v9, v[1:2]
-; GISEL-NEXT:    v_mul_lo_u32 v6, v6, v9
-; GISEL-NEXT:    v_mad_u64_u32 v[1:2], s[6:7], v7, v8, v[4:5]
-; GISEL-NEXT:    v_addc_co_u32_e64 v3, s[6:7], v3, v6, s[6:7]
+; GISEL-NEXT:    v_sub_co_u32_e32 v0, vcc, 0x433, v6
+; GISEL-NEXT:    v_lshrrev_b64 v[4:5], v0, v[4:5]
+; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[6:7], v4, v9, 0
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v4, v8, 0
+; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[6:7], v5, v9, v[6:7]
+; GISEL-NEXT:    v_mul_lo_u32 v10, v5, v9
+; GISEL-NEXT:    v_mad_u64_u32 v[6:7], vcc, v4, v9, v[1:2]
+; GISEL-NEXT:    v_mul_lo_u32 v4, v4, v9
+; GISEL-NEXT:    v_mad_u64_u32 v[1:2], s[6:7], v5, v8, v[6:7]
+; GISEL-NEXT:    v_addc_co_u32_e64 v3, s[6:7], v3, v4, s[6:7]
 ; GISEL-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v10, vcc
 ; GISEL-NEXT:  .LBB0_6: ; %Flow1
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[8:9]
@@ -376,8 +350,8 @@ define i128 @fptoui_f64_to_i128(double %x) {
 ; SDAG-NEXT:    v_mov_b32_e32 v7, 0
 ; SDAG-NEXT:    s_mov_b64 s[4:5], 0x3fe
 ; SDAG-NEXT:    v_mov_b32_e32 v4, v0
-; SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; SDAG-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[6:7]
+; SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v2, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v1, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v3, 0
@@ -385,33 +359,28 @@ define i128 @fptoui_f64_to_i128(double %x) {
 ; SDAG-NEXT:    s_cbranch_execz .LBB1_10
 ; SDAG-NEXT:  ; %bb.1: ; %fp-to-i-if-check.saturate
 ; SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffffb81, v6
-; SDAG-NEXT:    v_mov_b32_e32 v1, -1
-; SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
-; SDAG-NEXT:    v_addc_co_u32_e32 v2, vcc, -1, v7, vcc
+; SDAG-NEXT:    v_addc_co_u32_e64 v1, s[6:7], 0, -1, vcc
 ; SDAG-NEXT:    s_movk_i32 s6, 0xff7f
-; SDAG-NEXT:    v_addc_co_u32_e32 v3, vcc, -1, v7, vcc
 ; SDAG-NEXT:    s_mov_b32 s7, -1
-; SDAG-NEXT:    v_cmp_eq_u64_e64 s[4:5], -1, v[2:3]
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[6:7], s[6:7], v[0:1]
-; SDAG-NEXT:    v_cmp_lt_i64_e32 vcc, -1, v[4:5]
-; SDAG-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
+; SDAG-NEXT:    v_cmp_lt_i64_e64 s[4:5], -1, v[4:5]
+; SDAG-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1]
 ; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], vcc
 ; SDAG-NEXT:    s_xor_b64 s[10:11], exec, s[6:7]
 ; SDAG-NEXT:    s_cbranch_execz .LBB1_7
 ; SDAG-NEXT:  ; %bb.2: ; %fp-to-i-if-check.exp.size
-; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SDAG-NEXT:    v_add_co_u32_e64 v9, s[4:5], -1, v0
-; SDAG-NEXT:    s_mov_b64 s[4:5], 0x432
+; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; SDAG-NEXT:    v_add_co_u32_e32 v9, vcc, -1, v0
+; SDAG-NEXT:    s_mov_b64 s[6:7], 0x432
 ; SDAG-NEXT:    v_and_b32_e32 v0, 0xfffff, v5
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[4:5], v[6:7]
-; SDAG-NEXT:    v_cndmask_b32_e64 v8, -1, 0, vcc
-; SDAG-NEXT:    v_cndmask_b32_e64 v10, -1, 1, vcc
+; SDAG-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[6:7]
+; SDAG-NEXT:    v_cndmask_b32_e64 v8, -1, 0, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e64 v10, -1, 1, s[4:5]
 ; SDAG-NEXT:    v_or_b32_e32 v5, 0x100000, v0
 ; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], vcc
 ; SDAG-NEXT:    s_xor_b64 s[12:13], exec, s[6:7]
 ; SDAG-NEXT:    s_cbranch_execz .LBB1_4
 ; SDAG-NEXT:  ; %bb.3: ; %fp-to-i-if-exp.large
@@ -420,75 +389,69 @@ define i128 @fptoui_f64_to_i128(double %x) {
 ; SDAG-NEXT:    v_add_u32_e32 v3, 0xfffffbcd, v6
 ; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v0, v[4:5]
 ; SDAG-NEXT:    v_lshlrev_b64 v[6:7], v2, v[4:5]
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v3
+; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v3
 ; SDAG-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v3
 ; SDAG-NEXT:    v_lshlrev_b64 v[3:4], v3, v[4:5]
-; SDAG-NEXT:    v_cndmask_b32_e64 v1, v7, v1, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
 ; SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, v1, s[6:7]
-; SDAG-NEXT:    v_cndmask_b32_e64 v0, v6, v0, s[4:5]
-; SDAG-NEXT:    v_cndmask_b32_e64 v7, 0, v3, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v7, 0, v3, vcc
 ; SDAG-NEXT:    v_cndmask_b32_e64 v5, 0, v0, s[6:7]
 ; SDAG-NEXT:    v_mul_lo_u32 v12, v10, v1
 ; SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v7, v10, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v2, 0
-; SDAG-NEXT:    v_cndmask_b32_e64 v13, 0, v4, s[4:5]
-; SDAG-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v13, v10, v[1:2]
+; SDAG-NEXT:    v_cndmask_b32_e32 v13, 0, v4, vcc
+; SDAG-NEXT:    v_mad_u64_u32 v[3:4], s[6:7], v13, v10, v[1:2]
 ; SDAG-NEXT:    v_mul_lo_u32 v11, v8, v5
 ; SDAG-NEXT:    v_mad_u64_u32 v[5:6], s[6:7], v10, v5, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v1, v3
-; SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v7, v8, v[1:2]
+; SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[6:7], v7, v8, v[1:2]
 ; SDAG-NEXT:    v_add3_u32 v6, v6, v12, v11
-; SDAG-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v9, v7, v[5:6]
-; SDAG-NEXT:    v_add_co_u32_e64 v2, s[4:5], v4, v2
-; SDAG-NEXT:    v_addc_co_u32_e64 v3, s[4:5], 0, 0, s[4:5]
+; SDAG-NEXT:    v_mad_u64_u32 v[5:6], s[6:7], v9, v7, v[5:6]
+; SDAG-NEXT:    v_add_co_u32_e32 v2, vcc, v4, v2
+; SDAG-NEXT:    v_addc_co_u32_e64 v3, s[6:7], 0, 0, vcc
 ; SDAG-NEXT:    v_mul_lo_u32 v10, v9, v13
 ; SDAG-NEXT:    v_mul_lo_u32 v7, v9, v7
-; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v13, v8, v[2:3]
+; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[6:7], v13, v8, v[2:3]
 ; SDAG-NEXT:    ; implicit-def: $vgpr8
 ; SDAG-NEXT:    ; implicit-def: $vgpr9
 ; SDAG-NEXT:    v_add3_u32 v4, v7, v6, v10
-; SDAG-NEXT:    v_add_co_u32_e64 v2, s[4:5], v2, v5
-; SDAG-NEXT:    v_addc_co_u32_e64 v3, s[4:5], v3, v4, s[4:5]
+; SDAG-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v5
+; SDAG-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v4, vcc
 ; SDAG-NEXT:    ; implicit-def: $vgpr6_vgpr7
 ; SDAG-NEXT:    ; implicit-def: $vgpr4_vgpr5
 ; SDAG-NEXT:    ; implicit-def: $vgpr10
 ; SDAG-NEXT:  .LBB1_4: ; %Flow
-; SDAG-NEXT:    s_andn2_saveexec_b64 s[12:13], s[12:13]
+; SDAG-NEXT:    s_andn2_saveexec_b64 s[6:7], s[12:13]
 ; SDAG-NEXT:    s_cbranch_execz .LBB1_6
 ; SDAG-NEXT:  ; %bb.5: ; %fp-to-i-if-exp.small
-; SDAG-NEXT:    v_sub_u32_e32 v2, 0x433, v6
-; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v2, v[4:5]
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v2
-; SDAG-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v2
-; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, v0, s[4:5]
-; SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, v1, s[4:5]
-; SDAG-NEXT:    v_cndmask_b32_e64 v6, v0, v4, s[6:7]
-; SDAG-NEXT:    v_cndmask_b32_e64 v5, v1, v5, s[6:7]
-; SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v10, 0
+; SDAG-NEXT:    v_sub_u32_e32 v0, 0x433, v6
+; SDAG-NEXT:    v_lshrrev_b64 v[4:5], v0, v[4:5]
 ; SDAG-NEXT:    v_mov_b32_e32 v2, 0
-; SDAG-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v5, v10, v[1:2]
-; SDAG-NEXT:    v_mov_b32_e32 v1, v3
-; SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v6, v8, v[1:2]
-; SDAG-NEXT:    v_add_co_u32_e64 v2, s[4:5], v4, v2
-; SDAG-NEXT:    v_addc_co_u32_e64 v3, s[4:5], 0, 0, s[4:5]
-; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v5, v8, v[2:3]
-; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v9, v6, v[2:3]
-; SDAG-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v9, v6, v[3:4]
+; SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[12:13], v4, v10, 0
+; SDAG-NEXT:    v_mad_u64_u32 v[6:7], s[12:13], v5, v10, v[1:2]
+; SDAG-NEXT:    v_mov_b32_e32 v1, v6
+; SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[12:13], v4, v8, v[1:2]
+; SDAG-NEXT:    v_add_co_u32_e32 v2, vcc, v7, v2
+; SDAG-NEXT:    v_addc_co_u32_e64 v3, s[12:13], 0, 0, vcc
+; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[12:13], v5, v8, v[2:3]
+; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[12:13], v9, v4, v[2:3]
+; SDAG-NEXT:    v_mad_u64_u32 v[3:4], s[12:13], v9, v4, v[3:4]
 ; SDAG-NEXT:    v_mad_i32_i24 v3, v9, v5, v3
 ; SDAG-NEXT:  .LBB1_6: ; %Flow1
-; SDAG-NEXT:    s_or_b64 exec, exec, s[12:13]
+; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; SDAG-NEXT:  .LBB1_7: ; %Flow2
-; SDAG-NEXT:    s_andn2_saveexec_b64 s[4:5], s[10:11]
+; SDAG-NEXT:    s_andn2_saveexec_b64 s[6:7], s[10:11]
 ; SDAG-NEXT:  ; %bb.8: ; %fp-to-i-if-saturate
 ; SDAG-NEXT:    v_bfrev_b32_e32 v0, 1
 ; SDAG-NEXT:    v_bfrev_b32_e32 v1, -2
-; SDAG-NEXT:    v_cndmask_b32_e32 v2, v0, v1, vcc
-; SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc
+; SDAG-NEXT:    v_cndmask_b32_e64 v2, v0, v1, s[4:5]
+; SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[4:5]
 ; SDAG-NEXT:    v_mov_b32_e32 v3, v2
 ; SDAG-NEXT:    v_mov_b32_e32 v0, v1
 ; SDAG-NEXT:    v_mov_b32_e32 v2, v1
 ; SDAG-NEXT:  ; %bb.9: ; %Flow3
-; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
+; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; SDAG-NEXT:  .LBB1_10: ; %fp-to-i-cleanup
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -496,15 +459,15 @@ define i128 @fptoui_f64_to_i128(double %x) {
 ; GISEL-LABEL: fptoui_f64_to_i128:
 ; GISEL:       ; %bb.0: ; %fp-to-i-entry
 ; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    v_mov_b32_e32 v5, v1
 ; GISEL-NEXT:    v_mov_b32_e32 v4, v0
-; GISEL-NEXT:    v_lshrrev_b32_e32 v2, 20, v5
-; GISEL-NEXT:    v_mov_b32_e32 v0, 0x3ff
-; GISEL-NEXT:    s_mov_b64 s[4:5], 0
+; GISEL-NEXT:    v_mov_b32_e32 v5, v1
+; GISEL-NEXT:    v_lshrrev_b64 v[0:1], 52, v[4:5]
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0x3ff
 ; GISEL-NEXT:    v_mov_b32_e32 v7, 0
-; GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GISEL-NEXT:    v_and_b32_e32 v6, 0x7ff, v2
-; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[6:7], v[0:1]
+; GISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GISEL-NEXT:    v_bfe_u32 v6, v0, 0, 11
+; GISEL-NEXT:    s_mov_b64 s[4:5], 0
+; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[6:7], v[1:2]
 ; GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
 ; GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GISEL-NEXT:    v_mov_b32_e32 v1, s5
@@ -515,19 +478,10 @@ define i128 @fptoui_f64_to_i128(double %x) {
 ; GISEL-NEXT:  ; %bb.1: ; %fp-to-i-if-check.saturate
 ; GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffffb81, v6
 ; GISEL-NEXT:    v_mov_b32_e32 v2, 0xffffff80
-; GISEL-NEXT:    v_addc_co_u32_e64 v1, s[6:7], 0, -1, vcc
 ; GISEL-NEXT:    v_mov_b32_e32 v3, -1
-; GISEL-NEXT:    v_addc_co_u32_e64 v8, s[6:7], 0, -1, s[6:7]
-; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3]
-; GISEL-NEXT:    v_addc_co_u32_e64 v9, s[6:7], 0, -1, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_cmp_le_u64_e32 vcc, -1, v[8:9]
+; GISEL-NEXT:    v_addc_co_u32_e64 v1, s[6:7], 0, -1, vcc
 ; GISEL-NEXT:    v_cmp_lt_i64_e64 s[4:5], -1, v[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, -1, v[8:9]
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
-; GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3]
 ; GISEL-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
 ; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
 ; GISEL-NEXT:    s_xor_b64 s[14:15], exec, s[6:7]
@@ -632,22 +586,16 @@ define i128 @fptoui_f64_to_i128(double %x) {
 ; GISEL-NEXT:    s_andn2_saveexec_b64 s[8:9], s[16:17]
 ; GISEL-NEXT:    s_cbranch_execz .LBB1_6
 ; GISEL-NEXT:  ; %bb.5: ; %fp-to-i-if-exp.small
-; GISEL-NEXT:    v_sub_co_u32_e32 v2, vcc, 0x433, v6
-; GISEL-NEXT:    v_lshrrev_b64 v[0:1], v2, v[4:5]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v6, v0, v4, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v7, v1, v5, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[6:7], v6, v9, 0
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v6, v8, 0
-; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[6:7], v7, v9, v[4:5]
-; GISEL-NEXT:    v_mul_lo_u32 v10, v7, v9
-; GISEL-NEXT:    v_mad_u64_u32 v[4:5], vcc, v6, v9, v[1:2]
-; GISEL-NEXT:    v_mul_lo_u32 v6, v6, v9
-; GISEL-NEXT:    v_mad_u64_u32 v[1:2], s[6:7], v7, v8, v[4:5]
-; GISEL-NEXT:    v_addc_co_u32_e64 v3, s[6:7], v3, v6, s[6:7]
+; GISEL-NEXT:    v_sub_co_u32_e32 v0, vcc, 0x433, v6
+; GISEL-NEXT:    v_lshrrev_b64 v[4:5], v0, v[4:5]
+; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[6:7], v4, v9, 0
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v4, v8, 0
+; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[6:7], v5, v9, v[6:7]
+; GISEL-NEXT:    v_mul_lo_u32 v10, v5, v9
+; GISEL-NEXT:    v_mad_u64_u32 v[6:7], vcc, v4, v9, v[1:2]
+; GISEL-NEXT:    v_mul_lo_u32 v4, v4, v9
+; GISEL-NEXT:    v_mad_u64_u32 v[1:2], s[6:7], v5, v8, v[6:7]
+; GISEL-NEXT:    v_addc_co_u32_e64 v3, s[6:7], v3, v4, s[6:7]
 ; GISEL-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v10, vcc
 ; GISEL-NEXT:  .LBB1_6: ; %Flow1
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[8:9]
@@ -737,28 +685,20 @@ define i128 @fptosi_f32_to_i128(float %x) {
 ; SDAG:       ; %bb.0: ; %fp-to-i-entry
 ; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SDAG-NEXT:    v_mov_b32_e32 v4, v0
-; SDAG-NEXT:    v_mov_b32_e32 v0, 0
-; SDAG-NEXT:    v_bfe_u32 v5, v4, 23, 8
+; SDAG-NEXT:    v_bfe_u32 v7, v4, 23, 8
 ; SDAG-NEXT:    s_movk_i32 s4, 0x7e
+; SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v2, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v1, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v3, 0
-; SDAG-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v5
+; SDAG-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v7
 ; SDAG-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; SDAG-NEXT:    s_cbranch_execz .LBB2_10
 ; SDAG-NEXT:  ; %bb.1: ; %fp-to-i-if-check.saturate
-; SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffff01, v5
-; SDAG-NEXT:    v_mov_b32_e32 v1, -1
-; SDAG-NEXT:    v_mov_b32_e32 v6, 0
-; SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
-; SDAG-NEXT:    v_addc_co_u32_e32 v2, vcc, -1, v6, vcc
-; SDAG-NEXT:    s_movk_i32 s6, 0xff7f
-; SDAG-NEXT:    v_addc_co_u32_e32 v3, vcc, -1, v6, vcc
-; SDAG-NEXT:    s_mov_b32 s7, -1
-; SDAG-NEXT:    v_cmp_eq_u64_e64 s[4:5], -1, v[2:3]
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[6:7], s[6:7], v[0:1]
+; SDAG-NEXT:    v_add_u32_e32 v0, 0xffffff01, v7
+; SDAG-NEXT:    s_movk_i32 s4, 0xff7f
 ; SDAG-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v4
-; SDAG-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
+; SDAG-NEXT:    v_cmp_lt_u32_e64 s[4:5], s4, v0
 ; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
 ; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
@@ -766,76 +706,68 @@ define i128 @fptosi_f32_to_i128(float %x) {
 ; SDAG-NEXT:    s_cbranch_execz .LBB2_7
 ; SDAG-NEXT:  ; %bb.2: ; %fp-to-i-if-check.exp.size
 ; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SDAG-NEXT:    v_add_co_u32_e64 v10, s[4:5], -1, v0
-; SDAG-NEXT:    s_mov_b64 s[4:5], 0x95
+; SDAG-NEXT:    v_add_co_u32_e64 v6, s[4:5], -1, v0
 ; SDAG-NEXT:    v_and_b32_e32 v0, 0x7fffff, v4
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[4:5], v[5:6]
-; SDAG-NEXT:    v_cndmask_b32_e64 v9, -1, 0, vcc
-; SDAG-NEXT:    v_cndmask_b32_e64 v11, -1, 1, vcc
-; SDAG-NEXT:    v_or_b32_e32 v7, 0x800000, v0
-; SDAG-NEXT:    v_mov_b32_e32 v8, v6
-; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; SDAG-NEXT:    s_movk_i32 s4, 0x95
+; SDAG-NEXT:    v_cndmask_b32_e64 v5, -1, 0, vcc
+; SDAG-NEXT:    v_cndmask_b32_e64 v8, -1, 1, vcc
+; SDAG-NEXT:    v_or_b32_e32 v1, 0x800000, v0
+; SDAG-NEXT:    v_cmp_lt_u32_e64 s[4:5], s4, v7
 ; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
 ; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; SDAG-NEXT:    s_xor_b64 s[12:13], exec, s[6:7]
 ; SDAG-NEXT:    s_cbranch_execz .LBB2_4
 ; SDAG-NEXT:  ; %bb.3: ; %fp-to-i-if-exp.large
-; SDAG-NEXT:    v_sub_u32_e32 v0, 0xd6, v5
-; SDAG-NEXT:    v_add_u32_e32 v2, 0xffffff2a, v5
-; SDAG-NEXT:    v_add_u32_e32 v4, 0xffffff6a, v5
-; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v0, v[7:8]
-; SDAG-NEXT:    v_lshlrev_b64 v[2:3], v2, v[7:8]
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v4
-; SDAG-NEXT:    v_cndmask_b32_e64 v1, v3, v1, s[4:5]
-; SDAG-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v4
-; SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, v1, s[6:7]
-; SDAG-NEXT:    v_cndmask_b32_e64 v2, v2, v0, s[4:5]
-; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v4, v[7:8]
-; SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s[6:7]
-; SDAG-NEXT:    v_cndmask_b32_e64 v7, 0, v0, s[4:5]
-; SDAG-NEXT:    v_mad_u64_u32 v[4:5], s[6:7], v7, v11, 0
-; SDAG-NEXT:    v_cndmask_b32_e64 v13, 0, v1, s[4:5]
-; SDAG-NEXT:    v_mul_lo_u32 v8, v9, v2
-; SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v13, v11, v[5:6]
-; SDAG-NEXT:    v_mul_lo_u32 v12, v11, v3
-; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[6:7], v11, v2, 0
-; SDAG-NEXT:    v_mov_b32_e32 v5, v0
-; SDAG-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v7, v9, v[5:6]
-; SDAG-NEXT:    v_add3_u32 v3, v3, v12, v8
-; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v10, v7, v[2:3]
-; SDAG-NEXT:    v_add_co_u32_e64 v0, s[4:5], v1, v6
-; SDAG-NEXT:    v_addc_co_u32_e64 v1, s[4:5], 0, 0, s[4:5]
-; SDAG-NEXT:    v_mul_lo_u32 v8, v10, v13
-; SDAG-NEXT:    v_mul_lo_u32 v7, v10, v7
-; SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v13, v9, v[0:1]
-; SDAG-NEXT:    ; implicit-def: $vgpr11
-; SDAG-NEXT:    ; implicit-def: $vgpr9
-; SDAG-NEXT:    ; implicit-def: $vgpr10
-; SDAG-NEXT:    v_add3_u32 v3, v7, v3, v8
-; SDAG-NEXT:    v_add_co_u32_e64 v2, s[4:5], v0, v2
-; SDAG-NEXT:    v_addc_co_u32_e64 v3, s[4:5], v1, v3, s[4:5]
-; SDAG-NEXT:    v_mov_b32_e32 v0, v4
-; SDAG-NEXT:    v_mov_b32_e32 v1, v5
-; SDAG-NEXT:    ; implicit-def: $vgpr5_vgpr6
-; SDAG-NEXT:    ; implicit-def: $vgpr7_vgpr8
+; SDAG-NEXT:    v_add_u32_e32 v0, 0xffffff6a, v7
+; SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; SDAG-NEXT:    v_sub_u32_e32 v3, 0xd6, v7
+; SDAG-NEXT:    v_add_u32_e32 v7, 0xffffff2a, v7
+; SDAG-NEXT:    v_lshrrev_b64 v[3:4], v3, v[1:2]
+; SDAG-NEXT:    v_lshlrev_b64 v[9:10], v7, v[1:2]
+; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v0
+; SDAG-NEXT:    v_cndmask_b32_e64 v4, v10, v4, s[4:5]
+; SDAG-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v0
+; SDAG-NEXT:    v_cndmask_b32_e64 v7, 0, v4, s[6:7]
+; SDAG-NEXT:    v_cndmask_b32_e64 v9, v9, v3, s[4:5]
+; SDAG-NEXT:    v_lshlrev_b64 v[3:4], v0, v[1:2]
+; SDAG-NEXT:    v_cndmask_b32_e64 v9, 0, v9, s[6:7]
+; SDAG-NEXT:    v_cndmask_b32_e64 v11, 0, v3, s[4:5]
+; SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v11, v8, 0
+; SDAG-NEXT:    v_cndmask_b32_e64 v13, 0, v4, s[4:5]
+; SDAG-NEXT:    v_mul_lo_u32 v12, v5, v9
+; SDAG-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v13, v8, v[1:2]
+; SDAG-NEXT:    v_mul_lo_u32 v7, v8, v7
+; SDAG-NEXT:    v_mad_u64_u32 v[9:10], s[6:7], v8, v9, 0
+; SDAG-NEXT:    v_mov_b32_e32 v1, v3
+; SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v11, v5, v[1:2]
+; SDAG-NEXT:    v_add3_u32 v10, v10, v7, v12
+; SDAG-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v6, v11, v[9:10]
+; SDAG-NEXT:    v_add_co_u32_e64 v2, s[4:5], v4, v2
+; SDAG-NEXT:    v_addc_co_u32_e64 v3, s[4:5], 0, 0, s[4:5]
+; SDAG-NEXT:    v_mul_lo_u32 v9, v6, v13
+; SDAG-NEXT:    v_mul_lo_u32 v6, v6, v11
+; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v13, v5, v[2:3]
+; SDAG-NEXT:    ; implicit-def: $vgpr5
+; SDAG-NEXT:    v_add3_u32 v4, v6, v8, v9
+; SDAG-NEXT:    v_add_co_u32_e64 v2, s[4:5], v2, v7
+; SDAG-NEXT:    v_addc_co_u32_e64 v3, s[4:5], v3, v4, s[4:5]
+; SDAG-NEXT:    ; implicit-def: $vgpr7
+; SDAG-NEXT:    ; implicit-def: $vgpr8
+; SDAG-NEXT:    ; implicit-def: $vgpr6
 ; SDAG-NEXT:  .LBB2_4: ; %Flow
-; SDAG-NEXT:    s_andn2_saveexec_b64 s[6:7], s[12:13]
+; SDAG-NEXT:    s_andn2_saveexec_b64 s[4:5], s[12:13]
 ; SDAG-NEXT:    s_cbranch_execz .LBB2_6
 ; SDAG-NEXT:  ; %bb.5: ; %fp-to-i-if-exp.small
-; SDAG-NEXT:    v_sub_u32_e32 v2, 0x96, v5
-; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v2, v[7:8]
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v2
-; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, v0, s[4:5]
-; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v2
-; SDAG-NEXT:    v_cndmask_b32_e64 v3, v0, v7, s[4:5]
-; SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v3, v11, 0
+; SDAG-NEXT:    v_sub_u32_e32 v0, 0x96, v7
+; SDAG-NEXT:    v_lshrrev_b32_e32 v3, v0, v1
+; SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v3, v8, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v2, 0
-; SDAG-NEXT:    v_mov_b32_e32 v6, v2
-; SDAG-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v3, v9, v[1:2]
-; SDAG-NEXT:    v_mad_i64_i32 v[2:3], s[4:5], v10, v3, v[5:6]
-; SDAG-NEXT:    v_mov_b32_e32 v1, v4
+; SDAG-NEXT:    v_mov_b32_e32 v9, v2
+; SDAG-NEXT:    v_mad_u64_u32 v[7:8], s[6:7], v3, v5, v[1:2]
+; SDAG-NEXT:    v_mad_i64_i32 v[2:3], s[6:7], v6, v3, v[8:9]
+; SDAG-NEXT:    v_mov_b32_e32 v1, v7
 ; SDAG-NEXT:  .LBB2_6: ; %Flow1
-; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
+; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; SDAG-NEXT:  .LBB2_7: ; %Flow2
 ; SDAG-NEXT:    s_andn2_saveexec_b64 s[4:5], s[10:11]
 ; SDAG-NEXT:  ; %bb.8: ; %fp-to-i-if-saturate
@@ -856,14 +788,10 @@ define i128 @fptosi_f32_to_i128(float %x) {
 ; GISEL:       ; %bb.0: ; %fp-to-i-entry
 ; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-NEXT:    v_mov_b32_e32 v4, v0
-; GISEL-NEXT:    v_mov_b32_e32 v5, 0
-; GISEL-NEXT:    v_lshrrev_b64 v[0:1], 23, v[4:5]
-; GISEL-NEXT:    s_mov_b64 s[4:5], 0
-; GISEL-NEXT:    v_bfe_u32 v6, v0, 0, 8
+; GISEL-NEXT:    v_bfe_u32 v6, v4, 23, 8
 ; GISEL-NEXT:    v_mov_b32_e32 v0, 0x7f
-; GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GISEL-NEXT:    v_mov_b32_e32 v7, v5
-; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[6:7], v[0:1]
+; GISEL-NEXT:    s_mov_b64 s[4:5], 0
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v6, v0
 ; GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
 ; GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GISEL-NEXT:    v_mov_b32_e32 v1, s5
@@ -872,146 +800,132 @@ define i128 @fptosi_f32_to_i128(float %x) {
 ; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], vcc
 ; GISEL-NEXT:    s_cbranch_execz .LBB2_10
 ; GISEL-NEXT:  ; %bb.1: ; %fp-to-i-if-check.saturate
-; GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffff01, v6
-; GISEL-NEXT:    v_mov_b32_e32 v2, 0xffffff80
-; GISEL-NEXT:    v_addc_co_u32_e64 v1, s[6:7], 0, -1, vcc
-; GISEL-NEXT:    v_mov_b32_e32 v3, -1
-; GISEL-NEXT:    v_addc_co_u32_e64 v8, s[6:7], 0, -1, s[6:7]
-; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3]
-; GISEL-NEXT:    v_addc_co_u32_e64 v9, s[6:7], 0, -1, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_cmp_le_u64_e32 vcc, -1, v[8:9]
-; GISEL-NEXT:    v_cmp_lt_i32_e64 s[4:5], -1, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, -1, v[8:9]
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
-; GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT:    v_add_u32_e32 v0, 0xffffff01, v6
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0xffffff80
+; GISEL-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v4
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v1
 ; GISEL-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
-; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; GISEL-NEXT:    s_xor_b64 s[14:15], exec, s[6:7]
 ; GISEL-NEXT:    s_cbranch_execz .LBB2_7
 ; GISEL-NEXT:  ; %bb.2: ; %fp-to-i-if-check.exp.size
-; GISEL-NEXT:    s_xor_b64 s[6:7], s[4:5], -1
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[6:7]
+; GISEL-NEXT:    s_xor_b64 s[4:5], vcc, -1
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[4:5]
 ; GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[4:5]
 ; GISEL-NEXT:    v_lshlrev_b16_e32 v2, 1, v0
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v2
 ; GISEL-NEXT:    v_lshlrev_b16_e32 v3, 2, v0
 ; GISEL-NEXT:    v_or_b32_e32 v2, v0, v2
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
-; GISEL-NEXT:    v_lshlrev_b16_e32 v8, 3, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v7, 3, v0
 ; GISEL-NEXT:    v_or_b32_e32 v2, v2, v3
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v7
+; GISEL-NEXT:    v_lshlrev_b16_e32 v8, 4, v0
+; GISEL-NEXT:    v_or_b32_e32 v2, v2, v7
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v8
-; GISEL-NEXT:    v_lshlrev_b16_e32 v9, 4, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v9, 5, v0
 ; GISEL-NEXT:    v_or_b32_e32 v2, v2, v8
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v9
-; GISEL-NEXT:    v_lshlrev_b16_e32 v10, 5, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v10, 6, v0
 ; GISEL-NEXT:    v_or_b32_e32 v2, v2, v9
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v10
-; GISEL-NEXT:    v_lshlrev_b16_e32 v11, 6, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v11, 7, v0
 ; GISEL-NEXT:    v_or_b32_e32 v2, v2, v10
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v11
-; GISEL-NEXT:    v_lshlrev_b16_e32 v12, 7, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v12, 8, v0
 ; GISEL-NEXT:    v_or_b32_e32 v2, v2, v11
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v12
-; GISEL-NEXT:    v_lshlrev_b16_e32 v13, 8, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v13, 9, v0
 ; GISEL-NEXT:    v_or_b32_e32 v2, v2, v12
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v13
-; GISEL-NEXT:    v_lshlrev_b16_e32 v14, 9, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v14, 10, v0
 ; GISEL-NEXT:    v_or_b32_e32 v2, v2, v13
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v14
-; GISEL-NEXT:    v_lshlrev_b16_e32 v15, 10, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v15, 11, v0
 ; GISEL-NEXT:    v_or_b32_e32 v2, v2, v14
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v15
-; GISEL-NEXT:    v_lshlrev_b16_e32 v16, 11, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v16, 12, v0
 ; GISEL-NEXT:    v_or_b32_e32 v2, v2, v15
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v16
-; GISEL-NEXT:    v_lshlrev_b16_e32 v17, 12, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v17, 13, v0
 ; GISEL-NEXT:    v_or_b32_e32 v2, v2, v16
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v17
-; GISEL-NEXT:    v_lshlrev_b16_e32 v18, 13, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v18, 14, v0
 ; GISEL-NEXT:    v_or_b32_e32 v2, v2, v17
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v18
-; GISEL-NEXT:    v_lshlrev_b16_e32 v19, 14, v0
-; GISEL-NEXT:    v_or_b32_e32 v2, v2, v18
-; GISEL-NEXT:    v_or_b32_e32 v1, v1, v19
 ; GISEL-NEXT:    v_lshlrev_b16_e32 v0, 15, v0
-; GISEL-NEXT:    v_or_b32_e32 v2, v2, v19
+; GISEL-NEXT:    v_or_b32_e32 v2, v2, v18
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v0
 ; GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
 ; GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
-; GISEL-NEXT:    v_lshl_or_b32 v8, v0, 16, v0
-; GISEL-NEXT:    v_or3_b32 v9, v1, v2, 1
+; GISEL-NEXT:    v_lshl_or_b32 v7, v0, 16, v0
+; GISEL-NEXT:    v_or3_b32 v8, v1, v2, 1
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0x7fffff
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0x800000
+; GISEL-NEXT:    v_and_or_b32 v4, v4, v0, v1
 ; GISEL-NEXT:    v_mov_b32_e32 v0, 0x96
-; GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GISEL-NEXT:    v_and_b32_e32 v2, 0x7fffff, v4
-; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[6:7], v[0:1]
-; GISEL-NEXT:    v_or_b32_e32 v4, 0x800000, v2
+; GISEL-NEXT:    v_mov_b32_e32 v5, 0
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v0
 ; GISEL-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
-; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; GISEL-NEXT:    s_xor_b64 s[16:17], exec, s[6:7]
 ; GISEL-NEXT:    s_cbranch_execz .LBB2_4
 ; GISEL-NEXT:  ; %bb.3: ; %fp-to-i-if-exp.large
 ; GISEL-NEXT:    v_add_u32_e32 v2, 0xffffff6a, v6
 ; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v2, v[4:5]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v0, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v11, 0, v1, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v10, v8, 0
+; GISEL-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v2
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, v0, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, v1, s[4:5]
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v11, v7, 0
 ; GISEL-NEXT:    v_add_u32_e32 v3, 0xffffff2a, v6
 ; GISEL-NEXT:    v_sub_u32_e32 v6, 64, v2
-; GISEL-NEXT:    v_lshrrev_b64 v[6:7], v6, v[4:5]
+; GISEL-NEXT:    v_lshrrev_b64 v[9:10], v6, v[4:5]
 ; GISEL-NEXT:    v_lshlrev_b64 v[3:4], v3, v[4:5]
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[8:9], v12, v7, v[0:1]
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, v3, v9, s[4:5]
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[8:9], v11, v8, v[0:1]
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, v3, 0, s[6:7]
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[8:9], v10, v9, 0
-; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[8:9], v12, v9, v[5:6]
-; GISEL-NEXT:    v_mul_lo_u32 v13, v11, v8
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[8:9], v10, v8, v[1:2]
-; GISEL-NEXT:    v_mul_lo_u32 v10, v10, v8
-; GISEL-NEXT:    v_mad_u64_u32 v[1:2], s[10:11], v11, v9, v[5:6]
-; GISEL-NEXT:    v_addc_co_u32_e64 v3, s[10:11], v3, v10, s[10:11]
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, v3, 0, s[6:7]
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[8:9], v11, v8, 0
+; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[8:9], v9, v8, v[5:6]
+; GISEL-NEXT:    v_mul_lo_u32 v13, v12, v7
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[8:9], v11, v7, v[1:2]
+; GISEL-NEXT:    v_mul_lo_u32 v11, v11, v7
+; GISEL-NEXT:    v_mad_u64_u32 v[1:2], s[10:11], v12, v8, v[5:6]
+; GISEL-NEXT:    v_addc_co_u32_e64 v3, s[10:11], v3, v11, s[10:11]
 ; GISEL-NEXT:    v_addc_co_u32_e64 v3, s[8:9], v3, v13, s[8:9]
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[8:9], v12, v8, v[3:4]
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v4, v7, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[8:9], v9, v7, v[3:4]
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, v4, v10, s[4:5]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v7, v3, 0, s[6:7]
-; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[6:7], v7, v9, v[5:6]
+; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v7, v8, v[5:6]
 ; GISEL-NEXT:    ; implicit-def: $vgpr6
-; GISEL-NEXT:    ; implicit-def: $vgpr4_vgpr5
-; GISEL-NEXT:    ; implicit-def: $vgpr9
+; GISEL-NEXT:    ; implicit-def: $vgpr4
 ; GISEL-NEXT:    ; implicit-def: $vgpr8
+; GISEL-NEXT:    ; implicit-def: $vgpr7
 ; GISEL-NEXT:  .LBB2_4: ; %Flow
 ; GISEL-NEXT:    s_andn2_saveexec_b64 s[6:7], s[16:17]
 ; GISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GISEL-NEXT:  ; %bb.5: ; %fp-to-i-if-exp.small
-; GISEL-NEXT:    v_sub_co_u32_e32 v2, vcc, 0x96, v6
-; GISEL-NEXT:    v_lshrrev_b64 v[0:1], v2, v[4:5]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v6, v0, v4, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[8:9], v6, v9, 0
-; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[8:9], v6, v8, 0
-; GISEL-NEXT:    v_mul_lo_u32 v7, v6, v8
-; GISEL-NEXT:    v_mad_u64_u32 v[4:5], vcc, v6, v8, v[1:2]
-; GISEL-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v7, vcc
+; GISEL-NEXT:    v_sub_u32_e32 v0, 0x96, v6
+; GISEL-NEXT:    v_lshrrev_b32_e32 v6, v0, v4
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v8, 0
+; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v6, v7, 0
+; GISEL-NEXT:    v_mul_lo_u32 v8, v6, v7
+; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v6, v7, v[1:2]
+; GISEL-NEXT:    v_addc_co_u32_e64 v3, s[4:5], v3, v8, s[4:5]
 ; GISEL-NEXT:    v_mov_b32_e32 v1, v4
 ; GISEL-NEXT:    v_mov_b32_e32 v2, v5
 ; GISEL-NEXT:  .LBB2_6: ; %Flow1
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; GISEL-NEXT:  .LBB2_7: ; %Flow2
-; GISEL-NEXT:    s_andn2_saveexec_b64 s[6:7], s[14:15]
+; GISEL-NEXT:    s_andn2_saveexec_b64 s[4:5], s[14:15]
 ; GISEL-NEXT:    s_cbranch_execz .LBB2_9
 ; GISEL-NEXT:  ; %bb.8: ; %fp-to-i-if-saturate
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc
 ; GISEL-NEXT:    v_and_b32_e32 v1, 1, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 1, v1
 ; GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GISEL-NEXT:    v_lshlrev_b32_e32 v3, 2, v1
@@ -1078,7 +992,7 @@ define i128 @fptosi_f32_to_i128(float %x) {
 ; GISEL-NEXT:    v_add_u32_e32 v3, 0x80000000, v1
 ; GISEL-NEXT:    v_mov_b32_e32 v2, v1
 ; GISEL-NEXT:  .LBB2_9: ; %Flow3
-; GISEL-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GISEL-NEXT:  .LBB2_10: ; %fp-to-i-cleanup
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[12:13]
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -1091,28 +1005,20 @@ define i128 @fptoui_f32_to_i128(float %x) {
 ; SDAG:       ; %bb.0: ; %fp-to-i-entry
 ; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SDAG-NEXT:    v_mov_b32_e32 v4, v0
-; SDAG-NEXT:    v_mov_b32_e32 v0, 0
-; SDAG-NEXT:    v_bfe_u32 v5, v4, 23, 8
+; SDAG-NEXT:    v_bfe_u32 v7, v4, 23, 8
 ; SDAG-NEXT:    s_movk_i32 s4, 0x7e
+; SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v2, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v1, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v3, 0
-; SDAG-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v5
+; SDAG-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v7
 ; SDAG-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; SDAG-NEXT:    s_cbranch_execz .LBB3_10
 ; SDAG-NEXT:  ; %bb.1: ; %fp-to-i-if-check.saturate
-; SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffff01, v5
-; SDAG-NEXT:    v_mov_b32_e32 v1, -1
-; SDAG-NEXT:    v_mov_b32_e32 v6, 0
-; SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
-; SDAG-NEXT:    v_addc_co_u32_e32 v2, vcc, -1, v6, vcc
-; SDAG-NEXT:    s_movk_i32 s6, 0xff7f
-; SDAG-NEXT:    v_addc_co_u32_e32 v3, vcc, -1, v6, vcc
-; SDAG-NEXT:    s_mov_b32 s7, -1
-; SDAG-NEXT:    v_cmp_eq_u64_e64 s[4:5], -1, v[2:3]
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[6:7], s[6:7], v[0:1]
+; SDAG-NEXT:    v_add_u32_e32 v0, 0xffffff01, v7
+; SDAG-NEXT:    s_movk_i32 s4, 0xff7f
 ; SDAG-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v4
-; SDAG-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
+; SDAG-NEXT:    v_cmp_lt_u32_e64 s[4:5], s4, v0
 ; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
 ; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
@@ -1120,76 +1026,68 @@ define i128 @fptoui_f32_to_i128(float %x) {
 ; SDAG-NEXT:    s_cbranch_execz .LBB3_7
 ; SDAG-NEXT:  ; %bb.2: ; %fp-to-i-if-check.exp.size
 ; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SDAG-NEXT:    v_add_co_u32_e64 v10, s[4:5], -1, v0
-; SDAG-NEXT:    s_mov_b64 s[4:5], 0x95
+; SDAG-NEXT:    v_add_co_u32_e64 v6, s[4:5], -1, v0
 ; SDAG-NEXT:    v_and_b32_e32 v0, 0x7fffff, v4
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[4:5], v[5:6]
-; SDAG-NEXT:    v_cndmask_b32_e64 v9, -1, 0, vcc
-; SDAG-NEXT:    v_cndmask_b32_e64 v11, -1, 1, vcc
-; SDAG-NEXT:    v_or_b32_e32 v7, 0x800000, v0
-; SDAG-NEXT:    v_mov_b32_e32 v8, v6
-; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; SDAG-NEXT:    s_movk_i32 s4, 0x95
+; SDAG-NEXT:    v_cndmask_b32_e64 v5, -1, 0, vcc
+; SDAG-NEXT:    v_cndmask_b32_e64 v8, -1, 1, vcc
+; SDAG-NEXT:    v_or_b32_e32 v1, 0x800000, v0
+; SDAG-NEXT:    v_cmp_lt_u32_e64 s[4:5], s4, v7
 ; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
 ; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; SDAG-NEXT:    s_xor_b64 s[12:13], exec, s[6:7]
 ; SDAG-NEXT:    s_cbranch_execz .LBB3_4
 ; SDAG-NEXT:  ; %bb.3: ; %fp-to-i-if-exp.large
-; SDAG-NEXT:    v_sub_u32_e32 v0, 0xd6, v5
-; SDAG-NEXT:    v_add_u32_e32 v2, 0xffffff2a, v5
-; SDAG-NEXT:    v_add_u32_e32 v4, 0xffffff6a, v5
-; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v0, v[7:8]
-; SDAG-NEXT:    v_lshlrev_b64 v[2:3], v2, v[7:8]
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v4
-; SDAG-NEXT:    v_cndmask_b32_e64 v1, v3, v1, s[4:5]
-; SDAG-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v4
-; SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, v1, s[6:7]
-; SDAG-NEXT:    v_cndmask_b32_e64 v2, v2, v0, s[4:5]
-; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v4, v[7:8]
-; SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s[6:7]
-; SDAG-NEXT:    v_cndmask_b32_e64 v7, 0, v0, s[4:5]
-; SDAG-NEXT:    v_mad_u64_u32 v[4:5], s[6:7], v7, v11, 0
-; SDAG-NEXT:    v_cndmask_b32_e64 v13, 0, v1, s[4:5]
-; SDAG-NEXT:    v_mul_lo_u32 v8, v9, v2
-; SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v13, v11, v[5:6]
-; SDAG-NEXT:    v_mul_lo_u32 v12, v11, v3
-; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[6:7], v11, v2, 0
-; SDAG-NEXT:    v_mov_b32_e32 v5, v0
-; SDAG-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v7, v9, v[5:6]
-; SDAG-NEXT:    v_add3_u32 v3, v3, v12, v8
-; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v10, v7, v[2:3]
-; SDAG-NEXT:    v_add_co_u32_e64 v0, s[4:5], v1, v6
-; SDAG-NEXT:    v_addc_co_u32_e64 v1, s[4:5], 0, 0, s[4:5]
-; SDAG-NEXT:    v_mul_lo_u32 v8, v10, v13
-; SDAG-NEXT:    v_mul_lo_u32 v7, v10, v7
-; SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v13, v9, v[0:1]
-; SDAG-NEXT:    ; implicit-def: $vgpr11
-; SDAG-NEXT:    ; implicit-def: $vgpr9
-; SDAG-NEXT:    ; implicit-def: $vgpr10
-; SDAG-NEXT:    v_add3_u32 v3, v7, v3, v8
-; SDAG-NEXT:    v_add_co_u32_e64 v2, s[4:5], v0, v2
-; SDAG-NEXT:    v_addc_co_u32_e64 v3, s[4:5], v1, v3, s[4:5]
-; SDAG-NEXT:    v_mov_b32_e32 v0, v4
-; SDAG-NEXT:    v_mov_b32_e32 v1, v5
-; SDAG-NEXT:    ; implicit-def: $vgpr5_vgpr6
-; SDAG-NEXT:    ; implicit-def: $vgpr7_vgpr8
+; SDAG-NEXT:    v_add_u32_e32 v0, 0xffffff6a, v7
+; SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; SDAG-NEXT:    v_sub_u32_e32 v3, 0xd6, v7
+; SDAG-NEXT:    v_add_u32_e32 v7, 0xffffff2a, v7
+; SDAG-NEXT:    v_lshrrev_b64 v[3:4], v3, v[1:2]
+; SDAG-NEXT:    v_lshlrev_b64 v[9:10], v7, v[1:2]
+; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v0
+; SDAG-NEXT:    v_cndmask_b32_e64 v4, v10, v4, s[4:5]
+; SDAG-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v0
+; SDAG-NEXT:    v_cndmask_b32_e64 v7, 0, v4, s[6:7]
+; SDAG-NEXT:    v_cndmask_b32_e64 v9, v9, v3, s[4:5]
+; SDAG-NEXT:    v_lshlrev_b64 v[3:4], v0, v[1:2]
+; SDAG-NEXT:    v_cndmask_b32_e64 v9, 0, v9, s[6:7]
+; SDAG-NEXT:    v_cndmask_b32_e64 v11, 0, v3, s[4:5]
+; SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v11, v8, 0
+; SDAG-NEXT:    v_cndmask_b32_e64 v13, 0, v4, s[4:5]
+; SDAG-NEXT:    v_mul_lo_u32 v12, v5, v9
+; SDAG-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v13, v8, v[1:2]
+; SDAG-NEXT:    v_mul_lo_u32 v7, v8, v7
+; SDAG-NEXT:    v_mad_u64_u32 v[9:10], s[6:7], v8, v9, 0
+; SDAG-NEXT:    v_mov_b32_e32 v1, v3
+; SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v11, v5, v[1:2]
+; SDAG-NEXT:    v_add3_u32 v10, v10, v7, v12
+; SDAG-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v6, v11, v[9:10]
+; SDAG-NEXT:    v_add_co_u32_e64 v2, s[4:5], v4, v2
+; SDAG-NEXT:    v_addc_co_u32_e64 v3, s[4:5], 0, 0, s[4:5]
+; SDAG-NEXT:    v_mul_lo_u32 v9, v6, v13
+; SDAG-NEXT:    v_mul_lo_u32 v6, v6, v11
+; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v13, v5, v[2:3]
+; SDAG-NEXT:    ; implicit-def: $vgpr5
+; SDAG-NEXT:    v_add3_u32 v4, v6, v8, v9
+; SDAG-NEXT:    v_add_co_u32_e64 v2, s[4:5], v2, v7
+; SDAG-NEXT:    v_addc_co_u32_e64 v3, s[4:5], v3, v4, s[4:5]
+; SDAG-NEXT:    ; implicit-def: $vgpr7
+; SDAG-NEXT:    ; implicit-def: $vgpr8
+; SDAG-NEXT:    ; implicit-def: $vgpr6
 ; SDAG-NEXT:  .LBB3_4: ; %Flow
-; SDAG-NEXT:    s_andn2_saveexec_b64 s[6:7], s[12:13]
+; SDAG-NEXT:    s_andn2_saveexec_b64 s[4:5], s[12:13]
 ; SDAG-NEXT:    s_cbranch_execz .LBB3_6
 ; SDAG-NEXT:  ; %bb.5: ; %fp-to-i-if-exp.small
-; SDAG-NEXT:    v_sub_u32_e32 v2, 0x96, v5
-; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v2, v[7:8]
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v2
-; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, v0, s[4:5]
-; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v2
-; SDAG-NEXT:    v_cndmask_b32_e64 v3, v0, v7, s[4:5]
-; SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v3, v11, 0
+; SDAG-NEXT:    v_sub_u32_e32 v0, 0x96, v7
+; SDAG-NEXT:    v_lshrrev_b32_e32 v3, v0, v1
+; SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v3, v8, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v2, 0
-; SDAG-NEXT:    v_mov_b32_e32 v6, v2
-; SDAG-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v3, v9, v[1:2]
-; SDAG-NEXT:    v_mad_i64_i32 v[2:3], s[4:5], v10, v3, v[5:6]
-; SDAG-NEXT:    v_mov_b32_e32 v1, v4
+; SDAG-NEXT:    v_mov_b32_e32 v9, v2
+; SDAG-NEXT:    v_mad_u64_u32 v[7:8], s[6:7], v3, v5, v[1:2]
+; SDAG-NEXT:    v_mad_i64_i32 v[2:3], s[6:7], v6, v3, v[8:9]
+; SDAG-NEXT:    v_mov_b32_e32 v1, v7
 ; SDAG-NEXT:  .LBB3_6: ; %Flow1
-; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
+; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; SDAG-NEXT:  .LBB3_7: ; %Flow2
 ; SDAG-NEXT:    s_andn2_saveexec_b64 s[4:5], s[10:11]
 ; SDAG-NEXT:  ; %bb.8: ; %fp-to-i-if-saturate
@@ -1210,14 +1108,10 @@ define i128 @fptoui_f32_to_i128(float %x) {
 ; GISEL:       ; %bb.0: ; %fp-to-i-entry
 ; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-NEXT:    v_mov_b32_e32 v4, v0
-; GISEL-NEXT:    v_mov_b32_e32 v5, 0
-; GISEL-NEXT:    v_lshrrev_b64 v[0:1], 23, v[4:5]
-; GISEL-NEXT:    s_mov_b64 s[4:5], 0
-; GISEL-NEXT:    v_bfe_u32 v6, v0, 0, 8
+; GISEL-NEXT:    v_bfe_u32 v6, v4, 23, 8
 ; GISEL-NEXT:    v_mov_b32_e32 v0, 0x7f
-; GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GISEL-NEXT:    v_mov_b32_e32 v7, v5
-; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[6:7], v[0:1]
+; GISEL-NEXT:    s_mov_b64 s[4:5], 0
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v6, v0
 ; GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
 ; GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GISEL-NEXT:    v_mov_b32_e32 v1, s5
@@ -1226,146 +1120,132 @@ define i128 @fptoui_f32_to_i128(float %x) {
 ; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], vcc
 ; GISEL-NEXT:    s_cbranch_execz .LBB3_10
 ; GISEL-NEXT:  ; %bb.1: ; %fp-to-i-if-check.saturate
-; GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffff01, v6
-; GISEL-NEXT:    v_mov_b32_e32 v2, 0xffffff80
-; GISEL-NEXT:    v_addc_co_u32_e64 v1, s[6:7], 0, -1, vcc
-; GISEL-NEXT:    v_mov_b32_e32 v3, -1
-; GISEL-NEXT:    v_addc_co_u32_e64 v8, s[6:7], 0, -1, s[6:7]
-; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3]
-; GISEL-NEXT:    v_addc_co_u32_e64 v9, s[6:7], 0, -1, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_cmp_le_u64_e32 vcc, -1, v[8:9]
-; GISEL-NEXT:    v_cmp_lt_i32_e64 s[4:5], -1, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, -1, v[8:9]
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
-; GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT:    v_add_u32_e32 v0, 0xffffff01, v6
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0xffffff80
+; GISEL-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v4
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v1
 ; GISEL-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
-; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; GISEL-NEXT:    s_xor_b64 s[14:15], exec, s[6:7]
 ; GISEL-NEXT:    s_cbranch_execz .LBB3_7
 ; GISEL-NEXT:  ; %bb.2: ; %fp-to-i-if-check.exp.size
-; GISEL-NEXT:    s_xor_b64 s[6:7], s[4:5], -1
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[6:7]
+; GISEL-NEXT:    s_xor_b64 s[4:5], vcc, -1
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[4:5]
 ; GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[4:5]
 ; GISEL-NEXT:    v_lshlrev_b16_e32 v2, 1, v0
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v2
 ; GISEL-NEXT:    v_lshlrev_b16_e32 v3, 2, v0
 ; GISEL-NEXT:    v_or_b32_e32 v2, v0, v2
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
-; GISEL-NEXT:    v_lshlrev_b16_e32 v8, 3, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v7, 3, v0
 ; GISEL-NEXT:    v_or_b32_e32 v2, v2, v3
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v7
+; GISEL-NEXT:    v_lshlrev_b16_e32 v8, 4, v0
+; GISEL-NEXT:    v_or_b32_e32 v2, v2, v7
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v8
-; GISEL-NEXT:    v_lshlrev_b16_e32 v9, 4, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v9, 5, v0
 ; GISEL-NEXT:    v_or_b32_e32 v2, v2, v8
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v9
-; GISEL-NEXT:    v_lshlrev_b16_e32 v10, 5, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v10, 6, v0
 ; GISEL-NEXT:    v_or_b32_e32 v2, v2, v9
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v10
-; GISEL-NEXT:    v_lshlrev_b16_e32 v11, 6, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v11, 7, v0
 ; GISEL-NEXT:    v_or_b32_e32 v2, v2, v10
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v11
-; GISEL-NEXT:    v_lshlrev_b16_e32 v12, 7, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v12, 8, v0
 ; GISEL-NEXT:    v_or_b32_e32 v2, v2, v11
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v12
-; GISEL-NEXT:    v_lshlrev_b16_e32 v13, 8, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v13, 9, v0
 ; GISEL-NEXT:    v_or_b32_e32 v2, v2, v12
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v13
-; GISEL-NEXT:    v_lshlrev_b16_e32 v14, 9, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v14, 10, v0
 ; GISEL-NEXT:    v_or_b32_e32 v2, v2, v13
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v14
-; GISEL-NEXT:    v_lshlrev_b16_e32 v15, 10, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v15, 11, v0
 ; GISEL-NEXT:    v_or_b32_e32 v2, v2, v14
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v15
-; GISEL-NEXT:    v_lshlrev_b16_e32 v16, 11, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v16, 12, v0
 ; GISEL-NEXT:    v_or_b32_e32 v2, v2, v15
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v16
-; GISEL-NEXT:    v_lshlrev_b16_e32 v17, 12, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v17, 13, v0
 ; GISEL-NEXT:    v_or_b32_e32 v2, v2, v16
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v17
-; GISEL-NEXT:    v_lshlrev_b16_e32 v18, 13, v0
+; GISEL-NEXT:    v_lshlrev_b16_e32 v18, 14, v0
 ; GISEL-NEXT:    v_or_b32_e32 v2, v2, v17
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v18
-; GISEL-NEXT:    v_lshlrev_b16_e32 v19, 14, v0
-; GISEL-NEXT:    v_or_b32_e32 v2, v2, v18
-; GISEL-NEXT:    v_or_b32_e32 v1, v1, v19
 ; GISEL-NEXT:    v_lshlrev_b16_e32 v0, 15, v0
-; GISEL-NEXT:    v_or_b32_e32 v2, v2, v19
+; GISEL-NEXT:    v_or_b32_e32 v2, v2, v18
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v0
 ; GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
 ; GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
-; GISEL-NEXT:    v_lshl_or_b32 v8, v0, 16, v0
-; GISEL-NEXT:    v_or3_b32 v9, v1, v2, 1
+; GISEL-NEXT:    v_lshl_or_b32 v7, v0, 16, v0
+; GISEL-NEXT:    v_or3_b32 v8, v1, v2, 1
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0x7fffff
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0x800000
+; GISEL-NEXT:    v_and_or_b32 v4, v4, v0, v1
 ; GISEL-NEXT:    v_mov_b32_e32 v0, 0x96
-; GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GISEL-NEXT:    v_and_b32_e32 v2, 0x7fffff, v4
-; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[6:7], v[0:1]
-; GISEL-NEXT:    v_or_b32_e32 v4, 0x800000, v2
+; GISEL-NEXT:    v_mov_b32_e32 v5, 0
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v0
 ; GISEL-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
-; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; GISEL-NEXT:    s_xor_b64 s[16:17], exec, s[6:7]
 ; GISEL-NEXT:    s_cbranch_execz .LBB3_4
 ; GISEL-NEXT:  ; %bb.3: ; %fp-to-i-if-exp.large
 ; GISEL-NEXT:    v_add_u32_e32 v2, 0xffffff6a, v6
 ; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v2, v[4:5]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v0, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v11, 0, v1, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v10, v8, 0
+; GISEL-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v2
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, v0, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, v1, s[4:5]
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v11, v7, 0
 ; GISEL-NEXT:    v_add_u32_e32 v3, 0xffffff2a, v6
 ; GISEL-NEXT:    v_sub_u32_e32 v6, 64, v2
-; GISEL-NEXT:    v_lshrrev_b64 v[6:7], v6, v[4:5]
+; GISEL-NEXT:    v_lshrrev_b64 v[9:10], v6, v[4:5]
 ; GISEL-NEXT:    v_lshlrev_b64 v[3:4], v3, v[4:5]
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[8:9], v12, v7, v[0:1]
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, v3, v9, s[4:5]
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[8:9], v11, v8, v[0:1]
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, v3, 0, s[6:7]
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[8:9], v10, v9, 0
-; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[8:9], v12, v9, v[5:6]
-; GISEL-NEXT:    v_mul_lo_u32 v13, v11, v8
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[8:9], v10, v8, v[1:2]
-; GISEL-NEXT:    v_mul_lo_u32 v10, v10, v8
-; GISEL-NEXT:    v_mad_u64_u32 v[1:2], s[10:11], v11, v9, v[5:6]
-; GISEL-NEXT:    v_addc_co_u32_e64 v3, s[10:11], v3, v10, s[10:11]
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, v3, 0, s[6:7]
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[8:9], v11, v8, 0
+; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[8:9], v9, v8, v[5:6]
+; GISEL-NEXT:    v_mul_lo_u32 v13, v12, v7
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[8:9], v11, v7, v[1:2]
+; GISEL-NEXT:    v_mul_lo_u32 v11, v11, v7
+; GISEL-NEXT:    v_mad_u64_u32 v[1:2], s[10:11], v12, v8, v[5:6]
+; GISEL-NEXT:    v_addc_co_u32_e64 v3, s[10:11], v3, v11, s[10:11]
 ; GISEL-NEXT:    v_addc_co_u32_e64 v3, s[8:9], v3, v13, s[8:9]
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[8:9], v12, v8, v[3:4]
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v4, v7, vcc
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[8:9], v9, v7, v[3:4]
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, v4, v10, s[4:5]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v7, v3, 0, s[6:7]
-; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[6:7], v7, v9, v[5:6]
+; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v7, v8, v[5:6]
 ; GISEL-NEXT:    ; implicit-def: $vgpr6
-; GISEL-NEXT:    ; implicit-def: $vgpr4_vgpr5
-; GISEL-NEXT:    ; implicit-def: $vgpr9
+; GISEL-NEXT:    ; implicit-def: $vgpr4
 ; GISEL-NEXT:    ; implicit-def: $vgpr8
+; GISEL-NEXT:    ; implicit-def: $vgpr7
 ; GISEL-NEXT:  .LBB3_4: ; %Flow
 ; GISEL-NEXT:    s_andn2_saveexec_b64 s[6:7], s[16:17]
 ; GISEL-NEXT:    s_cbranch_execz .LBB3_6
 ; GISEL-NEXT:  ; %bb.5: ; %fp-to-i-if-exp.small
-; GISEL-NEXT:    v_sub_co_u32_e32 v2, vcc, 0x96, v6
-; GISEL-NEXT:    v_lshrrev_b64 v[0:1], v2, v[4:5]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v6, v0, v4, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[8:9], v6, v9, 0
-; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[8:9], v6, v8, 0
-; GISEL-NEXT:    v_mul_lo_u32 v7, v6, v8
-; GISEL-NEXT:    v_mad_u64_u32 v[4:5], vcc, v6, v8, v[1:2]
-; GISEL-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v7, vcc
+; GISEL-NEXT:    v_sub_u32_e32 v0, 0x96, v6
+; GISEL-NEXT:    v_lshrrev_b32_e32 v6, v0, v4
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v8, 0
+; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v6, v7, 0
+; GISEL-NEXT:    v_mul_lo_u32 v8, v6, v7
+; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v6, v7, v[1:2]
+; GISEL-NEXT:    v_addc_co_u32_e64 v3, s[4:5], v3, v8, s[4:5]
 ; GISEL-NEXT:    v_mov_b32_e32 v1, v4
 ; GISEL-NEXT:    v_mov_b32_e32 v2, v5
 ; GISEL-NEXT:  .LBB3_6: ; %Flow1
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; GISEL-NEXT:  .LBB3_7: ; %Flow2
-; GISEL-NEXT:    s_andn2_saveexec_b64 s[6:7], s[14:15]
+; GISEL-NEXT:    s_andn2_saveexec_b64 s[4:5], s[14:15]
 ; GISEL-NEXT:    s_cbranch_execz .LBB3_9
 ; GISEL-NEXT:  ; %bb.8: ; %fp-to-i-if-saturate
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc
 ; GISEL-NEXT:    v_and_b32_e32 v1, 1, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 1, v1
 ; GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GISEL-NEXT:    v_lshlrev_b32_e32 v3, 2, v1
@@ -1432,7 +1312,7 @@ define i128 @fptoui_f32_to_i128(float %x) {
 ; GISEL-NEXT:    v_add_u32_e32 v3, 0x80000000, v1
 ; GISEL-NEXT:    v_mov_b32_e32 v2, v1
 ; GISEL-NEXT:  .LBB3_9: ; %Flow3
-; GISEL-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GISEL-NEXT:  .LBB3_10: ; %fp-to-i-cleanup
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[12:13]
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -1483,102 +1363,92 @@ define i128 @fptosi_bf16_to_i128(bfloat %x) {
 ; SDAG:       ; %bb.0: ; %fp-to-i-entry
 ; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SDAG-NEXT:    v_mov_b32_e32 v4, v0
-; SDAG-NEXT:    v_mov_b32_e32 v0, 0
-; SDAG-NEXT:    v_bfe_u32 v5, v4, 7, 8
+; SDAG-NEXT:    v_lshrrev_b16_e32 v7, 7, v4
 ; SDAG-NEXT:    s_movk_i32 s4, 0x7e
+; SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v2, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v1, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v3, 0
-; SDAG-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v5
-; SDAG-NEXT:    s_and_saveexec_b64 s[8:9], vcc
+; SDAG-NEXT:    v_cmp_gt_u16_sdwa s[4:5], v7, s4 src0_sel:BYTE_0 src1_sel:DWORD
+; SDAG-NEXT:    s_and_saveexec_b64 s[8:9], s[4:5]
 ; SDAG-NEXT:    s_cbranch_execz .LBB6_10
 ; SDAG-NEXT:  ; %bb.1: ; %fp-to-i-if-check.saturate
-; SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffff01, v5
-; SDAG-NEXT:    v_mov_b32_e32 v1, -1
-; SDAG-NEXT:    v_mov_b32_e32 v6, 0
-; SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
-; SDAG-NEXT:    v_addc_co_u32_e32 v2, vcc, -1, v6, vcc
-; SDAG-NEXT:    s_movk_i32 s6, 0xff7f
-; SDAG-NEXT:    v_addc_co_u32_e32 v3, vcc, -1, v6, vcc
-; SDAG-NEXT:    s_mov_b32 s7, -1
-; SDAG-NEXT:    v_cmp_eq_u64_e64 s[4:5], -1, v[2:3]
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[6:7], s[6:7], v[0:1]
+; SDAG-NEXT:    s_movk_i32 s4, 0xff01
+; SDAG-NEXT:    v_add_u16_sdwa v0, v7, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; SDAG-NEXT:    s_movk_i32 s4, 0xff7f
 ; SDAG-NEXT:    v_cmp_lt_i16_e32 vcc, -1, v4
-; SDAG-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
+; SDAG-NEXT:    v_cmp_lt_u16_e64 s[4:5], s4, v0
 ; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
 ; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; SDAG-NEXT:    s_xor_b64 s[10:11], exec, s[6:7]
 ; SDAG-NEXT:    s_cbranch_execz .LBB6_7
 ; SDAG-NEXT:  ; %bb.2: ; %fp-to-i-if-check.exp.size
-; SDAG-NEXT:    s_movk_i32 s4, 0x7f
-; SDAG-NEXT:    v_and_b32_sdwa v0, v4, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; SDAG-NEXT:    s_mov_b64 s[4:5], 0x85
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[4:5], v[5:6]
-; SDAG-NEXT:    v_cndmask_b32_e64 v10, -1, 0, vcc
-; SDAG-NEXT:    v_cndmask_b32_e64 v9, -1, 1, vcc
-; SDAG-NEXT:    v_or_b32_e32 v7, 0x80, v0
-; SDAG-NEXT:    v_mov_b32_e32 v8, v6
+; SDAG-NEXT:    v_and_b32_e32 v0, 0x7f, v4
+; SDAG-NEXT:    s_movk_i32 s4, 0x85
+; SDAG-NEXT:    s_mov_b32 s6, 0
+; SDAG-NEXT:    v_cndmask_b32_e64 v6, -1, 0, vcc
+; SDAG-NEXT:    v_cndmask_b32_e64 v5, -1, 1, vcc
+; SDAG-NEXT:    v_or_b32_e32 v4, 0x80, v0
+; SDAG-NEXT:    v_cmp_gt_u16_sdwa s[4:5], v7, s4 src0_sel:BYTE_0 src1_sel:DWORD
 ; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
-; SDAG-NEXT:    s_xor_b64 s[12:13], exec, s[6:7]
+; SDAG-NEXT:    s_and_saveexec_b64 s[12:13], s[4:5]
+; SDAG-NEXT:    s_xor_b64 s[12:13], exec, s[12:13]
 ; SDAG-NEXT:    s_cbranch_execz .LBB6_4
 ; SDAG-NEXT:  ; %bb.3: ; %fp-to-i-if-exp.large
 ; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SDAG-NEXT:    v_add_co_u32_e64 v11, s[4:5], -1, v0
-; SDAG-NEXT:    v_sub_u32_e32 v0, 0xc6, v5
-; SDAG-NEXT:    v_add_u32_e32 v2, 0xffffff3a, v5
-; SDAG-NEXT:    v_add_u32_e32 v4, 0xffffff7a, v5
-; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v0, v[7:8]
-; SDAG-NEXT:    v_lshlrev_b64 v[2:3], v2, v[7:8]
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v4
-; SDAG-NEXT:    v_cndmask_b32_e64 v1, v3, v1, s[4:5]
-; SDAG-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v4
-; SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, v1, s[6:7]
-; SDAG-NEXT:    v_cndmask_b32_e64 v2, v2, v0, s[4:5]
-; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v4, v[7:8]
-; SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s[6:7]
-; SDAG-NEXT:    v_cndmask_b32_e64 v7, 0, v0, s[4:5]
-; SDAG-NEXT:    v_mad_u64_u32 v[4:5], s[6:7], v7, v9, 0
-; SDAG-NEXT:    v_cndmask_b32_e64 v13, 0, v1, s[4:5]
-; SDAG-NEXT:    v_mul_lo_u32 v8, v10, v2
-; SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v13, v9, v[5:6]
-; SDAG-NEXT:    v_mul_lo_u32 v12, v9, v3
-; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[6:7], v9, v2, 0
-; SDAG-NEXT:    v_mov_b32_e32 v5, v0
-; SDAG-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v7, v10, v[5:6]
-; SDAG-NEXT:    v_add3_u32 v3, v3, v12, v8
-; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v11, v7, v[2:3]
-; SDAG-NEXT:    v_add_co_u32_e64 v0, s[4:5], v1, v6
-; SDAG-NEXT:    v_addc_co_u32_e64 v1, s[4:5], 0, 0, s[4:5]
-; SDAG-NEXT:    v_mul_lo_u32 v8, v11, v13
-; SDAG-NEXT:    v_mul_lo_u32 v7, v11, v7
-; SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v13, v10, v[0:1]
-; SDAG-NEXT:    ; implicit-def: $vgpr9
-; SDAG-NEXT:    v_add3_u32 v3, v7, v3, v8
-; SDAG-NEXT:    v_add_co_u32_e64 v2, s[4:5], v0, v2
-; SDAG-NEXT:    v_addc_co_u32_e64 v3, s[4:5], v1, v3, s[4:5]
-; SDAG-NEXT:    v_mov_b32_e32 v0, v4
-; SDAG-NEXT:    v_mov_b32_e32 v1, v5
-; SDAG-NEXT:    ; implicit-def: $vgpr5_vgpr6
-; SDAG-NEXT:    ; implicit-def: $vgpr7_vgpr8
+; SDAG-NEXT:    v_add_co_u32_e64 v9, s[4:5], -1, v0
+; SDAG-NEXT:    s_movk_i32 s4, 0xff7a
+; SDAG-NEXT:    v_add_u16_sdwa v10, v7, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v4
+; SDAG-NEXT:    v_mov_b32_e32 v1, s6
+; SDAG-NEXT:    v_sub_u32_e32 v2, 64, v10
+; SDAG-NEXT:    v_lshrrev_b64 v[3:4], v2, v[0:1]
+; SDAG-NEXT:    v_subrev_u32_e32 v2, 64, v10
+; SDAG-NEXT:    v_lshlrev_b64 v[7:8], v2, v[0:1]
+; SDAG-NEXT:    v_cmp_gt_u16_e64 s[4:5], 64, v10
+; SDAG-NEXT:    v_cndmask_b32_e64 v4, v8, v4, s[4:5]
+; SDAG-NEXT:    v_cmp_ne_u16_e64 s[6:7], 0, v10
+; SDAG-NEXT:    v_cndmask_b32_e64 v8, 0, v4, s[6:7]
+; SDAG-NEXT:    v_cndmask_b32_e64 v7, v7, v3, s[4:5]
+; SDAG-NEXT:    v_lshlrev_b64 v[3:4], v10, v[0:1]
+; SDAG-NEXT:    v_cndmask_b32_e64 v7, 0, v7, s[6:7]
+; SDAG-NEXT:    v_cndmask_b32_e64 v10, 0, v3, s[4:5]
+; SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v10, v5, 0
+; SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; SDAG-NEXT:    v_cndmask_b32_e64 v13, 0, v4, s[4:5]
+; SDAG-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v13, v5, v[1:2]
+; SDAG-NEXT:    v_mul_lo_u32 v11, v6, v7
+; SDAG-NEXT:    v_mul_lo_u32 v12, v5, v8
+; SDAG-NEXT:    v_mad_u64_u32 v[7:8], s[6:7], v5, v7, 0
+; SDAG-NEXT:    v_mov_b32_e32 v1, v3
+; SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v10, v6, v[1:2]
+; SDAG-NEXT:    v_add3_u32 v8, v8, v12, v11
+; SDAG-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v9, v10, v[7:8]
+; SDAG-NEXT:    v_add_co_u32_e64 v2, s[4:5], v4, v2
+; SDAG-NEXT:    v_addc_co_u32_e64 v3, s[4:5], 0, 0, s[4:5]
+; SDAG-NEXT:    v_mul_lo_u32 v5, v9, v13
+; SDAG-NEXT:    v_mul_lo_u32 v9, v9, v10
+; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v13, v6, v[2:3]
+; SDAG-NEXT:    v_add3_u32 v4, v9, v8, v5
+; SDAG-NEXT:    v_add_co_u32_e64 v2, s[4:5], v2, v7
+; SDAG-NEXT:    v_addc_co_u32_e64 v3, s[4:5], v3, v4, s[4:5]
+; SDAG-NEXT:    ; implicit-def: $vgpr7
+; SDAG-NEXT:    ; implicit-def: $vgpr4
+; SDAG-NEXT:    ; implicit-def: $vgpr5
 ; SDAG-NEXT:  .LBB6_4: ; %Flow
-; SDAG-NEXT:    s_andn2_saveexec_b64 s[6:7], s[12:13]
-; SDAG-NEXT:    s_cbranch_execz .LBB6_6
+; SDAG-NEXT:    s_andn2_saveexec_b64 s[4:5], s[12:13]
 ; SDAG-NEXT:  ; %bb.5: ; %fp-to-i-if-exp.small
-; SDAG-NEXT:    v_sub_u32_e32 v2, 0x86, v5
-; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v2, v[7:8]
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v2
-; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, v0, s[4:5]
-; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v2
-; SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v7, s[4:5]
-; SDAG-NEXT:    v_mul_hi_i32_i24_e32 v1, v0, v9
+; SDAG-NEXT:    s_movk_i32 s6, 0x86
+; SDAG-NEXT:    v_sub_u16_sdwa v0, s6, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; SDAG-NEXT:    v_lshrrev_b16_e32 v0, v0, v4
+; SDAG-NEXT:    v_mul_hi_i32_i24_e32 v1, v0, v5
 ; SDAG-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
-; SDAG-NEXT:    v_mul_i32_i24_e32 v0, v0, v9
+; SDAG-NEXT:    v_mul_i32_i24_e32 v0, v0, v5
 ; SDAG-NEXT:    v_mov_b32_e32 v3, v2
-; SDAG-NEXT:  .LBB6_6: ; %Flow1
-; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
+; SDAG-NEXT:  ; %bb.6: ; %Flow1
+; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; SDAG-NEXT:  .LBB6_7: ; %Flow2
 ; SDAG-NEXT:    s_andn2_saveexec_b64 s[4:5], s[10:11]
 ; SDAG-NEXT:  ; %bb.8: ; %fp-to-i-if-saturate
@@ -1599,159 +1469,142 @@ define i128 @fptosi_bf16_to_i128(bfloat %x) {
 ; GISEL:       ; %bb.0: ; %fp-to-i-entry
 ; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-NEXT:    v_mov_b32_e32 v4, v0
-; GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v4
-; GISEL-NEXT:    v_mov_b32_e32 v6, 0
-; GISEL-NEXT:    v_lshrrev_b64 v[0:1], 7, v[5:6]
-; GISEL-NEXT:    v_mov_b32_e32 v1, 0x7f
+; GISEL-NEXT:    v_lshrrev_b16_e32 v6, 7, v4
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0x7f
 ; GISEL-NEXT:    s_mov_b64 s[4:5], 0
-; GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GISEL-NEXT:    v_bfe_u32 v5, v0, 0, 8
-; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[5:6], v[1:2]
+; GISEL-NEXT:    v_cmp_ge_u16_sdwa s[8:9], v6, v0 src0_sel:BYTE_0 src1_sel:DWORD
 ; GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
 ; GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GISEL-NEXT:    v_mov_b32_e32 v1, s5
 ; GISEL-NEXT:    v_mov_b32_e32 v2, s6
 ; GISEL-NEXT:    v_mov_b32_e32 v3, s7
-; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], vcc
+; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], s[8:9]
 ; GISEL-NEXT:    s_cbranch_execz .LBB6_10
 ; GISEL-NEXT:  ; %bb.1: ; %fp-to-i-if-check.saturate
-; GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffff01, v5
-; GISEL-NEXT:    v_mov_b32_e32 v2, 0xffffff80
-; GISEL-NEXT:    v_addc_co_u32_e64 v1, s[6:7], 0, -1, vcc
-; GISEL-NEXT:    v_mov_b32_e32 v3, -1
-; GISEL-NEXT:    v_addc_co_u32_e64 v7, s[6:7], 0, -1, s[6:7]
-; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3]
-; GISEL-NEXT:    v_addc_co_u32_e64 v8, s[6:7], 0, -1, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_cmp_le_u64_e32 vcc, -1, v[7:8]
-; GISEL-NEXT:    v_cmp_lt_i16_e64 s[4:5], -1, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, -1, v[7:8]
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
-; GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0xffffff01
+; GISEL-NEXT:    v_add_u16_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0xffffff80
+; GISEL-NEXT:    v_cmp_lt_i16_e32 vcc, -1, v4
+; GISEL-NEXT:    v_cmp_ge_u16_e64 s[4:5], v0, v1
 ; GISEL-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
-; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; GISEL-NEXT:    s_xor_b64 s[14:15], exec, s[6:7]
 ; GISEL-NEXT:    s_cbranch_execz .LBB6_7
 ; GISEL-NEXT:  ; %bb.2: ; %fp-to-i-if-check.exp.size
-; GISEL-NEXT:    s_xor_b64 s[6:7], s[4:5], -1
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[6:7]
+; GISEL-NEXT:    s_xor_b64 s[4:5], vcc, -1
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[4:5]
 ; GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[4:5]
 ; GISEL-NEXT:    v_lshlrev_b16_e32 v2, 1, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[6:7]
-; GISEL-NEXT:    v_lshlrev_b16_e32 v3, 2, v0
-; GISEL-NEXT:    v_lshlrev_b16_e32 v7, 3, v0
-; GISEL-NEXT:    v_lshlrev_b16_e32 v8, 4, v0
-; GISEL-NEXT:    v_lshlrev_b16_e32 v9, 5, v0
-; GISEL-NEXT:    v_lshlrev_b16_e32 v10, 6, v0
-; GISEL-NEXT:    v_lshlrev_b16_e32 v11, 7, v0
-; GISEL-NEXT:    v_lshlrev_b16_e32 v12, 8, v0
-; GISEL-NEXT:    v_lshlrev_b16_e32 v13, 9, v0
-; GISEL-NEXT:    v_lshlrev_b16_e32 v14, 10, v0
-; GISEL-NEXT:    v_lshlrev_b16_e32 v15, 11, v0
-; GISEL-NEXT:    v_lshlrev_b16_e32 v16, 12, v0
-; GISEL-NEXT:    v_lshlrev_b16_e32 v17, 13, v0
-; GISEL-NEXT:    v_lshlrev_b16_e32 v18, 14, v0
-; GISEL-NEXT:    v_lshlrev_b16_e32 v19, 15, v0
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v2
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v3
+; GISEL-NEXT:    v_lshlrev_b16_e32 v3, 2, v0
+; GISEL-NEXT:    v_or_b32_e32 v2, v0, v2
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v7
+; GISEL-NEXT:    v_lshlrev_b16_e32 v7, 3, v0
+; GISEL-NEXT:    v_or_b32_e32 v2, v2, v3
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v7
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v8
+; GISEL-NEXT:    v_lshlrev_b16_e32 v8, 4, v0
+; GISEL-NEXT:    v_or_b32_e32 v2, v2, v7
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v8
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v9
+; GISEL-NEXT:    v_lshlrev_b16_e32 v9, 5, v0
+; GISEL-NEXT:    v_or_b32_e32 v2, v2, v8
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v9
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v10
+; GISEL-NEXT:    v_lshlrev_b16_e32 v10, 6, v0
+; GISEL-NEXT:    v_or_b32_e32 v2, v2, v9
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v10
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v11
+; GISEL-NEXT:    v_lshlrev_b16_e32 v11, 7, v0
+; GISEL-NEXT:    v_or_b32_e32 v2, v2, v10
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v11
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v12
+; GISEL-NEXT:    v_lshlrev_b16_e32 v12, 8, v0
+; GISEL-NEXT:    v_or_b32_e32 v2, v2, v11
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v12
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v13
+; GISEL-NEXT:    v_lshlrev_b16_e32 v13, 9, v0
+; GISEL-NEXT:    v_or_b32_e32 v2, v2, v12
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v13
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v14
+; GISEL-NEXT:    v_lshlrev_b16_e32 v14, 10, v0
+; GISEL-NEXT:    v_or_b32_e32 v2, v2, v13
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v14
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v15
+; GISEL-NEXT:    v_lshlrev_b16_e32 v15, 11, v0
+; GISEL-NEXT:    v_or_b32_e32 v2, v2, v14
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v15
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v16
+; GISEL-NEXT:    v_lshlrev_b16_e32 v16, 12, v0
+; GISEL-NEXT:    v_or_b32_e32 v2, v2, v15
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v16
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v17
+; GISEL-NEXT:    v_lshlrev_b16_e32 v17, 13, v0
+; GISEL-NEXT:    v_or_b32_e32 v2, v2, v16
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v17
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v18
+; GISEL-NEXT:    v_lshlrev_b16_e32 v18, 14, v0
+; GISEL-NEXT:    v_or_b32_e32 v2, v2, v17
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v18
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v19
-; GISEL-NEXT:    v_or_b32_e32 v1, v1, v19
+; GISEL-NEXT:    v_lshlrev_b16_e32 v0, 15, v0
+; GISEL-NEXT:    v_or_b32_e32 v2, v2, v18
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v0
+; GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
 ; GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
-; GISEL-NEXT:    v_lshl_or_b32 v9, v0, 16, v0
-; GISEL-NEXT:    v_or3_b32 v8, v1, v2, 1
+; GISEL-NEXT:    v_lshl_or_b32 v8, v0, 16, v0
+; GISEL-NEXT:    v_and_b32_e32 v0, 0x7f, v4
+; GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GISEL-NEXT:    v_or_b32_e32 v4, 0x80, v0
 ; GISEL-NEXT:    v_mov_b32_e32 v0, 0x86
-; GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GISEL-NEXT:    v_and_b32_e32 v2, 0x7f, v4
-; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[5:6], v[0:1]
-; GISEL-NEXT:    v_mov_b32_e32 v7, 0
-; GISEL-NEXT:    v_or_b32_e32 v6, 0x80, v2
+; GISEL-NEXT:    v_mov_b32_e32 v5, 0
+; GISEL-NEXT:    v_or3_b32 v7, v1, v2, 1
+; GISEL-NEXT:    v_cmp_ge_u16_sdwa s[4:5], v6, v0 src0_sel:BYTE_0 src1_sel:DWORD
 ; GISEL-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
-; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; GISEL-NEXT:    s_xor_b64 s[16:17], exec, s[6:7]
 ; GISEL-NEXT:    s_cbranch_execz .LBB6_4
 ; GISEL-NEXT:  ; %bb.3: ; %fp-to-i-if-exp.large
-; GISEL-NEXT:    v_add_u32_e32 v2, 0xffffff7a, v5
-; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v2, v[6:7]
-; GISEL-NEXT:    v_add_u32_e32 v5, 0xffffff3a, v5
-; GISEL-NEXT:    v_sub_u32_e32 v3, 64, v2
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
-; GISEL-NEXT:    v_lshrrev_b64 v[3:4], v3, v[6:7]
-; GISEL-NEXT:    v_lshlrev_b64 v[5:6], v5, v[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e32 v7, 0, v0, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v12, 0, v1, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v7, v9, 0
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0xffffff7a
+; GISEL-NEXT:    v_add_u16_sdwa v2, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
+; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v2, v[4:5]
+; GISEL-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v2
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, v0, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, v1, s[4:5]
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v11, v8, 0
+; GISEL-NEXT:    v_add_u32_e32 v3, 0xffffffc0, v2
+; GISEL-NEXT:    v_sub_u32_e32 v6, 64, v2
+; GISEL-NEXT:    v_lshrrev_b64 v[9:10], v6, v[4:5]
+; GISEL-NEXT:    v_lshlrev_b64 v[3:4], v3, v[4:5]
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[8:9], v12, v8, v[0:1]
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, v3, v9, s[4:5]
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v2
-; GISEL-NEXT:    v_mad_u64_u32 v[10:11], s[8:9], v12, v9, v[0:1]
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, v3, 0, s[6:7]
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[8:9], v7, v8, 0
-; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[8:9], v5, v8, v[10:11]
-; GISEL-NEXT:    v_mul_lo_u32 v13, v12, v9
-; GISEL-NEXT:    v_mad_u64_u32 v[10:11], s[8:9], v7, v9, v[1:2]
-; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v9
-; GISEL-NEXT:    v_mad_u64_u32 v[1:2], s[10:11], v12, v8, v[10:11]
-; GISEL-NEXT:    v_addc_co_u32_e64 v3, s[10:11], v3, v7, s[10:11]
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, v3, 0, s[6:7]
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[8:9], v11, v7, 0
+; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[8:9], v9, v7, v[5:6]
+; GISEL-NEXT:    v_mul_lo_u32 v13, v12, v8
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[8:9], v11, v8, v[1:2]
+; GISEL-NEXT:    v_mul_lo_u32 v11, v11, v8
+; GISEL-NEXT:    v_mad_u64_u32 v[1:2], s[10:11], v12, v7, v[5:6]
+; GISEL-NEXT:    v_addc_co_u32_e64 v3, s[10:11], v3, v11, s[10:11]
 ; GISEL-NEXT:    v_addc_co_u32_e64 v3, s[8:9], v3, v13, s[8:9]
-; GISEL-NEXT:    v_mad_u64_u32 v[10:11], s[8:9], v5, v9, v[3:4]
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v6, v4, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, v3, 0, s[6:7]
-; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[6:7], v5, v8, v[10:11]
-; GISEL-NEXT:    ; implicit-def: $vgpr5
-; GISEL-NEXT:    ; implicit-def: $vgpr6_vgpr7
-; GISEL-NEXT:    ; implicit-def: $vgpr8
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[8:9], v9, v8, v[3:4]
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, v4, v10, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, v3, 0, s[6:7]
+; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v8, v7, v[5:6]
+; GISEL-NEXT:    ; implicit-def: $vgpr6
+; GISEL-NEXT:    ; implicit-def: $vgpr4
+; GISEL-NEXT:    ; implicit-def: $vgpr7
 ; GISEL-NEXT:  .LBB6_4: ; %Flow
-; GISEL-NEXT:    s_andn2_saveexec_b64 s[6:7], s[16:17]
-; GISEL-NEXT:    s_cbranch_execz .LBB6_6
+; GISEL-NEXT:    s_andn2_saveexec_b64 s[4:5], s[16:17]
 ; GISEL-NEXT:  ; %bb.5: ; %fp-to-i-if-exp.small
-; GISEL-NEXT:    v_sub_co_u32_e32 v2, vcc, 0x86, v5
-; GISEL-NEXT:    v_lshrrev_b64 v[0:1], v2, v[6:7]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
-; GISEL-NEXT:    v_mul_hi_i32_i24_e32 v1, v0, v8
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0x86
+; GISEL-NEXT:    v_sub_u16_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GISEL-NEXT:    v_lshrrev_b16_e32 v0, v0, v4
+; GISEL-NEXT:    v_mul_hi_i32_i24_e32 v1, v0, v7
 ; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
-; GISEL-NEXT:    v_mul_i32_i24_e32 v0, v0, v8
+; GISEL-NEXT:    v_mul_i32_i24_e32 v0, v0, v7
 ; GISEL-NEXT:    v_mov_b32_e32 v3, v2
-; GISEL-NEXT:  .LBB6_6: ; %Flow1
-; GISEL-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GISEL-NEXT:  ; %bb.6: ; %Flow1
+; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GISEL-NEXT:  .LBB6_7: ; %Flow2
-; GISEL-NEXT:    s_andn2_saveexec_b64 s[6:7], s[14:15]
+; GISEL-NEXT:    s_andn2_saveexec_b64 s[4:5], s[14:15]
 ; GISEL-NEXT:    s_cbranch_execz .LBB6_9
 ; GISEL-NEXT:  ; %bb.8: ; %fp-to-i-if-saturate
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc
 ; GISEL-NEXT:    v_and_b32_e32 v1, 1, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 1, v1
 ; GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GISEL-NEXT:    v_lshlrev_b32_e32 v3, 2, v1
@@ -1818,7 +1671,7 @@ define i128 @fptosi_bf16_to_i128(bfloat %x) {
 ; GISEL-NEXT:    v_add_u32_e32 v3, 0x80000000, v1
 ; GISEL-NEXT:    v_mov_b32_e32 v2, v1
 ; GISEL-NEXT:  .LBB6_9: ; %Flow3
-; GISEL-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GISEL-NEXT:  .LBB6_10: ; %fp-to-i-cleanup
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[12:13]
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -1831,102 +1684,92 @@ define i128 @fptoui_bf16_to_i128(bfloat %x) {
 ; SDAG:       ; %bb.0: ; %fp-to-i-entry
 ; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SDAG-NEXT:    v_mov_b32_e32 v4, v0
-; SDAG-NEXT:    v_mov_b32_e32 v0, 0
-; SDAG-NEXT:    v_bfe_u32 v5, v4, 7, 8
+; SDAG-NEXT:    v_lshrrev_b16_e32 v7, 7, v4
 ; SDAG-NEXT:    s_movk_i32 s4, 0x7e
+; SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v2, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v1, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v3, 0
-; SDAG-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v5
-; SDAG-NEXT:    s_and_saveexec_b64 s[8:9], vcc
+; SDAG-NEXT:    v_cmp_gt_u16_sdwa s[4:5], v7, s4 src0_sel:BYTE_0 src1_sel:DWORD
+; SDAG-NEXT:    s_and_saveexec_b64 s[8:9], s[4:5]
 ; SDAG-NEXT:    s_cbranch_execz .LBB7_10
 ; SDAG-NEXT:  ; %bb.1: ; %fp-to-i-if-check.saturate
-; SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffff01, v5
-; SDAG-NEXT:    v_mov_b32_e32 v1, -1
-; SDAG-NEXT:    v_mov_b32_e32 v6, 0
-; SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
-; SDAG-NEXT:    v_addc_co_u32_e32 v2, vcc, -1, v6, vcc
-; SDAG-NEXT:    s_movk_i32 s6, 0xff7f
-; SDAG-NEXT:    v_addc_co_u32_e32 v3, vcc, -1, v6, vcc
-; SDAG-NEXT:    s_mov_b32 s7, -1
-; SDAG-NEXT:    v_cmp_eq_u64_e64 s[4:5], -1, v[2:3]
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[6:7], s[6:7], v[0:1]
+; SDAG-NEXT:    s_movk_i32 s4, 0xff01
+; SDAG-NEXT:    v_add_u16_sdwa v0, v7, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; SDAG-NEXT:    s_movk_i32 s4, 0xff7f
 ; SDAG-NEXT:    v_cmp_lt_i16_e32 vcc, -1, v4
-; SDAG-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
+; SDAG-NEXT:    v_cmp_lt_u16_e64 s[4:5], s4, v0
 ; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
 ; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; SDAG-NEXT:    s_xor_b64 s[10:11], exec, s[6:7]
 ; SDAG-NEXT:    s_cbranch_execz .LBB7_7
 ; SDAG-NEXT:  ; %bb.2: ; %fp-to-i-if-check.exp.size
-; SDAG-NEXT:    s_movk_i32 s4, 0x7f
-; SDAG-NEXT:    v_and_b32_sdwa v0, v4, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; SDAG-NEXT:    s_mov_b64 s[4:5], 0x85
-; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[4:5], v[5:6]
-; SDAG-NEXT:    v_cndmask_b32_e64 v10, -1, 0, vcc
-; SDAG-NEXT:    v_cndmask_b32_e64 v9, -1, 1, vcc
-; SDAG-NEXT:    v_or_b32_e32 v7, 0x80, v0
-; SDAG-NEXT:    v_mov_b32_e32 v8, v6
+; SDAG-NEXT:    v_and_b32_e32 v0, 0x7f, v4
+; SDAG-NEXT:    s_movk_i32 s4, 0x85
+; SDAG-NEXT:    s_mov_b32 s6, 0
+; SDAG-NEXT:    v_cndmask_b32_e64 v6, -1, 0, vcc
+; SDAG-NEXT:    v_cndmask_b32_e64 v5, -1, 1, vcc
+; SDAG-NEXT:    v_or_b32_e32 v4, 0x80, v0
+; SDAG-NEXT:    v_cmp_gt_u16_sdwa s[4:5], v7, s4 src0_sel:BYTE_0 src1_sel:DWORD
 ; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
-; SDAG-NEXT:    s_xor_b64 s[12:13], exec, s[6:7]
+; SDAG-NEXT:    s_and_saveexec_b64 s[12:13], s[4:5]
+; SDAG-NEXT:    s_xor_b64 s[12:13], exec, s[12:13]
 ; SDAG-NEXT:    s_cbranch_execz .LBB7_4
 ; SDAG-NEXT:  ; %bb.3: ; %fp-to-i-if-exp.large
 ; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SDAG-NEXT:    v_add_co_u32_e64 v11, s[4:5], -1, v0
-; SDAG-NEXT:    v_sub_u32_e32 v0, 0xc6, v5
-; SDAG-NEXT:    v_add_u32_e32 v2, 0xffffff3a, v5
-; SDAG-NEXT:    v_add_u32_e32 v4, 0xffffff7a, v5
-; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v0, v[7:8]
-; SDAG-NEXT:    v_lshlrev_b64 v[2:3], v2, v[7:8]
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v4
-; SDAG-NEXT:    v_cndmask_b32_e64 v1, v3, v1, s[4:5]
-; SDAG-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v4
-; SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, v1, s[6:7]
-; SDAG-NEXT:    v_cndmask_b32_e64 v2, v2, v0, s[4:5]
-; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v4, v[7:8]
-; SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s[6:7]
-; SDAG-NEXT:    v_cndmask_b32_e64 v7, 0, v0, s[4:5]
-; SDAG-NEXT:    v_mad_u64_u32 v[4:5], s[6:7], v7, v9, 0
-; SDAG-NEXT:    v_cndmask_b32_e64 v13, 0, v1, s[4:5]
-; SDAG-NEXT:    v_mul_lo_u32 v8, v10, v2
-; SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v13, v9, v[5:6]
-; SDAG-NEXT:    v_mul_lo_u32 v12, v9, v3
-; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[6:7], v9, v2, 0
-; SDAG-NEXT:    v_mov_b32_e32 v5, v0
-; SDAG-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v7, v10, v[5:6]
-; SDAG-NEXT:    v_add3_u32 v3, v3, v12, v8
-; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v11, v7, v[2:3]
-; SDAG-NEXT:    v_add_co_u32_e64 v0, s[4:5], v1, v6
-; SDAG-NEXT:    v_addc_co_u32_e64 v1, s[4:5], 0, 0, s[4:5]
-; SDAG-NEXT:    v_mul_lo_u32 v8, v11, v13
-; SDAG-NEXT:    v_mul_lo_u32 v7, v11, v7
-; SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v13, v10, v[0:1]
-; SDAG-NEXT:    ; implicit-def: $vgpr9
-; SDAG-NEXT:    v_add3_u32 v3, v7, v3, v8
-; SDAG-NEXT:    v_add_co_u32_e64 v2, s[4:5], v0, v2
-; SDAG-NEXT:    v_addc_co_u32_e64 v3, s[4:5], v1, v3, s[4:5]
-; SDAG-NEXT:    v_mov_b32_e32 v0, v4
-; SDAG-NEXT:    v_mov_b32_e32 v1, v5
-; SDAG-NEXT:    ; implicit-def: $vgpr5_vgpr6
-; SDAG-NEXT:    ; implicit-def: $vgpr7_vgpr8
+; SDAG-NEXT:    v_add_co_u32_e64 v9, s[4:5], -1, v0
+; SDAG-NEXT:    s_movk_i32 s4, 0xff7a
+; SDAG-NEXT:    v_add_u16_sdwa v10, v7, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v4
+; SDAG-NEXT:    v_mov_b32_e32 v1, s6
+; SDAG-NEXT:    v_sub_u32_e32 v2, 64, v10
+; SDAG-NEXT:    v_lshrrev_b64 v[3:4], v2, v[0:1]
+; SDAG-NEXT:    v_subrev_u32_e32 v2, 64, v10
+; SDAG-NEXT:    v_lshlrev_b64 v[7:8], v2, v[0:1]
+; SDAG-NEXT:    v_cmp_gt_u16_e64 s[4:5], 64, v10
+; SDAG-NEXT:    v_cndmask_b32_e64 v4, v8, v4, s[4:5]
+; SDAG-NEXT:    v_cmp_ne_u16_e64 s[6:7], 0, v10
+; SDAG-NEXT:    v_cndmask_b32_e64 v8, 0, v4, s[6:7]
+; SDAG-NEXT:    v_cndmask_b32_e64 v7, v7, v3, s[4:5]
+; SDAG-NEXT:    v_lshlrev_b64 v[3:4], v10, v[0:1]
+; SDAG-NEXT:    v_cndmask_b32_e64 v7, 0, v7, s[6:7]
+; SDAG-NEXT:    v_cndmask_b32_e64 v10, 0, v3, s[4:5]
+; SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v10, v5, 0
+; SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; SDAG-NEXT:    v_cndmask_b32_e64 v13, 0, v4, s[4:5]
+; SDAG-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v13, v5, v[1:2]
+; SDAG-NEXT:    v_mul_lo_u32 v11, v6, v7
+; SDAG-NEXT:    v_mul_lo_u32 v12, v5, v8
+; SDAG-NEXT:    v_mad_u64_u32 v[7:8], s[6:7], v5, v7, 0
+; SDAG-NEXT:    v_mov_b32_e32 v1, v3
+; SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v10, v6, v[1:2]
+; SDAG-NEXT:    v_add3_u32 v8, v8, v12, v11
+; SDAG-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v9, v10, v[7:8]
+; SDAG-NEXT:    v_add_co_u32_e64 v2, s[4:5], v4, v2
+; SDAG-NEXT:    v_addc_co_u32_e64 v3, s[4:5], 0, 0, s[4:5]
+; SDAG-NEXT:    v_mul_lo_u32 v5, v9, v13
+; SDAG-NEXT:    v_mul_lo_u32 v9, v9, v10
+; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v13, v6, v[2:3]
+; SDAG-NEXT:    v_add3_u32 v4, v9, v8, v5
+; SDAG-NEXT:    v_add_co_u32_e64 v2, s[4:5], v2, v7
+; SDAG-NEXT:    v_addc_co_u32_e64 v3, s[4:5], v3, v4, s[4:5]
+; SDAG-NEXT:    ; implicit-def: $vgpr7
+; SDAG-NEXT:    ; implicit-def: $vgpr4
+; SDAG-NEXT:    ; implicit-def: $vgpr5
 ; SDAG-NEXT:  .LBB7_4: ; %Flow
-; SDAG-NEXT:    s_andn2_saveexec_b64 s[6:7], s[12:13]
-; SDAG-NEXT:    s_cbranch_execz .LBB7_6
+; SDAG-NEXT:    s_andn2_saveexec_b64 s[4:5], s[12:13]
 ; SDAG-NEXT:  ; %bb.5: ; %fp-to-i-if-exp.small
-; SDAG-NEXT:    v_sub_u32_e32 v2, 0x86, v5
-; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v2, v[7:8]
-; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v2
-; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, v0, s[4:5]
-; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v2
-; SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v7, s[4:5]
-; SDAG-NEXT:    v_mul_hi_i32_i24_e32 v1, v0, v9
+; SDAG-NEXT:    s_movk_i32 s6, 0x86
+; SDAG-NEXT:    v_sub_u16_sdwa v0, s6, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; SDAG-NEXT:    v_lshrrev_b16_e32 v0, v0, v4
+; SDAG-NEXT:    v_mul_hi_i32_i24_e32 v1, v0, v5
 ; SDAG-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
-; SDAG-NEXT:    v_mul_i32_i24_e32 v0, v0, v9
+; SDAG-NEXT:    v_mul_i32_i24_e32 v0, v0, v5
 ; SDAG-NEXT:    v_mov_b32_e32 v3, v2
-; SDAG-NEXT:  .LBB7_6: ; %Flow1
-; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
+; SDAG-NEXT:  ; %bb.6: ; %Flow1
+; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; SDAG-NEXT:  .LBB7_7: ; %Flow2
 ; SDAG-NEXT:    s_andn2_saveexec_b64 s[4:5], s[10:11]
 ; SDAG-NEXT:  ; %bb.8: ; %fp-to-i-if-saturate
@@ -1947,159 +1790,142 @@ define i128 @fptoui_bf16_to_i128(bfloat %x) {
 ; GISEL:       ; %bb.0: ; %fp-to-i-entry
 ; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-NEXT:    v_mov_b32_e32 v4, v0
-; GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v4
-; GISEL-NEXT:    v_mov_b32_e32 v6, 0
-; GISEL-NEXT:    v_lshrrev_b64 v[0:1], 7, v[5:6]
-; GISEL-NEXT:    v_mov_b32_e32 v1, 0x7f
+; GISEL-NEXT:    v_lshrrev_b16_e32 v6, 7, v4
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0x7f
 ; GISEL-NEXT:    s_mov_b64 s[4:5], 0
-; GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GISEL-NEXT:    v_bfe_u32 v5, v0, 0, 8
-; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[5:6], v[1:2]
+; GISEL-NEXT:    v_cmp_ge_u16_sdwa s[8:9], v6, v0 src0_sel:BYTE_0 src1_sel:DWORD
 ; GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
 ; GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GISEL-NEXT:    v_mov_b32_e32 v1, s5
 ; GISEL-NEXT:    v_mov_b32_e32 v2, s6
 ; GISEL-NEXT:    v_mov_b32_e32 v3, s7
-; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], vcc
+; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], s[8:9]
 ; GISEL-NEXT:    s_cbranch_execz .LBB7_10
 ; GISEL-NEXT:  ; %bb.1: ; %fp-to-i-if-check.saturate
-; GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffff01, v5
-; GISEL-NEXT:    v_mov_b32_e32 v2, 0xffffff80
-; GISEL-NEXT:    v_addc_co_u32_e64 v1, s[6:7], 0, -1, vcc
-; GISEL-NEXT:    v_mov_b32_e32 v3, -1
-; GISEL-NEXT:    v_addc_co_u32_e64 v7, s[6:7], 0, -1, s[6:7]
-; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3]
-; GISEL-NEXT:    v_addc_co_u32_e64 v8, s[6:7], 0, -1, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_cmp_le_u64_e32 vcc, -1, v[7:8]
-; GISEL-NEXT:    v_cmp_lt_i16_e64 s[4:5], -1, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, -1, v[7:8]
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
-; GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0xffffff01
+; GISEL-NEXT:    v_add_u16_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0xffffff80
+; GISEL-NEXT:    v_cmp_lt_i16_e32 vcc, -1, v4
+; GISEL-NEXT:    v_cmp_ge_u16_e64 s[4:5], v0, v1
 ; GISEL-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
-; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; GISEL-NEXT:    s_xor_b64 s[14:15], exec, s[6:7]
 ; GISEL-NEXT:    s_cbranch_execz .LBB7_7
 ; GISEL-NEXT:  ; %bb.2: ; %fp-to-i-if-check.exp.size
-; GISEL-NEXT:    s_xor_b64 s[6:7], s[4:5], -1
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[6:7]
+; GISEL-NEXT:    s_xor_b64 s[4:5], vcc, -1
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[4:5]
 ; GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[4:5]
 ; GISEL-NEXT:    v_lshlrev_b16_e32 v2, 1, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[6:7]
-; GISEL-NEXT:    v_lshlrev_b16_e32 v3, 2, v0
-; GISEL-NEXT:    v_lshlrev_b16_e32 v7, 3, v0
-; GISEL-NEXT:    v_lshlrev_b16_e32 v8, 4, v0
-; GISEL-NEXT:    v_lshlrev_b16_e32 v9, 5, v0
-; GISEL-NEXT:    v_lshlrev_b16_e32 v10, 6, v0
-; GISEL-NEXT:    v_lshlrev_b16_e32 v11, 7, v0
-; GISEL-NEXT:    v_lshlrev_b16_e32 v12, 8, v0
-; GISEL-NEXT:    v_lshlrev_b16_e32 v13, 9, v0
-; GISEL-NEXT:    v_lshlrev_b16_e32 v14, 10, v0
-; GISEL-NEXT:    v_lshlrev_b16_e32 v15, 11, v0
-; GISEL-NEXT:    v_lshlrev_b16_e32 v16, 12, v0
-; GISEL-NEXT:    v_lshlrev_b16_e32 v17, 13, v0
-; GISEL-NEXT:    v_lshlrev_b16_e32 v18, 14, v0
-; GISEL-NEXT:    v_lshlrev_b16_e32 v19, 15, v0
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v2
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v3
+; GISEL-NEXT:    v_lshlrev_b16_e32 v3, 2, v0
+; GISEL-NEXT:    v_or_b32_e32 v2, v0, v2
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v7
+; GISEL-NEXT:    v_lshlrev_b16_e32 v7, 3, v0
+; GISEL-NEXT:    v_or_b32_e32 v2, v2, v3
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v7
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v8
+; GISEL-NEXT:    v_lshlrev_b16_e32 v8, 4, v0
+; GISEL-NEXT:    v_or_b32_e32 v2, v2, v7
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v8
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v9
+; GISEL-NEXT:    v_lshlrev_b16_e32 v9, 5, v0
+; GISEL-NEXT:    v_or_b32_e32 v2, v2, v8
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v9
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v10
+; GISEL-NEXT:    v_lshlrev_b16_e32 v10, 6, v0
+; GISEL-NEXT:    v_or_b32_e32 v2, v2, v9
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v10
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v11
+; GISEL-NEXT:    v_lshlrev_b16_e32 v11, 7, v0
+; GISEL-NEXT:    v_or_b32_e32 v2, v2, v10
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v11
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v12
+; GISEL-NEXT:    v_lshlrev_b16_e32 v12, 8, v0
+; GISEL-NEXT:    v_or_b32_e32 v2, v2, v11
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v12
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v13
+; GISEL-NEXT:    v_lshlrev_b16_e32 v13, 9, v0
+; GISEL-NEXT:    v_or_b32_e32 v2, v2, v12
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v13
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v14
+; GISEL-NEXT:    v_lshlrev_b16_e32 v14, 10, v0
+; GISEL-NEXT:    v_or_b32_e32 v2, v2, v13
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v14
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v15
+; GISEL-NEXT:    v_lshlrev_b16_e32 v15, 11, v0
+; GISEL-NEXT:    v_or_b32_e32 v2, v2, v14
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v15
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v16
+; GISEL-NEXT:    v_lshlrev_b16_e32 v16, 12, v0
+; GISEL-NEXT:    v_or_b32_e32 v2, v2, v15
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v16
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v17
+; GISEL-NEXT:    v_lshlrev_b16_e32 v17, 13, v0
+; GISEL-NEXT:    v_or_b32_e32 v2, v2, v16
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v17
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v18
+; GISEL-NEXT:    v_lshlrev_b16_e32 v18, 14, v0
+; GISEL-NEXT:    v_or_b32_e32 v2, v2, v17
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v18
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v19
-; GISEL-NEXT:    v_or_b32_e32 v1, v1, v19
+; GISEL-NEXT:    v_lshlrev_b16_e32 v0, 15, v0
+; GISEL-NEXT:    v_or_b32_e32 v2, v2, v18
+; GISEL-NEXT:    v_or_b32_e32 v1, v1, v0
+; GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
 ; GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
-; GISEL-NEXT:    v_lshl_or_b32 v9, v0, 16, v0
-; GISEL-NEXT:    v_or3_b32 v8, v1, v2, 1
+; GISEL-NEXT:    v_lshl_or_b32 v8, v0, 16, v0
+; GISEL-NEXT:    v_and_b32_e32 v0, 0x7f, v4
+; GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GISEL-NEXT:    v_or_b32_e32 v4, 0x80, v0
 ; GISEL-NEXT:    v_mov_b32_e32 v0, 0x86
-; GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GISEL-NEXT:    v_and_b32_e32 v2, 0x7f, v4
-; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[5:6], v[0:1]
-; GISEL-NEXT:    v_mov_b32_e32 v7, 0
-; GISEL-NEXT:    v_or_b32_e32 v6, 0x80, v2
+; GISEL-NEXT:    v_mov_b32_e32 v5, 0
+; GISEL-NEXT:    v_or3_b32 v7, v1, v2, 1
+; GISEL-NEXT:    v_cmp_ge_u16_sdwa s[4:5], v6, v0 src0_sel:BYTE_0 src1_sel:DWORD
 ; GISEL-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
-; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; GISEL-NEXT:    s_xor_b64 s[16:17], exec, s[6:7]
 ; GISEL-NEXT:    s_cbranch_execz .LBB7_4
 ; GISEL-NEXT:  ; %bb.3: ; %fp-to-i-if-exp.large
-; GISEL-NEXT:    v_add_u32_e32 v2, 0xffffff7a, v5
-; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v2, v[6:7]
-; GISEL-NEXT:    v_add_u32_e32 v5, 0xffffff3a, v5
-; GISEL-NEXT:    v_sub_u32_e32 v3, 64, v2
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
-; GISEL-NEXT:    v_lshrrev_b64 v[3:4], v3, v[6:7]
-; GISEL-NEXT:    v_lshlrev_b64 v[5:6], v5, v[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e32 v7, 0, v0, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v12, 0, v1, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v7, v9, 0
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0xffffff7a
+; GISEL-NEXT:    v_add_u16_sdwa v2, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
+; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v2, v[4:5]
+; GISEL-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v2
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, v0, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, v1, s[4:5]
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v11, v8, 0
+; GISEL-NEXT:    v_add_u32_e32 v3, 0xffffffc0, v2
+; GISEL-NEXT:    v_sub_u32_e32 v6, 64, v2
+; GISEL-NEXT:    v_lshrrev_b64 v[9:10], v6, v[4:5]
+; GISEL-NEXT:    v_lshlrev_b64 v[3:4], v3, v[4:5]
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[8:9], v12, v8, v[0:1]
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, v3, v9, s[4:5]
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v2
-; GISEL-NEXT:    v_mad_u64_u32 v[10:11], s[8:9], v12, v9, v[0:1]
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, v3, 0, s[6:7]
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[8:9], v7, v8, 0
-; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[8:9], v5, v8, v[10:11]
-; GISEL-NEXT:    v_mul_lo_u32 v13, v12, v9
-; GISEL-NEXT:    v_mad_u64_u32 v[10:11], s[8:9], v7, v9, v[1:2]
-; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v9
-; GISEL-NEXT:    v_mad_u64_u32 v[1:2], s[10:11], v12, v8, v[10:11]
-; GISEL-NEXT:    v_addc_co_u32_e64 v3, s[10:11], v3, v7, s[10:11]
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, v3, 0, s[6:7]
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[8:9], v11, v7, 0
+; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[8:9], v9, v7, v[5:6]
+; GISEL-NEXT:    v_mul_lo_u32 v13, v12, v8
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[8:9], v11, v8, v[1:2]
+; GISEL-NEXT:    v_mul_lo_u32 v11, v11, v8
+; GISEL-NEXT:    v_mad_u64_u32 v[1:2], s[10:11], v12, v7, v[5:6]
+; GISEL-NEXT:    v_addc_co_u32_e64 v3, s[10:11], v3, v11, s[10:11]
 ; GISEL-NEXT:    v_addc_co_u32_e64 v3, s[8:9], v3, v13, s[8:9]
-; GISEL-NEXT:    v_mad_u64_u32 v[10:11], s[8:9], v5, v9, v[3:4]
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v6, v4, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, v3, 0, s[6:7]
-; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[6:7], v5, v8, v[10:11]
-; GISEL-NEXT:    ; implicit-def: $vgpr5
-; GISEL-NEXT:    ; implicit-def: $vgpr6_vgpr7
-; GISEL-NEXT:    ; implicit-def: $vgpr8
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[8:9], v9, v8, v[3:4]
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, v4, v10, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, v3, 0, s[6:7]
+; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v8, v7, v[5:6]
+; GISEL-NEXT:    ; implicit-def: $vgpr6
+; GISEL-NEXT:    ; implicit-def: $vgpr4
+; GISEL-NEXT:    ; implicit-def: $vgpr7
 ; GISEL-NEXT:  .LBB7_4: ; %Flow
-; GISEL-NEXT:    s_andn2_saveexec_b64 s[6:7], s[16:17]
-; GISEL-NEXT:    s_cbranch_execz .LBB7_6
+; GISEL-NEXT:    s_andn2_saveexec_b64 s[4:5], s[16:17]
 ; GISEL-NEXT:  ; %bb.5: ; %fp-to-i-if-exp.small
-; GISEL-NEXT:    v_sub_co_u32_e32 v2, vcc, 0x86, v5
-; GISEL-NEXT:    v_lshrrev_b64 v[0:1], v2, v[6:7]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
-; GISEL-NEXT:    v_mul_hi_i32_i24_e32 v1, v0, v8
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0x86
+; GISEL-NEXT:    v_sub_u16_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GISEL-NEXT:    v_lshrrev_b16_e32 v0, v0, v4
+; GISEL-NEXT:    v_mul_hi_i32_i24_e32 v1, v0, v7
 ; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
-; GISEL-NEXT:    v_mul_i32_i24_e32 v0, v0, v8
+; GISEL-NEXT:    v_mul_i32_i24_e32 v0, v0, v7
 ; GISEL-NEXT:    v_mov_b32_e32 v3, v2
-; GISEL-NEXT:  .LBB7_6: ; %Flow1
-; GISEL-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GISEL-NEXT:  ; %bb.6: ; %Flow1
+; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GISEL-NEXT:  .LBB7_7: ; %Flow2
-; GISEL-NEXT:    s_andn2_saveexec_b64 s[6:7], s[14:15]
+; GISEL-NEXT:    s_andn2_saveexec_b64 s[4:5], s[14:15]
 ; GISEL-NEXT:    s_cbranch_execz .LBB7_9
 ; GISEL-NEXT:  ; %bb.8: ; %fp-to-i-if-saturate
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc
 ; GISEL-NEXT:    v_and_b32_e32 v1, 1, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 1, v1
 ; GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GISEL-NEXT:    v_lshlrev_b32_e32 v3, 2, v1
@@ -2166,7 +1992,7 @@ define i128 @fptoui_bf16_to_i128(bfloat %x) {
 ; GISEL-NEXT:    v_add_u32_e32 v3, 0x80000000, v1
 ; GISEL-NEXT:    v_mov_b32_e32 v2, v1
 ; GISEL-NEXT:  .LBB7_9: ; %Flow3
-; GISEL-NEXT:    s_or_b64 exec, exec, s[6:7]
+; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GISEL-NEXT:  .LBB7_10: ; %fp-to-i-cleanup
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[12:13]
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
diff --git a/llvm/test/Transforms/ExpandIRInsts/X86/expand-fp-convert-small.ll b/llvm/test/Transforms/ExpandIRInsts/X86/expand-fp-convert-small.ll
index bca37df905303..467bf76e7320b 100644
--- a/llvm/test/Transforms/ExpandIRInsts/X86/expand-fp-convert-small.ll
+++ b/llvm/test/Transforms/ExpandIRInsts/X86/expand-fp-convert-small.ll
@@ -49,32 +49,34 @@ define i24 @fptosi_f16_i24(half %x) {
 ; CHECK-SAME: half [[X:%.*]]) {
 ; CHECK-NEXT:  [[FP_TO_I_ENTRY:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast half [[X]] to i16
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[TMP0]] to i24
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i16 [[TMP0]], -1
 ; CHECK-NEXT:    [[SIGN:%.*]] = select i1 [[TMP2]], i24 1, i24 -1
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr i24 [[TMP1]], 10
-; CHECK-NEXT:    [[BIASED_EXP:%.*]] = and i24 [[TMP3]], 31
-; CHECK-NEXT:    [[TMP4:%.*]] = and i24 [[TMP1]], 1023
-; CHECK-NEXT:    [[SIGNIFICAND:%.*]] = or i24 [[TMP4]], 1024
-; CHECK-NEXT:    [[EXP_IS_NEGATIVE:%.*]] = icmp ult i24 [[BIASED_EXP]], 15
+; CHECK-NEXT:    [[TMP5:%.*]] = lshr i16 [[TMP0]], 10
+; CHECK-NEXT:    [[BIASED_EXP:%.*]] = and i16 [[TMP5]], 31
+; CHECK-NEXT:    [[TMP3:%.*]] = and i16 [[TMP0]], 1023
+; CHECK-NEXT:    [[SIGNIFICAND1:%.*]] = or i16 [[TMP3]], 1024
+; CHECK-NEXT:    [[EXP_IS_NEGATIVE:%.*]] = icmp ult i16 [[BIASED_EXP]], 15
 ; CHECK-NEXT:    br i1 [[EXP_IS_NEGATIVE]], label %[[FP_TO_I_CLEANUP:.*]], label %[[FP_TO_I_IF_CHECK_SATURATE:.*]]
 ; CHECK:       [[FP_TO_I_IF_CHECK_SATURATE]]:
-; CHECK-NEXT:    [[TMP5:%.*]] = add i24 [[BIASED_EXP]], -39
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i24 [[TMP5]], -24
+; CHECK-NEXT:    [[TMP4:%.*]] = add i16 [[BIASED_EXP]], -39
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i16 [[TMP4]], -24
 ; CHECK-NEXT:    br i1 [[TMP6]], label %[[FP_TO_I_IF_SATURATE:.*]], label %[[FP_TO_I_IF_CHECK_EXP_SIZE:.*]]
 ; CHECK:       [[FP_TO_I_IF_SATURATE]]:
 ; CHECK-NEXT:    [[SATURATED:%.*]] = select i1 [[TMP2]], i24 8388607, i24 -8388608
 ; CHECK-NEXT:    br label %[[FP_TO_I_CLEANUP]]
 ; CHECK:       [[FP_TO_I_IF_CHECK_EXP_SIZE]]:
-; CHECK-NEXT:    [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i24 [[BIASED_EXP]], 25
+; CHECK-NEXT:    [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i16 [[BIASED_EXP]], 25
 ; CHECK-NEXT:    br i1 [[EXP_SMALLER_MANTISSA_WIDTH]], label %[[FP_TO_I_IF_EXP_SMALL:.*]], label %[[FP_TO_I_IF_EXP_LARGE:.*]]
 ; CHECK:       [[FP_TO_I_IF_EXP_SMALL]]:
-; CHECK-NEXT:    [[TMP7:%.*]] = sub i24 25, [[BIASED_EXP]]
-; CHECK-NEXT:    [[TMP8:%.*]] = lshr i24 [[SIGNIFICAND]], [[TMP7]]
+; CHECK-NEXT:    [[TMP14:%.*]] = sub i16 25, [[BIASED_EXP]]
+; CHECK-NEXT:    [[TMP7:%.*]] = lshr i16 [[SIGNIFICAND1]], [[TMP14]]
+; CHECK-NEXT:    [[TMP8:%.*]] = zext i16 [[TMP7]] to i24
 ; CHECK-NEXT:    [[TMP9:%.*]] = mul i24 [[TMP8]], [[SIGN]]
 ; CHECK-NEXT:    br label %[[FP_TO_I_CLEANUP]]
 ; CHECK:       [[FP_TO_I_IF_EXP_LARGE]]:
-; CHECK-NEXT:    [[TMP10:%.*]] = add i24 [[BIASED_EXP]], -25
+; CHECK-NEXT:    [[TMP15:%.*]] = add i16 [[BIASED_EXP]], -25
+; CHECK-NEXT:    [[SIGNIFICAND:%.*]] = zext i16 [[SIGNIFICAND1]] to i24
+; CHECK-NEXT:    [[TMP10:%.*]] = zext i16 [[TMP15]] to i24
 ; CHECK-NEXT:    [[TMP11:%.*]] = shl i24 [[SIGNIFICAND]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = mul i24 [[TMP11]], [[SIGN]]
 ; CHECK-NEXT:    br label %[[FP_TO_I_CLEANUP]]
@@ -86,6 +88,50 @@ define i24 @fptosi_f16_i24(half %x) {
   ret i24 %res
 }
 
+define i8 @fptosi_f16_i8(half %x) {
+; CHECK-LABEL: define i8 @fptosi_f16_i8(
+; CHECK-SAME: half [[X:%.*]]) {
+; CHECK-NEXT:  [[FP_TO_I_ENTRY:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast half [[X]] to i16
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i16 [[TMP0]], -1
+; CHECK-NEXT:    [[SIGN:%.*]] = select i1 [[TMP1]], i8 1, i8 -1
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i16 [[TMP0]], 10
+; CHECK-NEXT:    [[BIASED_EXP:%.*]] = and i16 [[TMP2]], 31
+; CHECK-NEXT:    [[TMP3:%.*]] = and i16 [[TMP0]], 1023
+; CHECK-NEXT:    [[SIGNIFICAND:%.*]] = or i16 [[TMP3]], 1024
+; CHECK-NEXT:    [[EXP_IS_NEGATIVE:%.*]] = icmp ult i16 [[BIASED_EXP]], 15
+; CHECK-NEXT:    br i1 [[EXP_IS_NEGATIVE]], label %[[FP_TO_I_CLEANUP:.*]], label %[[FP_TO_I_IF_CHECK_SATURATE:.*]]
+; CHECK:       [[FP_TO_I_IF_CHECK_SATURATE]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = add i16 [[BIASED_EXP]], -23
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i16 [[TMP4]], -8
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[FP_TO_I_IF_SATURATE:.*]], label %[[FP_TO_I_IF_CHECK_EXP_SIZE:.*]]
+; CHECK:       [[FP_TO_I_IF_SATURATE]]:
+; CHECK-NEXT:    [[SATURATED:%.*]] = select i1 [[TMP1]], i8 127, i8 -128
+; CHECK-NEXT:    br label %[[FP_TO_I_CLEANUP]]
+; CHECK:       [[FP_TO_I_IF_CHECK_EXP_SIZE]]:
+; CHECK-NEXT:    [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i16 [[BIASED_EXP]], 25
+; CHECK-NEXT:    br i1 [[EXP_SMALLER_MANTISSA_WIDTH]], label %[[FP_TO_I_IF_EXP_SMALL:.*]], label %[[FP_TO_I_IF_EXP_LARGE:.*]]
+; CHECK:       [[FP_TO_I_IF_EXP_SMALL]]:
+; CHECK-NEXT:    [[TMP6:%.*]] = sub i16 25, [[BIASED_EXP]]
+; CHECK-NEXT:    [[TMP7:%.*]] = lshr i16 [[SIGNIFICAND]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = trunc i16 [[TMP7]] to i8
+; CHECK-NEXT:    [[TMP9:%.*]] = mul i8 [[TMP8]], [[SIGN]]
+; CHECK-NEXT:    br label %[[FP_TO_I_CLEANUP]]
+; CHECK:       [[FP_TO_I_IF_EXP_LARGE]]:
+; CHECK-NEXT:    [[TMP10:%.*]] = add i16 [[BIASED_EXP]], -25
+; CHECK-NEXT:    [[TMP11:%.*]] = trunc i16 [[SIGNIFICAND]] to i8
+; CHECK-NEXT:    [[TMP12:%.*]] = trunc i16 [[TMP10]] to i8
+; CHECK-NEXT:    [[TMP13:%.*]] = shl i8 [[TMP11]], [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = mul i8 [[TMP13]], [[SIGN]]
+; CHECK-NEXT:    br label %[[FP_TO_I_CLEANUP]]
+; CHECK:       [[FP_TO_I_CLEANUP]]:
+; CHECK-NEXT:    [[TMP15:%.*]] = phi i8 [ [[SATURATED]], %[[FP_TO_I_IF_SATURATE]] ], [ [[TMP9]], %[[FP_TO_I_IF_EXP_SMALL]] ], [ [[TMP14]], %[[FP_TO_I_IF_EXP_LARGE]] ], [ 0, %[[FP_TO_I_ENTRY]] ]
+; CHECK-NEXT:    ret i8 [[TMP15]]
+;
+  %res = fptosi half %x to i8
+  ret i8 %res
+}
+
 define i16 @fptoui_f16_i16(half %x) {
 ; CHECK-LABEL: define i16 @fptoui_f16_i16(
 ; CHECK-SAME: half [[X:%.*]]) {
@@ -132,32 +178,34 @@ define i24 @fptoui_f16_i24(half %x) {
 ; CHECK-SAME: half [[X:%.*]]) {
 ; CHECK-NEXT:  [[FP_TO_I_ENTRY:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast half [[X]] to i16
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[TMP0]] to i24
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i16 [[TMP0]], -1
 ; CHECK-NEXT:    [[SIGN:%.*]] = select i1 [[TMP2]], i24 1, i24 -1
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr i24 [[TMP1]], 10
-; CHECK-NEXT:    [[BIASED_EXP:%.*]] = and i24 [[TMP3]], 31
-; CHECK-NEXT:    [[TMP4:%.*]] = and i24 [[TMP1]], 1023
-; CHECK-NEXT:    [[SIGNIFICAND:%.*]] = or i24 [[TMP4]], 1024
-; CHECK-NEXT:    [[EXP_IS_NEGATIVE:%.*]] = icmp ult i24 [[BIASED_EXP]], 15
+; CHECK-NEXT:    [[TMP5:%.*]] = lshr i16 [[TMP0]], 10
+; CHECK-NEXT:    [[BIASED_EXP:%.*]] = and i16 [[TMP5]], 31
+; CHECK-NEXT:    [[TMP3:%.*]] = and i16 [[TMP0]], 1023
+; CHECK-NEXT:    [[SIGNIFICAND1:%.*]] = or i16 [[TMP3]], 1024
+; CHECK-NEXT:    [[EXP_IS_NEGATIVE:%.*]] = icmp ult i16 [[BIASED_EXP]], 15
 ; CHECK-NEXT:    br i1 [[EXP_IS_NEGATIVE]], label %[[FP_TO_I_CLEANUP:.*]], label %[[FP_TO_I_IF_CHECK_SATURATE:.*]]
 ; CHECK:       [[FP_TO_I_IF_CHECK_SATURATE]]:
-; CHECK-NEXT:    [[TMP5:%.*]] = add i24 [[BIASED_EXP]], -39
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i24 [[TMP5]], -24
+; CHECK-NEXT:    [[TMP4:%.*]] = add i16 [[BIASED_EXP]], -39
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i16 [[TMP4]], -24
 ; CHECK-NEXT:    br i1 [[TMP6]], label %[[FP_TO_I_IF_SATURATE:.*]], label %[[FP_TO_I_IF_CHECK_EXP_SIZE:.*]]
 ; CHECK:       [[FP_TO_I_IF_SATURATE]]:
 ; CHECK-NEXT:    [[SATURATED:%.*]] = select i1 [[TMP2]], i24 8388607, i24 -8388608
 ; CHECK-NEXT:    br label %[[FP_TO_I_CLEANUP]]
 ; CHECK:       [[FP_TO_I_IF_CHECK_EXP_SIZE]]:
-; CHECK-NEXT:    [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i24 [[BIASED_EXP]], 25
+; CHECK-NEXT:    [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i16 [[BIASED_EXP]], 25
 ; CHECK-NEXT:    br i1 [[EXP_SMALLER_MANTISSA_WIDTH]], label %[[FP_TO_I_IF_EXP_SMALL:.*]], label %[[FP_TO_I_IF_EXP_LARGE:.*]]
 ; CHECK:       [[FP_TO_I_IF_EXP_SMALL]]:
-; CHECK-NEXT:    [[TMP7:%.*]] = sub i24 25, [[BIASED_EXP]]
-; CHECK-NEXT:    [[TMP8:%.*]] = lshr i24 [[SIGNIFICAND]], [[TMP7]]
+; CHECK-NEXT:    [[TMP14:%.*]] = sub i16 25, [[BIASED_EXP]]
+; CHECK-NEXT:    [[TMP7:%.*]] = lshr i16 [[SIGNIFICAND1]], [[TMP14]]
+; CHECK-NEXT:    [[TMP8:%.*]] = zext i16 [[TMP7]] to i24
 ; CHECK-NEXT:    [[TMP9:%.*]] = mul i24 [[TMP8]], [[SIGN]]
 ; CHECK-NEXT:    br label %[[FP_TO_I_CLEANUP]]
 ; CHECK:       [[FP_TO_I_IF_EXP_LARGE]]:
-; CHECK-NEXT:    [[TMP10:%.*]] = add i24 [[BIASED_EXP]], -25
+; CHECK-NEXT:    [[TMP15:%.*]] = add i16 [[BIASED_EXP]], -25
+; CHECK-NEXT:    [[SIGNIFICAND:%.*]] = zext i16 [[SIGNIFICAND1]] to i24
+; CHECK-NEXT:    [[TMP10:%.*]] = zext i16 [[TMP15]] to i24
 ; CHECK-NEXT:    [[TMP11:%.*]] = shl i24 [[SIGNIFICAND]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = mul i24 [[TMP11]], [[SIGN]]
 ; CHECK-NEXT:    br label %[[FP_TO_I_CLEANUP]]
@@ -168,3 +216,47 @@ define i24 @fptoui_f16_i24(half %x) {
   %res = fptoui half %x to i24
   ret i24 %res
 }
+
+define i8 @fptoui_f16_i8(half %x) {
+; CHECK-LABEL: define i8 @fptoui_f16_i8(
+; CHECK-SAME: half [[X:%.*]]) {
+; CHECK-NEXT:  [[FP_TO_I_ENTRY:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast half [[X]] to i16
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i16 [[TMP0]], -1
+; CHECK-NEXT:    [[SIGN:%.*]] = select i1 [[TMP1]], i8 1, i8 -1
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i16 [[TMP0]], 10
+; CHECK-NEXT:    [[BIASED_EXP:%.*]] = and i16 [[TMP2]], 31
+; CHECK-NEXT:    [[TMP3:%.*]] = and i16 [[TMP0]], 1023
+; CHECK-NEXT:    [[SIGNIFICAND:%.*]] = or i16 [[TMP3]], 1024
+; CHECK-NEXT:    [[EXP_IS_NEGATIVE:%.*]] = icmp ult i16 [[BIASED_EXP]], 15
+; CHECK-NEXT:    br i1 [[EXP_IS_NEGATIVE]], label %[[FP_TO_I_CLEANUP:.*]], label %[[FP_TO_I_IF_CHECK_SATURATE:.*]]
+; CHECK:       [[FP_TO_I_IF_CHECK_SATURATE]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = add i16 [[BIASED_EXP]], -23
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i16 [[TMP4]], -8
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[FP_TO_I_IF_SATURATE:.*]], label %[[FP_TO_I_IF_CHECK_EXP_SIZE:.*]]
+; CHECK:       [[FP_TO_I_IF_SATURATE]]:
+; CHECK-NEXT:    [[SATURATED:%.*]] = select i1 [[TMP1]], i8 127, i8 -128
+; CHECK-NEXT:    br label %[[FP_TO_I_CLEANUP]]
+; CHECK:       [[FP_TO_I_IF_CHECK_EXP_SIZE]]:
+; CHECK-NEXT:    [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i16 [[BIASED_EXP]], 25
+; CHECK-NEXT:    br i1 [[EXP_SMALLER_MANTISSA_WIDTH]], label %[[FP_TO_I_IF_EXP_SMALL:.*]], label %[[FP_TO_I_IF_EXP_LARGE:.*]]
+; CHECK:       [[FP_TO_I_IF_EXP_SMALL]]:
+; CHECK-NEXT:    [[TMP6:%.*]] = sub i16 25, [[BIASED_EXP]]
+; CHECK-NEXT:    [[TMP7:%.*]] = lshr i16 [[SIGNIFICAND]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = trunc i16 [[TMP7]] to i8
+; CHECK-NEXT:    [[TMP9:%.*]] = mul i8 [[TMP8]], [[SIGN]]
+; CHECK-NEXT:    br label %[[FP_TO_I_CLEANUP]]
+; CHECK:       [[FP_TO_I_IF_EXP_LARGE]]:
+; CHECK-NEXT:    [[TMP10:%.*]] = add i16 [[BIASED_EXP]], -25
+; CHECK-NEXT:    [[TMP11:%.*]] = trunc i16 [[SIGNIFICAND]] to i8
+; CHECK-NEXT:    [[TMP12:%.*]] = trunc i16 [[TMP10]] to i8
+; CHECK-NEXT:    [[TMP13:%.*]] = shl i8 [[TMP11]], [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = mul i8 [[TMP13]], [[SIGN]]
+; CHECK-NEXT:    br label %[[FP_TO_I_CLEANUP]]
+; CHECK:       [[FP_TO_I_CLEANUP]]:
+; CHECK-NEXT:    [[TMP15:%.*]] = phi i8 [ [[SATURATED]], %[[FP_TO_I_IF_SATURATE]] ], [ [[TMP9]], %[[FP_TO_I_IF_EXP_SMALL]] ], [ [[TMP14]], %[[FP_TO_I_IF_EXP_LARGE]] ], [ 0, %[[FP_TO_I_ENTRY]] ]
+; CHECK-NEXT:    ret i8 [[TMP15]]
+;
+  %res = fptoui half %x to i8
+  ret i8 %res
+}
diff --git a/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptosi129.ll b/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptosi129.ll
index 3dd56820637b6..24c1476fdb512 100644
--- a/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptosi129.ll
+++ b/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptosi129.ll
@@ -16,32 +16,34 @@ define i129 @floattosi129(float %a) {
 ; CHECK-LABEL: @floattosi129(
 ; CHECK-NEXT:  fp-to-i-entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float [[A:%.*]] to i32
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i129
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i32 [[TMP0]], -1
 ; CHECK-NEXT:    [[SIGN:%.*]] = select i1 [[TMP2]], i129 1, i129 -1
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr i129 [[TMP1]], 23
-; CHECK-NEXT:    [[BIASED_EXP:%.*]] = and i129 [[TMP3]], 255
-; CHECK-NEXT:    [[TMP4:%.*]] = and i129 [[TMP1]], 8388607
-; CHECK-NEXT:    [[SIGNIFICAND:%.*]] = or i129 [[TMP4]], 8388608
-; CHECK-NEXT:    [[EXP_IS_NEGATIVE:%.*]] = icmp ult i129 [[BIASED_EXP]], 127
+; CHECK-NEXT:    [[TMP5:%.*]] = lshr i32 [[TMP0]], 23
+; CHECK-NEXT:    [[BIASED_EXP:%.*]] = and i32 [[TMP5]], 255
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP0]], 8388607
+; CHECK-NEXT:    [[SIGNIFICAND1:%.*]] = or i32 [[TMP3]], 8388608
+; CHECK-NEXT:    [[EXP_IS_NEGATIVE:%.*]] = icmp ult i32 [[BIASED_EXP]], 127
 ; CHECK-NEXT:    br i1 [[EXP_IS_NEGATIVE]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_CHECK_SATURATE:%.*]]
 ; CHECK:       fp-to-i-if-check.saturate:
-; CHECK-NEXT:    [[TMP5:%.*]] = add i129 [[BIASED_EXP]], -256
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i129 [[TMP5]], -129
+; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[BIASED_EXP]], -256
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i32 [[TMP4]], -129
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[FP_TO_I_IF_SATURATE:%.*]], label [[FP_TO_I_IF_CHECK_EXP_SIZE:%.*]]
 ; CHECK:       fp-to-i-if-saturate:
 ; CHECK-NEXT:    [[SATURATED:%.*]] = select i1 [[TMP2]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
 ; CHECK:       fp-to-i-if-check.exp.size:
-; CHECK-NEXT:    [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i129 [[BIASED_EXP]], 150
+; CHECK-NEXT:    [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i32 [[BIASED_EXP]], 150
 ; CHECK-NEXT:    br i1 [[EXP_SMALLER_MANTISSA_WIDTH]], label [[FP_TO_I_IF_EXP_SMALL:%.*]], label [[FP_TO_I_IF_EXP_LARGE:%.*]]
 ; CHECK:       fp-to-i-if-exp.small:
-; CHECK-NEXT:    [[TMP7:%.*]] = sub i129 150, [[BIASED_EXP]]
-; CHECK-NEXT:    [[TMP8:%.*]] = lshr i129 [[SIGNIFICAND]], [[TMP7]]
+; CHECK-NEXT:    [[TMP14:%.*]] = sub i32 150, [[BIASED_EXP]]
+; CHECK-NEXT:    [[TMP7:%.*]] = lshr i32 [[SIGNIFICAND1]], [[TMP14]]
+; CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i129
 ; CHECK-NEXT:    [[TMP9:%.*]] = mul i129 [[TMP8]], [[SIGN]]
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
 ; CHECK:       fp-to-i-if-exp.large:
-; CHECK-NEXT:    [[TMP10:%.*]] = add i129 [[BIASED_EXP]], -150
+; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[BIASED_EXP]], -150
+; CHECK-NEXT:    [[SIGNIFICAND:%.*]] = zext i32 [[SIGNIFICAND1]] to i129
+; CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP15]] to i129
 ; CHECK-NEXT:    [[TMP11:%.*]] = shl i129 [[SIGNIFICAND]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = mul i129 [[TMP11]], [[SIGN]]
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
@@ -57,32 +59,34 @@ define i129 @doubletosi129(double %a) {
 ; CHECK-LABEL: @doubletosi129(
 ; CHECK-NEXT:  fp-to-i-entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double [[A:%.*]] to i64
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i64 [[TMP0]] to i129
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i64 [[TMP0]], -1
 ; CHECK-NEXT:    [[SIGN:%.*]] = select i1 [[TMP2]], i129 1, i129 -1
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr i129 [[TMP1]], 52
-; CHECK-NEXT:    [[BIASED_EXP:%.*]] = and i129 [[TMP3]], 2047
-; CHECK-NEXT:    [[TMP4:%.*]] = and i129 [[TMP1]], 4503599627370495
-; CHECK-NEXT:    [[SIGNIFICAND:%.*]] = or i129 [[TMP4]], 4503599627370496
-; CHECK-NEXT:    [[EXP_IS_NEGATIVE:%.*]] = icmp ult i129 [[BIASED_EXP]], 1023
+; CHECK-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP0]], 52
+; CHECK-NEXT:    [[BIASED_EXP:%.*]] = and i64 [[TMP5]], 2047
+; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP0]], 4503599627370495
+; CHECK-NEXT:    [[SIGNIFICAND1:%.*]] = or i64 [[TMP3]], 4503599627370496
+; CHECK-NEXT:    [[EXP_IS_NEGATIVE:%.*]] = icmp ult i64 [[BIASED_EXP]], 1023
 ; CHECK-NEXT:    br i1 [[EXP_IS_NEGATIVE]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_CHECK_SATURATE:%.*]]
 ; CHECK:       fp-to-i-if-check.saturate:
-; CHECK-NEXT:    [[TMP5:%.*]] = add i129 [[BIASED_EXP]], -1152
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i129 [[TMP5]], -129
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[BIASED_EXP]], -1152
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i64 [[TMP4]], -129
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[FP_TO_I_IF_SATURATE:%.*]], label [[FP_TO_I_IF_CHECK_EXP_SIZE:%.*]]
 ; CHECK:       fp-to-i-if-saturate:
 ; CHECK-NEXT:    [[SATURATED:%.*]] = select i1 [[TMP2]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
 ; CHECK:       fp-to-i-if-check.exp.size:
-; CHECK-NEXT:    [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i129 [[BIASED_EXP]], 1075
+; CHECK-NEXT:    [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i64 [[BIASED_EXP]], 1075
 ; CHECK-NEXT:    br i1 [[EXP_SMALLER_MANTISSA_WIDTH]], label [[FP_TO_I_IF_EXP_SMALL:%.*]], label [[FP_TO_I_IF_EXP_LARGE:%.*]]
 ; CHECK:       fp-to-i-if-exp.small:
-; CHECK-NEXT:    [[TMP7:%.*]] = sub i129 1075, [[BIASED_EXP]]
-; CHECK-NEXT:    [[TMP8:%.*]] = lshr i129 [[SIGNIFICAND]], [[TMP7]]
+; CHECK-NEXT:    [[TMP14:%.*]] = sub i64 1075, [[BIASED_EXP]]
+; CHECK-NEXT:    [[TMP7:%.*]] = lshr i64 [[SIGNIFICAND1]], [[TMP14]]
+; CHECK-NEXT:    [[TMP8:%.*]] = zext i64 [[TMP7]] to i129
 ; CHECK-NEXT:    [[TMP9:%.*]] = mul i129 [[TMP8]], [[SIGN]]
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
 ; CHECK:       fp-to-i-if-exp.large:
-; CHECK-NEXT:    [[TMP10:%.*]] = add i129 [[BIASED_EXP]], -1075
+; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[BIASED_EXP]], -1075
+; CHECK-NEXT:    [[SIGNIFICAND:%.*]] = zext i64 [[SIGNIFICAND1]] to i129
+; CHECK-NEXT:    [[TMP10:%.*]] = zext i64 [[TMP15]] to i129
 ; CHECK-NEXT:    [[TMP11:%.*]] = shl i129 [[SIGNIFICAND]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = mul i129 [[TMP11]], [[SIGN]]
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
@@ -99,32 +103,34 @@ define i129 @x86_fp80tosi129(x86_fp80 %a) {
 ; CHECK-NEXT:  fp-to-i-entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = fpext x86_fp80 [[A:%.*]] to fp128
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast fp128 [[TMP0]] to i128
-; CHECK-NEXT:    [[TMP2:%.*]] = zext i128 [[TMP1]] to i129
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt i128 [[TMP1]], -1
 ; CHECK-NEXT:    [[SIGN:%.*]] = select i1 [[TMP3]], i129 1, i129 -1
-; CHECK-NEXT:    [[TMP4:%.*]] = lshr i129 [[TMP2]], 112
-; CHECK-NEXT:    [[BIASED_EXP:%.*]] = and i129 [[TMP4]], 32767
-; CHECK-NEXT:    [[TMP5:%.*]] = and i129 [[TMP2]], 5192296858534827628530496329220095
-; CHECK-NEXT:    [[SIGNIFICAND:%.*]] = or i129 [[TMP5]], 5192296858534827628530496329220096
-; CHECK-NEXT:    [[EXP_IS_NEGATIVE:%.*]] = icmp ult i129 [[BIASED_EXP]], 16383
+; CHECK-NEXT:    [[TMP6:%.*]] = lshr i128 [[TMP1]], 112
+; CHECK-NEXT:    [[BIASED_EXP:%.*]] = and i128 [[TMP6]], 32767
+; CHECK-NEXT:    [[TMP4:%.*]] = and i128 [[TMP1]], 5192296858534827628530496329220095
+; CHECK-NEXT:    [[SIGNIFICAND1:%.*]] = or i128 [[TMP4]], 5192296858534827628530496329220096
+; CHECK-NEXT:    [[EXP_IS_NEGATIVE:%.*]] = icmp ult i128 [[BIASED_EXP]], 16383
 ; CHECK-NEXT:    br i1 [[EXP_IS_NEGATIVE]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_CHECK_SATURATE:%.*]]
 ; CHECK:       fp-to-i-if-check.saturate:
-; CHECK-NEXT:    [[TMP6:%.*]] = add i129 [[BIASED_EXP]], -16512
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp ult i129 [[TMP6]], -129
+; CHECK-NEXT:    [[TMP5:%.*]] = add i128 [[BIASED_EXP]], -16512
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ult i128 [[TMP5]], -129
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[FP_TO_I_IF_SATURATE:%.*]], label [[FP_TO_I_IF_CHECK_EXP_SIZE:%.*]]
 ; CHECK:       fp-to-i-if-saturate:
 ; CHECK-NEXT:    [[SATURATED:%.*]] = select i1 [[TMP3]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
 ; CHECK:       fp-to-i-if-check.exp.size:
-; CHECK-NEXT:    [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i129 [[BIASED_EXP]], 16495
+; CHECK-NEXT:    [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i128 [[BIASED_EXP]], 16495
 ; CHECK-NEXT:    br i1 [[EXP_SMALLER_MANTISSA_WIDTH]], label [[FP_TO_I_IF_EXP_SMALL:%.*]], label [[FP_TO_I_IF_EXP_LARGE:%.*]]
 ; CHECK:       fp-to-i-if-exp.small:
-; CHECK-NEXT:    [[TMP8:%.*]] = sub i129 16495, [[BIASED_EXP]]
-; CHECK-NEXT:    [[TMP9:%.*]] = lshr i129 [[SIGNIFICAND]], [[TMP8]]
+; CHECK-NEXT:    [[TMP15:%.*]] = sub i128 16495, [[BIASED_EXP]]
+; CHECK-NEXT:    [[TMP8:%.*]] = lshr i128 [[SIGNIFICAND1]], [[TMP15]]
+; CHECK-NEXT:    [[TMP9:%.*]] = zext i128 [[TMP8]] to i129
 ; CHECK-NEXT:    [[TMP10:%.*]] = mul i129 [[TMP9]], [[SIGN]]
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
 ; CHECK:       fp-to-i-if-exp.large:
-; CHECK-NEXT:    [[TMP11:%.*]] = add i129 [[BIASED_EXP]], -16495
+; CHECK-NEXT:    [[TMP16:%.*]] = add i128 [[BIASED_EXP]], -16495
+; CHECK-NEXT:    [[SIGNIFICAND:%.*]] = zext i128 [[SIGNIFICAND1]] to i129
+; CHECK-NEXT:    [[TMP11:%.*]] = zext i128 [[TMP16]] to i129
 ; CHECK-NEXT:    [[TMP12:%.*]] = shl i129 [[SIGNIFICAND]], [[TMP11]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = mul i129 [[TMP12]], [[SIGN]]
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
@@ -140,32 +146,34 @@ define i129 @fp128tosi129(fp128 %a) {
 ; CHECK-LABEL: @fp128tosi129(
 ; CHECK-NEXT:  fp-to-i-entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast fp128 [[A:%.*]] to i128
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i128 [[TMP0]] to i129
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i128 [[TMP0]], -1
 ; CHECK-NEXT:    [[SIGN:%.*]] = select i1 [[TMP2]], i129 1, i129 -1
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr i129 [[TMP1]], 112
-; CHECK-NEXT:    [[BIASED_EXP:%.*]] = and i129 [[TMP3]], 32767
-; CHECK-NEXT:    [[TMP4:%.*]] = and i129 [[TMP1]], 5192296858534827628530496329220095
-; CHECK-NEXT:    [[SIGNIFICAND:%.*]] = or i129 [[TMP4]], 5192296858534827628530496329220096
-; CHECK-NEXT:    [[EXP_IS_NEGATIVE:%.*]] = icmp ult i129 [[BIASED_EXP]], 16383
+; CHECK-NEXT:    [[TMP5:%.*]] = lshr i128 [[TMP0]], 112
+; CHECK-NEXT:    [[BIASED_EXP:%.*]] = and i128 [[TMP5]], 32767
+; CHECK-NEXT:    [[TMP3:%.*]] = and i128 [[TMP0]], 5192296858534827628530496329220095
+; CHECK-NEXT:    [[SIGNIFICAND1:%.*]] = or i128 [[TMP3]], 5192296858534827628530496329220096
+; CHECK-NEXT:    [[EXP_IS_NEGATIVE:%.*]] = icmp ult i128 [[BIASED_EXP]], 16383
 ; CHECK-NEXT:    br i1 [[EXP_IS_NEGATIVE]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_CHECK_SATURATE:%.*]]
 ; CHECK:       fp-to-i-if-check.saturate:
-; CHECK-NEXT:    [[TMP5:%.*]] = add i129 [[BIASED_EXP]], -16512
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i129 [[TMP5]], -129
+; CHECK-NEXT:    [[TMP4:%.*]] = add i128 [[BIASED_EXP]], -16512
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i128 [[TMP4]], -129
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[FP_TO_I_IF_SATURATE:%.*]], label [[FP_TO_I_IF_CHECK_EXP_SIZE:%.*]]
 ; CHECK:       fp-to-i-if-saturate:
 ; CHECK-NEXT:    [[SATURATED:%.*]] = select i1 [[TMP2]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
 ; CHECK:       fp-to-i-if-check.exp.size:
-; CHECK-NEXT:    [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i129 [[BIASED_EXP]], 16495
+; CHECK-NEXT:    [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i128 [[BIASED_EXP]], 16495
 ; CHECK-NEXT:    br i1 [[EXP_SMALLER_MANTISSA_WIDTH]], label [[FP_TO_I_IF_EXP_SMALL:%.*]], label [[FP_TO_I_IF_EXP_LARGE:%.*]]
 ; CHECK:       fp-to-i-if-exp.small:
-; CHECK-NEXT:    [[TMP7:%.*]] = sub i129 16495, [[BIASED_EXP]]
-; CHECK-NEXT:    [[TMP8:%.*]] = lshr i129 [[SIGNIFICAND]], [[TMP7]]
+; CHECK-NEXT:    [[TMP14:%.*]] = sub i128 16495, [[BIASED_EXP]]
+; CHECK-NEXT:    [[TMP7:%.*]] = lshr i128 [[SIGNIFICAND1]], [[TMP14]]
+; CHECK-NEXT:    [[TMP8:%.*]] = zext i128 [[TMP7]] to i129
 ; CHECK-NEXT:    [[TMP9:%.*]] = mul i129 [[TMP8]], [[SIGN]]
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
 ; CHECK:       fp-to-i-if-exp.large:
-; CHECK-NEXT:    [[TMP10:%.*]] = add i129 [[BIASED_EXP]], -16495
+; CHECK-NEXT:    [[TMP15:%.*]] = add i128 [[BIASED_EXP]], -16495
+; CHECK-NEXT:    [[SIGNIFICAND:%.*]] = zext i128 [[SIGNIFICAND1]] to i129
+; CHECK-NEXT:    [[TMP10:%.*]] = zext i128 [[TMP15]] to i129
 ; CHECK-NEXT:    [[TMP11:%.*]] = shl i129 [[SIGNIFICAND]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = mul i129 [[TMP11]], [[SIGN]]
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
@@ -182,32 +190,34 @@ define <2 x i129> @floattosi129v2(<2 x float> %a) {
 ; CHECK-NEXT:  fp-to-i-entryfp-to-i-entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x float> [[A:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float [[TMP0]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i129
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt i32 [[TMP1]], -1
 ; CHECK-NEXT:    [[SIGN7:%.*]] = select i1 [[TMP3]], i129 1, i129 -1
-; CHECK-NEXT:    [[TMP4:%.*]] = lshr i129 [[TMP2]], 23
-; CHECK-NEXT:    [[BIASED_EXP8:%.*]] = and i129 [[TMP4]], 255
-; CHECK-NEXT:    [[TMP5:%.*]] = and i129 [[TMP2]], 8388607
-; CHECK-NEXT:    [[SIGNIFICAND9:%.*]] = or i129 [[TMP5]], 8388608
-; CHECK-NEXT:    [[EXP_IS_NEGATIVE10:%.*]] = icmp ult i129 [[BIASED_EXP8]], 127
+; CHECK-NEXT:    [[TMP6:%.*]] = lshr i32 [[TMP1]], 23
+; CHECK-NEXT:    [[BIASED_EXP8:%.*]] = and i32 [[TMP6]], 255
+; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[TMP1]], 8388607
+; CHECK-NEXT:    [[SIGNIFICAND10:%.*]] = or i32 [[TMP4]], 8388608
+; CHECK-NEXT:    [[EXP_IS_NEGATIVE10:%.*]] = icmp ult i32 [[BIASED_EXP8]], 127
 ; CHECK-NEXT:    br i1 [[EXP_IS_NEGATIVE10]], label [[FP_TO_I_CLEANUP1:%.*]], label [[FP_TO_I_IF_CHECK_SATURATE2:%.*]]
 ; CHECK:       fp-to-i-if-check.saturate2:
-; CHECK-NEXT:    [[TMP6:%.*]] = add i129 [[BIASED_EXP8]], -256
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp ult i129 [[TMP6]], -129
+; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[BIASED_EXP8]], -256
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ult i32 [[TMP5]], -129
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[FP_TO_I_IF_SATURATE3:%.*]], label [[FP_TO_I_IF_CHECK_EXP_SIZE4:%.*]]
 ; CHECK:       fp-to-i-if-saturate3:
 ; CHECK-NEXT:    [[SATURATED11:%.*]] = select i1 [[TMP3]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP1]]
 ; CHECK:       fp-to-i-if-check.exp.size4:
-; CHECK-NEXT:    [[EXP_SMALLER_MANTISSA_WIDTH12:%.*]] = icmp ult i129 [[BIASED_EXP8]], 150
+; CHECK-NEXT:    [[EXP_SMALLER_MANTISSA_WIDTH12:%.*]] = icmp ult i32 [[BIASED_EXP8]], 150
 ; CHECK-NEXT:    br i1 [[EXP_SMALLER_MANTISSA_WIDTH12]], label [[FP_TO_I_IF_EXP_SMALL5:%.*]], label [[FP_TO_I_IF_EXP_LARGE6:%.*]]
 ; CHECK:       fp-to-i-if-exp.small5:
-; CHECK-NEXT:    [[TMP8:%.*]] = sub i129 150, [[BIASED_EXP8]]
-; CHECK-NEXT:    [[TMP9:%.*]] = lshr i129 [[SIGNIFICAND9]], [[TMP8]]
+; CHECK-NEXT:    [[TMP18:%.*]] = sub i32 150, [[BIASED_EXP8]]
+; CHECK-NEXT:    [[TMP8:%.*]] = lshr i32 [[SIGNIFICAND10]], [[TMP18]]
+; CHECK-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i129
 ; CHECK-NEXT:    [[TMP10:%.*]] = mul i129 [[TMP9]], [[SIGN7]]
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP1]]
 ; CHECK:       fp-to-i-if-exp.large6:
-; CHECK-NEXT:    [[TMP11:%.*]] = add i129 [[BIASED_EXP8]], -150
+; CHECK-NEXT:    [[TMP20:%.*]] = add i32 [[BIASED_EXP8]], -150
+; CHECK-NEXT:    [[SIGNIFICAND9:%.*]] = zext i32 [[SIGNIFICAND10]] to i129
+; CHECK-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP20]] to i129
 ; CHECK-NEXT:    [[TMP12:%.*]] = shl i129 [[SIGNIFICAND9]], [[TMP11]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = mul i129 [[TMP12]], [[SIGN7]]
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP1]]
@@ -216,32 +226,34 @@ define <2 x i129> @floattosi129v2(<2 x float> %a) {
 ; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <2 x i129> poison, i129 [[TMP14]], i64 0
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x float> [[A]], i64 1
 ; CHECK-NEXT:    [[TMP17:%.*]] = bitcast float [[TMP16]] to i32
-; CHECK-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP17]] to i129
 ; CHECK-NEXT:    [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], -1
 ; CHECK-NEXT:    [[SIGN:%.*]] = select i1 [[TMP19]], i129 1, i129 -1
-; CHECK-NEXT:    [[TMP20:%.*]] = lshr i129 [[TMP18]], 23
-; CHECK-NEXT:    [[BIASED_EXP:%.*]] = and i129 [[TMP20]], 255
-; CHECK-NEXT:    [[TMP21:%.*]] = and i129 [[TMP18]], 8388607
-; CHECK-NEXT:    [[SIGNIFICAND:%.*]] = or i129 [[TMP21]], 8388608
-; CHECK-NEXT:    [[EXP_IS_NEGATIVE:%.*]] = icmp ult i129 [[BIASED_EXP]], 127
+; CHECK-NEXT:    [[TMP21:%.*]] = lshr i32 [[TMP17]], 23
+; CHECK-NEXT:    [[BIASED_EXP:%.*]] = and i32 [[TMP21]], 255
+; CHECK-NEXT:    [[TMP22:%.*]] = and i32 [[TMP17]], 8388607
+; CHECK-NEXT:    [[SIGNIFICAND1:%.*]] = or i32 [[TMP22]], 8388608
+; CHECK-NEXT:    [[EXP_IS_NEGATIVE:%.*]] = icmp ult i32 [[BIASED_EXP]], 127
 ; CHECK-NEXT:    br i1 [[EXP_IS_NEGATIVE]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_CHECK_SATURATE:%.*]]
 ; CHECK:       fp-to-i-if-check.saturate:
-; CHECK-NEXT:    [[TMP22:%.*]] = add i129 [[BIASED_EXP]], -256
-; CHECK-NEXT:    [[TMP23:%.*]] = icmp ult i129 [[TMP22]], -129
+; CHECK-NEXT:    [[TMP24:%.*]] = add i32 [[BIASED_EXP]], -256
+; CHECK-NEXT:    [[TMP23:%.*]] = icmp ult i32 [[TMP24]], -129
 ; CHECK-NEXT:    br i1 [[TMP23]], label [[FP_TO_I_IF_SATURATE:%.*]], label [[FP_TO_I_IF_CHECK_EXP_SIZE:%.*]]
 ; CHECK:       fp-to-i-if-saturate:
 ; CHECK-NEXT:    [[SATURATED:%.*]] = select i1 [[TMP19]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
 ; CHECK:       fp-to-i-if-check.exp.size:
-; CHECK-NEXT:    [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i129 [[BIASED_EXP]], 150
+; CHECK-NEXT:    [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i32 [[BIASED_EXP]], 150
 ; CHECK-NEXT:    br i1 [[EXP_SMALLER_MANTISSA_WIDTH]], label [[FP_TO_I_IF_EXP_SMALL:%.*]], label [[FP_TO_I_IF_EXP_LARGE:%.*]]
 ; CHECK:       fp-to-i-if-exp.small:
-; CHECK-NEXT:    [[TMP24:%.*]] = sub i129 150, [[BIASED_EXP]]
-; CHECK-NEXT:    [[TMP25:%.*]] = lshr i129 [[SIGNIFICAND]], [[TMP24]]
+; CHECK-NEXT:    [[TMP32:%.*]] = sub i32 150, [[BIASED_EXP]]
+; CHECK-NEXT:    [[TMP33:%.*]] = lshr i32 [[SIGNIFICAND1]], [[TMP32]]
+; CHECK-NEXT:    [[TMP25:%.*]] = zext i32 [[TMP33]] to i129
 ; CHECK-NEXT:    [[TMP26:%.*]] = mul i129 [[TMP25]], [[SIGN]]
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
 ; CHECK:       fp-to-i-if-exp.large:
-; CHECK-NEXT:    [[TMP27:%.*]] = add i129 [[BIASED_EXP]], -150
+; CHECK-NEXT:    [[TMP34:%.*]] = add i32 [[BIASED_EXP]], -150
+; CHECK-NEXT:    [[SIGNIFICAND:%.*]] = zext i32 [[SIGNIFICAND1]] to i129
+; CHECK-NEXT:    [[TMP27:%.*]] = zext i32 [[TMP34]] to i129
 ; CHECK-NEXT:    [[TMP28:%.*]] = shl i129 [[SIGNIFICAND]], [[TMP27]]
 ; CHECK-NEXT:    [[TMP29:%.*]] = mul i129 [[TMP28]], [[SIGN]]
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
diff --git a/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptoui129.ll b/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptoui129.ll
index 442ba82d7ffe6..fd29c01ef580d 100644
--- a/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptoui129.ll
+++ b/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptoui129.ll
@@ -16,32 +16,34 @@ define i129 @floattoui129(float %a) {
 ; CHECK-LABEL: @floattoui129(
 ; CHECK-NEXT:  fp-to-i-entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float [[A:%.*]] to i32
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i129
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i32 [[TMP0]], -1
 ; CHECK-NEXT:    [[SIGN:%.*]] = select i1 [[TMP2]], i129 1, i129 -1
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr i129 [[TMP1]], 23
-; CHECK-NEXT:    [[BIASED_EXP:%.*]] = and i129 [[TMP3]], 255
-; CHECK-NEXT:    [[TMP4:%.*]] = and i129 [[TMP1]], 8388607
-; CHECK-NEXT:    [[SIGNIFICAND:%.*]] = or i129 [[TMP4]], 8388608
-; CHECK-NEXT:    [[EXP_IS_NEGATIVE:%.*]] = icmp ult i129 [[BIASED_EXP]], 127
+; CHECK-NEXT:    [[TMP5:%.*]] = lshr i32 [[TMP0]], 23
+; CHECK-NEXT:    [[BIASED_EXP:%.*]] = and i32 [[TMP5]], 255
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP0]], 8388607
+; CHECK-NEXT:    [[SIGNIFICAND1:%.*]] = or i32 [[TMP3]], 8388608
+; CHECK-NEXT:    [[EXP_IS_NEGATIVE:%.*]] = icmp ult i32 [[BIASED_EXP]], 127
 ; CHECK-NEXT:    br i1 [[EXP_IS_NEGATIVE]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_CHECK_SATURATE:%.*]]
 ; CHECK:       fp-to-i-if-check.saturate:
-; CHECK-NEXT:    [[TMP5:%.*]] = add i129 [[BIASED_EXP]], -256
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i129 [[TMP5]], -129
+; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[BIASED_EXP]], -256
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i32 [[TMP4]], -129
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[FP_TO_I_IF_SATURATE:%.*]], label [[FP_TO_I_IF_CHECK_EXP_SIZE:%.*]]
 ; CHECK:       fp-to-i-if-saturate:
 ; CHECK-NEXT:    [[SATURATED:%.*]] = select i1 [[TMP2]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
 ; CHECK:       fp-to-i-if-check.exp.size:
-; CHECK-NEXT:    [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i129 [[BIASED_EXP]], 150
+; CHECK-NEXT:    [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i32 [[BIASED_EXP]], 150
 ; CHECK-NEXT:    br i1 [[EXP_SMALLER_MANTISSA_WIDTH]], label [[FP_TO_I_IF_EXP_SMALL:%.*]], label [[FP_TO_I_IF_EXP_LARGE:%.*]]
 ; CHECK:       fp-to-i-if-exp.small:
-; CHECK-NEXT:    [[TMP7:%.*]] = sub i129 150, [[BIASED_EXP]]
-; CHECK-NEXT:    [[TMP8:%.*]] = lshr i129 [[SIGNIFICAND]], [[TMP7]]
+; CHECK-NEXT:    [[TMP14:%.*]] = sub i32 150, [[BIASED_EXP]]
+; CHECK-NEXT:    [[TMP7:%.*]] = lshr i32 [[SIGNIFICAND1]], [[TMP14]]
+; CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i129
 ; CHECK-NEXT:    [[TMP9:%.*]] = mul i129 [[TMP8]], [[SIGN]]
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
 ; CHECK:       fp-to-i-if-exp.large:
-; CHECK-NEXT:    [[TMP10:%.*]] = add i129 [[BIASED_EXP]], -150
+; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[BIASED_EXP]], -150
+; CHECK-NEXT:    [[SIGNIFICAND:%.*]] = zext i32 [[SIGNIFICAND1]] to i129
+; CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP15]] to i129
 ; CHECK-NEXT:    [[TMP11:%.*]] = shl i129 [[SIGNIFICAND]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = mul i129 [[TMP11]], [[SIGN]]
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
@@ -57,32 +59,34 @@ define i129 @doubletoui129(double %a) {
 ; CHECK-LABEL: @doubletoui129(
 ; CHECK-NEXT:  fp-to-i-entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double [[A:%.*]] to i64
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i64 [[TMP0]] to i129
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i64 [[TMP0]], -1
 ; CHECK-NEXT:    [[SIGN:%.*]] = select i1 [[TMP2]], i129 1, i129 -1
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr i129 [[TMP1]], 52
-; CHECK-NEXT:    [[BIASED_EXP:%.*]] = and i129 [[TMP3]], 2047
-; CHECK-NEXT:    [[TMP4:%.*]] = and i129 [[TMP1]], 4503599627370495
-; CHECK-NEXT:    [[SIGNIFICAND:%.*]] = or i129 [[TMP4]], 4503599627370496
-; CHECK-NEXT:    [[EXP_IS_NEGATIVE:%.*]] = icmp ult i129 [[BIASED_EXP]], 1023
+; CHECK-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP0]], 52
+; CHECK-NEXT:    [[BIASED_EXP:%.*]] = and i64 [[TMP5]], 2047
+; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP0]], 4503599627370495
+; CHECK-NEXT:    [[SIGNIFICAND1:%.*]] = or i64 [[TMP3]], 4503599627370496
+; CHECK-NEXT:    [[EXP_IS_NEGATIVE:%.*]] = icmp ult i64 [[BIASED_EXP]], 1023
 ; CHECK-NEXT:    br i1 [[EXP_IS_NEGATIVE]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_CHECK_SATURATE:%.*]]
 ; CHECK:       fp-to-i-if-check.saturate:
-; CHECK-NEXT:    [[TMP5:%.*]] = add i129 [[BIASED_EXP]], -1152
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i129 [[TMP5]], -129
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[BIASED_EXP]], -1152
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i64 [[TMP4]], -129
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[FP_TO_I_IF_SATURATE:%.*]], label [[FP_TO_I_IF_CHECK_EXP_SIZE:%.*]]
 ; CHECK:       fp-to-i-if-saturate:
 ; CHECK-NEXT:    [[SATURATED:%.*]] = select i1 [[TMP2]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
 ; CHECK:       fp-to-i-if-check.exp.size:
-; CHECK-NEXT:    [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i129 [[BIASED_EXP]], 1075
+; CHECK-NEXT:    [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i64 [[BIASED_EXP]], 1075
 ; CHECK-NEXT:    br i1 [[EXP_SMALLER_MANTISSA_WIDTH]], label [[FP_TO_I_IF_EXP_SMALL:%.*]], label [[FP_TO_I_IF_EXP_LARGE:%.*]]
 ; CHECK:       fp-to-i-if-exp.small:
-; CHECK-NEXT:    [[TMP7:%.*]] = sub i129 1075, [[BIASED_EXP]]
-; CHECK-NEXT:    [[TMP8:%.*]] = lshr i129 [[SIGNIFICAND]], [[TMP7]]
+; CHECK-NEXT:    [[TMP14:%.*]] = sub i64 1075, [[BIASED_EXP]]
+; CHECK-NEXT:    [[TMP7:%.*]] = lshr i64 [[SIGNIFICAND1]], [[TMP14]]
+; CHECK-NEXT:    [[TMP8:%.*]] = zext i64 [[TMP7]] to i129
 ; CHECK-NEXT:    [[TMP9:%.*]] = mul i129 [[TMP8]], [[SIGN]]
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
 ; CHECK:       fp-to-i-if-exp.large:
-; CHECK-NEXT:    [[TMP10:%.*]] = add i129 [[BIASED_EXP]], -1075
+; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[BIASED_EXP]], -1075
+; CHECK-NEXT:    [[SIGNIFICAND:%.*]] = zext i64 [[SIGNIFICAND1]] to i129
+; CHECK-NEXT:    [[TMP10:%.*]] = zext i64 [[TMP15]] to i129
 ; CHECK-NEXT:    [[TMP11:%.*]] = shl i129 [[SIGNIFICAND]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = mul i129 [[TMP11]], [[SIGN]]
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
@@ -99,32 +103,34 @@ define i129 @x86_fp80toui129(x86_fp80 %a) {
 ; CHECK-NEXT:  fp-to-i-entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = fpext x86_fp80 [[A:%.*]] to fp128
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast fp128 [[TMP0]] to i128
-; CHECK-NEXT:    [[TMP2:%.*]] = zext i128 [[TMP1]] to i129
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt i128 [[TMP1]], -1
 ; CHECK-NEXT:    [[SIGN:%.*]] = select i1 [[TMP3]], i129 1, i129 -1
-; CHECK-NEXT:    [[TMP4:%.*]] = lshr i129 [[TMP2]], 112
-; CHECK-NEXT:    [[BIASED_EXP:%.*]] = and i129 [[TMP4]], 32767
-; CHECK-NEXT:    [[TMP5:%.*]] = and i129 [[TMP2]], 5192296858534827628530496329220095
-; CHECK-NEXT:    [[SIGNIFICAND:%.*]] = or i129 [[TMP5]], 5192296858534827628530496329220096
-; CHECK-NEXT:    [[EXP_IS_NEGATIVE:%.*]] = icmp ult i129 [[BIASED_EXP]], 16383
+; CHECK-NEXT:    [[TMP6:%.*]] = lshr i128 [[TMP1]], 112
+; CHECK-NEXT:    [[BIASED_EXP:%.*]] = and i128 [[TMP6]], 32767
+; CHECK-NEXT:    [[TMP4:%.*]] = and i128 [[TMP1]], 5192296858534827628530496329220095
+; CHECK-NEXT:    [[SIGNIFICAND1:%.*]] = or i128 [[TMP4]], 5192296858534827628530496329220096
+; CHECK-NEXT:    [[EXP_IS_NEGATIVE:%.*]] = icmp ult i128 [[BIASED_EXP]], 16383
 ; CHECK-NEXT:    br i1 [[EXP_IS_NEGATIVE]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_CHECK_SATURATE:%.*]]
 ; CHECK:       fp-to-i-if-check.saturate:
-; CHECK-NEXT:    [[TMP6:%.*]] = add i129 [[BIASED_EXP]], -16512
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp ult i129 [[TMP6]], -129
+; CHECK-NEXT:    [[TMP5:%.*]] = add i128 [[BIASED_EXP]], -16512
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ult i128 [[TMP5]], -129
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[FP_TO_I_IF_SATURATE:%.*]], label [[FP_TO_I_IF_CHECK_EXP_SIZE:%.*]]
 ; CHECK:       fp-to-i-if-saturate:
 ; CHECK-NEXT:    [[SATURATED:%.*]] = select i1 [[TMP3]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
 ; CHECK:       fp-to-i-if-check.exp.size:
-; CHECK-NEXT:    [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i129 [[BIASED_EXP]], 16495
+; CHECK-NEXT:    [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i128 [[BIASED_EXP]], 16495
 ; CHECK-NEXT:    br i1 [[EXP_SMALLER_MANTISSA_WIDTH]], label [[FP_TO_I_IF_EXP_SMALL:%.*]], label [[FP_TO_I_IF_EXP_LARGE:%.*]]
 ; CHECK:       fp-to-i-if-exp.small:
-; CHECK-NEXT:    [[TMP8:%.*]] = sub i129 16495, [[BIASED_EXP]]
-; CHECK-NEXT:    [[TMP9:%.*]] = lshr i129 [[SIGNIFICAND]], [[TMP8]]
+; CHECK-NEXT:    [[TMP15:%.*]] = sub i128 16495, [[BIASED_EXP]]
+; CHECK-NEXT:    [[TMP8:%.*]] = lshr i128 [[SIGNIFICAND1]], [[TMP15]]
+; CHECK-NEXT:    [[TMP9:%.*]] = zext i128 [[TMP8]] to i129
 ; CHECK-NEXT:    [[TMP10:%.*]] = mul i129 [[TMP9]], [[SIGN]]
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
 ; CHECK:       fp-to-i-if-exp.large:
-; CHECK-NEXT:    [[TMP11:%.*]] = add i129 [[BIASED_EXP]], -16495
+; CHECK-NEXT:    [[TMP16:%.*]] = add i128 [[BIASED_EXP]], -16495
+; CHECK-NEXT:    [[SIGNIFICAND:%.*]] = zext i128 [[SIGNIFICAND1]] to i129
+; CHECK-NEXT:    [[TMP11:%.*]] = zext i128 [[TMP16]] to i129
 ; CHECK-NEXT:    [[TMP12:%.*]] = shl i129 [[SIGNIFICAND]], [[TMP11]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = mul i129 [[TMP12]], [[SIGN]]
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
@@ -140,32 +146,34 @@ define i129 @fp128toui129(fp128 %a) {
 ; CHECK-LABEL: @fp128toui129(
 ; CHECK-NEXT:  fp-to-i-entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast fp128 [[A:%.*]] to i128
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i128 [[TMP0]] to i129
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i128 [[TMP0]], -1
 ; CHECK-NEXT:    [[SIGN:%.*]] = select i1 [[TMP2]], i129 1, i129 -1
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr i129 [[TMP1]], 112
-; CHECK-NEXT:    [[BIASED_EXP:%.*]] = and i129 [[TMP3]], 32767
-; CHECK-NEXT:    [[TMP4:%.*]] = and i129 [[TMP1]], 5192296858534827628530496329220095
-; CHECK-NEXT:    [[SIGNIFICAND:%.*]] = or i129 [[TMP4]], 5192296858534827628530496329220096
-; CHECK-NEXT:    [[EXP_IS_NEGATIVE:%.*]] = icmp ult i129 [[BIASED_EXP]], 16383
+; CHECK-NEXT:    [[TMP5:%.*]] = lshr i128 [[TMP0]], 112
+; CHECK-NEXT:    [[BIASED_EXP:%.*]] = and i128 [[TMP5]], 32767
+; CHECK-NEXT:    [[TMP3:%.*]] = and i128 [[TMP0]], 5192296858534827628530496329220095
+; CHECK-NEXT:    [[SIGNIFICAND1:%.*]] = or i128 [[TMP3]], 5192296858534827628530496329220096
+; CHECK-NEXT:    [[EXP_IS_NEGATIVE:%.*]] = icmp ult i128 [[BIASED_EXP]], 16383
 ; CHECK-NEXT:    br i1 [[EXP_IS_NEGATIVE]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_CHECK_SATURATE:%.*]]
 ; CHECK:       fp-to-i-if-check.saturate:
-; CHECK-NEXT:    [[TMP5:%.*]] = add i129 [[BIASED_EXP]], -16512
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i129 [[TMP5]], -129
+; CHECK-NEXT:    [[TMP4:%.*]] = add i128 [[BIASED_EXP]], -16512
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i128 [[TMP4]], -129
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[FP_TO_I_IF_SATURATE:%.*]], label [[FP_TO_I_IF_CHECK_EXP_SIZE:%.*]]
 ; CHECK:       fp-to-i-if-saturate:
 ; CHECK-NEXT:    [[SATURATED:%.*]] = select i1 [[TMP2]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
 ; CHECK:       fp-to-i-if-check.exp.size:
-; CHECK-NEXT:    [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i129 [[BIASED_EXP]], 16495
+; CHECK-NEXT:    [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i128 [[BIASED_EXP]], 16495
 ; CHECK-NEXT:    br i1 [[EXP_SMALLER_MANTISSA_WIDTH]], label [[FP_TO_I_IF_EXP_SMALL:%.*]], label [[FP_TO_I_IF_EXP_LARGE:%.*]]
 ; CHECK:       fp-to-i-if-exp.small:
-; CHECK-NEXT:    [[TMP7:%.*]] = sub i129 16495, [[BIASED_EXP]]
-; CHECK-NEXT:    [[TMP8:%.*]] = lshr i129 [[SIGNIFICAND]], [[TMP7]]
+; CHECK-NEXT:    [[TMP14:%.*]] = sub i128 16495, [[BIASED_EXP]]
+; CHECK-NEXT:    [[TMP7:%.*]] = lshr i128 [[SIGNIFICAND1]], [[TMP14]]
+; CHECK-NEXT:    [[TMP8:%.*]] = zext i128 [[TMP7]] to i129
 ; CHECK-NEXT:    [[TMP9:%.*]] = mul i129 [[TMP8]], [[SIGN]]
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
 ; CHECK:       fp-to-i-if-exp.large:
-; CHECK-NEXT:    [[TMP10:%.*]] = add i129 [[BIASED_EXP]], -16495
+; CHECK-NEXT:    [[TMP15:%.*]] = add i128 [[BIASED_EXP]], -16495
+; CHECK-NEXT:    [[SIGNIFICAND:%.*]] = zext i128 [[SIGNIFICAND1]] to i129
+; CHECK-NEXT:    [[TMP10:%.*]] = zext i128 [[TMP15]] to i129
 ; CHECK-NEXT:    [[TMP11:%.*]] = shl i129 [[SIGNIFICAND]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = mul i129 [[TMP11]], [[SIGN]]
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
@@ -182,32 +190,34 @@ define <2 x i129> @floattoui129v2(<2 x float> %a) {
 ; CHECK-NEXT:  fp-to-i-entryfp-to-i-entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x float> [[A:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float [[TMP0]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i129
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt i32 [[TMP1]], -1
 ; CHECK-NEXT:    [[SIGN7:%.*]] = select i1 [[TMP3]], i129 1, i129 -1
-; CHECK-NEXT:    [[TMP4:%.*]] = lshr i129 [[TMP2]], 23
-; CHECK-NEXT:    [[BIASED_EXP8:%.*]] = and i129 [[TMP4]], 255
-; CHECK-NEXT:    [[TMP5:%.*]] = and i129 [[TMP2]], 8388607
-; CHECK-NEXT:    [[SIGNIFICAND9:%.*]] = or i129 [[TMP5]], 8388608
-; CHECK-NEXT:    [[EXP_IS_NEGATIVE10:%.*]] = icmp ult i129 [[BIASED_EXP8]], 127
+; CHECK-NEXT:    [[TMP6:%.*]] = lshr i32 [[TMP1]], 23
+; CHECK-NEXT:    [[BIASED_EXP8:%.*]] = and i32 [[TMP6]], 255
+; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[TMP1]], 8388607
+; CHECK-NEXT:    [[SIGNIFICAND10:%.*]] = or i32 [[TMP4]], 8388608
+; CHECK-NEXT:    [[EXP_IS_NEGATIVE10:%.*]] = icmp ult i32 [[BIASED_EXP8]], 127
 ; CHECK-NEXT:    br i1 [[EXP_IS_NEGATIVE10]], label [[FP_TO_I_CLEANUP1:%.*]], label [[FP_TO_I_IF_CHECK_SATURATE2:%.*]]
 ; CHECK:       fp-to-i-if-check.saturate2:
-; CHECK-NEXT:    [[TMP6:%.*]] = add i129 [[BIASED_EXP8]], -256
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp ult i129 [[TMP6]], -129
+; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[BIASED_EXP8]], -256
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ult i32 [[TMP5]], -129
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[FP_TO_I_IF_SATURATE3:%.*]], label [[FP_TO_I_IF_CHECK_EXP_SIZE4:%.*]]
 ; CHECK:       fp-to-i-if-saturate3:
 ; CHECK-NEXT:    [[SATURATED11:%.*]] = select i1 [[TMP3]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP1]]
 ; CHECK:       fp-to-i-if-check.exp.size4:
-; CHECK-NEXT:    [[EXP_SMALLER_MANTISSA_WIDTH12:%.*]] = icmp ult i129 [[BIASED_EXP8]], 150
+; CHECK-NEXT:    [[EXP_SMALLER_MANTISSA_WIDTH12:%.*]] = icmp ult i32 [[BIASED_EXP8]], 150
 ; CHECK-NEXT:    br i1 [[EXP_SMALLER_MANTISSA_WIDTH12]], label [[FP_TO_I_IF_EXP_SMALL5:%.*]], label [[FP_TO_I_IF_EXP_LARGE6:%.*]]
 ; CHECK:       fp-to-i-if-exp.small5:
-; CHECK-NEXT:    [[TMP8:%.*]] = sub i129 150, [[BIASED_EXP8]]
-; CHECK-NEXT:    [[TMP9:%.*]] = lshr i129 [[SIGNIFICAND9]], [[TMP8]]
+; CHECK-NEXT:    [[TMP18:%.*]] = sub i32 150, [[BIASED_EXP8]]
+; CHECK-NEXT:    [[TMP8:%.*]] = lshr i32 [[SIGNIFICAND10]], [[TMP18]]
+; CHECK-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i129
 ; CHECK-NEXT:    [[TMP10:%.*]] = mul i129 [[TMP9]], [[SIGN7]]
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP1]]
 ; CHECK:       fp-to-i-if-exp.large6:
-; CHECK-NEXT:    [[TMP11:%.*]] = add i129 [[BIASED_EXP8]], -150
+; CHECK-NEXT:    [[TMP20:%.*]] = add i32 [[BIASED_EXP8]], -150
+; CHECK-NEXT:    [[SIGNIFICAND9:%.*]] = zext i32 [[SIGNIFICAND10]] to i129
+; CHECK-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP20]] to i129
 ; CHECK-NEXT:    [[TMP12:%.*]] = shl i129 [[SIGNIFICAND9]], [[TMP11]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = mul i129 [[TMP12]], [[SIGN7]]
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP1]]
@@ -216,32 +226,34 @@ define <2 x i129> @floattoui129v2(<2 x float> %a) {
 ; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <2 x i129> poison, i129 [[TMP14]], i64 0
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x float> [[A]], i64 1
 ; CHECK-NEXT:    [[TMP17:%.*]] = bitcast float [[TMP16]] to i32
-; CHECK-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP17]] to i129
 ; CHECK-NEXT:    [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], -1
 ; CHECK-NEXT:    [[SIGN:%.*]] = select i1 [[TMP19]], i129 1, i129 -1
-; CHECK-NEXT:    [[TMP20:%.*]] = lshr i129 [[TMP18]], 23
-; CHECK-NEXT:    [[BIASED_EXP:%.*]] = and i129 [[TMP20]], 255
-; CHECK-NEXT:    [[TMP21:%.*]] = and i129 [[TMP18]], 8388607
-; CHECK-NEXT:    [[SIGNIFICAND:%.*]] = or i129 [[TMP21]], 8388608
-; CHECK-NEXT:    [[EXP_IS_NEGATIVE:%.*]] = icmp ult i129 [[BIASED_EXP]], 127
+; CHECK-NEXT:    [[TMP21:%.*]] = lshr i32 [[TMP17]], 23
+; CHECK-NEXT:    [[BIASED_EXP:%.*]] = and i32 [[TMP21]], 255
+; CHECK-NEXT:    [[TMP22:%.*]] = and i32 [[TMP17]], 8388607
+; CHECK-NEXT:    [[SIGNIFICAND1:%.*]] = or i32 [[TMP22]], 8388608
+; CHECK-NEXT:    [[EXP_IS_NEGATIVE:%.*]] = icmp ult i32 [[BIASED_EXP]], 127
 ; CHECK-NEXT:    br i1 [[EXP_IS_NEGATIVE]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_CHECK_SATURATE:%.*]]
 ; CHECK:       fp-to-i-if-check.saturate:
-; CHECK-NEXT:    [[TMP22:%.*]] = add i129 [[BIASED_EXP]], -256
-; CHECK-NEXT:    [[TMP23:%.*]] = icmp ult i129 [[TMP22]], -129
+; CHECK-NEXT:    [[TMP24:%.*]] = add i32 [[BIASED_EXP]], -256
+; CHECK-NEXT:    [[TMP23:%.*]] = icmp ult i32 [[TMP24]], -129
 ; CHECK-NEXT:    br i1 [[TMP23]], label [[FP_TO_I_IF_SATURATE:%.*]], label [[FP_TO_I_IF_CHECK_EXP_SIZE:%.*]]
 ; CHECK:       fp-to-i-if-saturate:
 ; CHECK-NEXT:    [[SATURATED:%.*]] = select i1 [[TMP19]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
 ; CHECK:       fp-to-i-if-check.exp.size:
-; CHECK-NEXT:    [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i129 [[BIASED_EXP]], 150
+; CHECK-NEXT:    [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i32 [[BIASED_EXP]], 150
 ; CHECK-NEXT:    br i1 [[EXP_SMALLER_MANTISSA_WIDTH]], label [[FP_TO_I_IF_EXP_SMALL:%.*]], label [[FP_TO_I_IF_EXP_LARGE:%.*]]
 ; CHECK:       fp-to-i-if-exp.small:
-; CHECK-NEXT:    [[TMP24:%.*]] = sub i129 150, [[BIASED_EXP]]
-; CHECK-NEXT:    [[TMP25:%.*]] = lshr i129 [[SIGNIFICAND]], [[TMP24]]
+; CHECK-NEXT:    [[TMP32:%.*]] = sub i32 150, [[BIASED_EXP]]
+; CHECK-NEXT:    [[TMP33:%.*]] = lshr i32 [[SIGNIFICAND1]], [[TMP32]]
+; CHECK-NEXT:    [[TMP25:%.*]] = zext i32 [[TMP33]] to i129
 ; CHECK-NEXT:    [[TMP26:%.*]] = mul i129 [[TMP25]], [[SIGN]]
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
 ; CHECK:       fp-to-i-if-exp.large:
-; CHECK-NEXT:    [[TMP27:%.*]] = add i129 [[BIASED_EXP]], -150
+; CHECK-NEXT:    [[TMP34:%.*]] = add i32 [[BIASED_EXP]], -150
+; CHECK-NEXT:    [[SIGNIFICAND:%.*]] = zext i32 [[SIGNIFICAND1]] to i129
+; CHECK-NEXT:    [[TMP27:%.*]] = zext i32 [[TMP34]] to i129
 ; CHECK-NEXT:    [[TMP28:%.*]] = shl i129 [[SIGNIFICAND]], [[TMP27]]
 ; CHECK-NEXT:    [[TMP29:%.*]] = mul i129 [[TMP28]], [[SIGN]]
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]



More information about the llvm-commits mailing list