[llvm] [ExpandIRInst] Support expanding fptoi to smaller type (PR #178690)
Nikita Popov via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 29 08:05:15 PST 2026
https://github.com/nikic created https://github.com/llvm/llvm-project/pull/178690
In order to support expanding fptoi where the target type is smaller than the float-as-integer type, make most of the code work on the float-as-integer type, rather than the target type of the cast. A cast to the target type is then only needed in two places: on the final result, and on the significand prior to performing a left shift.
This not only allows us to handle casts to a smaller type, but also avoids performing intermediate calculations on unnecessarily large types.
This also matches how compiler-rt implements the conversion:
https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fp_fixint_impl.inc
Proof: https://alive2.llvm.org/ce/z/3pJ9pE
(Note that there is a pre-existing issue that we produce the same code for fptosi and fptoui.)
>From a90ba0ce9ee8d3f81983d6d812155475b39cf0db Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Thu, 29 Jan 2026 16:55:04 +0100
Subject: [PATCH] [ExpandIRInst] Support expanding fptoi to smaller type
In order to support expanding fptoi where the target type is
smaller, make most of the code work on the float-as-integer type,
rather than the target type of the cast. We only need to cast the
final result to the target type, or prior to performing a left
shift.
This not only allows us to handle casts to a smaller type, but also
avoids performing intermediate calculations on unnecessarily large
types.
This also matches how compiler-rt handles this:
https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fp_fixint_impl.inc
---
llvm/lib/CodeGen/ExpandIRInsts.cpp | 42 +-
llvm/test/CodeGen/AMDGPU/fptoi.i128.ll | 1530 ++++++++---------
.../X86/expand-fp-convert-small.ll | 140 +-
.../X86/expand-large-fp-convert-fptosi129.ll | 156 +-
.../X86/expand-large-fp-convert-fptoui129.ll | 156 +-
5 files changed, 985 insertions(+), 1039 deletions(-)
diff --git a/llvm/lib/CodeGen/ExpandIRInsts.cpp b/llvm/lib/CodeGen/ExpandIRInsts.cpp
index 2f0f8e6a79b9a..241271f4f0035 100644
--- a/llvm/lib/CodeGen/ExpandIRInsts.cpp
+++ b/llvm/lib/CodeGen/ExpandIRInsts.cpp
@@ -528,10 +528,11 @@ static void expandFPToI(Instruction *FPToI) {
PowerOf2Ceil(FloatVal->getType()->getScalarSizeInBits());
unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;
unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1;
- Value *ImplicitBit =
- ConstantInt::get(IntTy, APInt::getOneBitSet(BitWidth, FPMantissaWidth));
- Value *SignificandMask =
- ConstantInt::get(IntTy, APInt::getLowBitsSet(BitWidth, FPMantissaWidth));
+ IntegerType *FloatIntTy = Builder.getIntNTy(FloatWidth);
+ Value *ImplicitBit = ConstantInt::get(
+ FloatIntTy, APInt::getOneBitSet(FloatWidth, FPMantissaWidth));
+ Value *SignificandMask = ConstantInt::get(
+ FloatIntTy, APInt::getLowBitsSet(FloatWidth, FPMantissaWidth));
BasicBlock *Entry = Builder.GetInsertBlock();
Function *F = Entry->getParent();
@@ -559,30 +560,30 @@ static void expandFPToI(Instruction *FPToI) {
if (FloatVal->getType()->isX86_FP80Ty())
FloatVal0 =
Builder.CreateFPExt(FloatVal, Type::getFP128Ty(Builder.getContext()));
- Value *ARep0 =
- Builder.CreateBitCast(FloatVal0, Builder.getIntNTy(FloatWidth));
- Value *ARep = Builder.CreateZExt(ARep0, FPToI->getType());
+ Value *ARep = Builder.CreateBitCast(FloatVal0, FloatIntTy);
Value *PosOrNeg = Builder.CreateICmpSGT(
- ARep0, ConstantInt::getSigned(Builder.getIntNTy(FloatWidth), -1));
+ ARep, ConstantInt::getSigned(Builder.getIntNTy(FloatWidth), -1));
Value *Sign = Builder.CreateSelect(PosOrNeg, ConstantInt::getSigned(IntTy, 1),
ConstantInt::getSigned(IntTy, -1), "sign");
Value *And =
- Builder.CreateLShr(ARep, Builder.getIntN(BitWidth, FPMantissaWidth));
+ Builder.CreateLShr(ARep, Builder.getIntN(FloatWidth, FPMantissaWidth));
Value *BiasedExp = Builder.CreateAnd(
- And, Builder.getIntN(BitWidth, (1 << ExponentWidth) - 1), "biased.exp");
+ And, Builder.getIntN(FloatWidth, (1 << ExponentWidth) - 1), "biased.exp");
Value *Abs = Builder.CreateAnd(ARep, SignificandMask);
Value *Significand = Builder.CreateOr(Abs, ImplicitBit, "significand");
Value *ExpIsNegative = Builder.CreateICmpULT(
- BiasedExp, Builder.getIntN(BitWidth, ExponentBias), "exp.is.negative");
+ BiasedExp, Builder.getIntN(FloatWidth, ExponentBias), "exp.is.negative");
Builder.CreateCondBr(ExpIsNegative, End, CheckSaturateBB);
// check.saturate:
Builder.SetInsertPoint(CheckSaturateBB);
Value *Add1 = Builder.CreateAdd(
- BiasedExp, ConstantInt::getSigned(
- IntTy, -static_cast<int64_t>(ExponentBias + BitWidth)));
+ BiasedExp,
+ ConstantInt::getSigned(FloatIntTy,
+ -static_cast<int64_t>(ExponentBias + BitWidth)));
Value *Cmp3 = Builder.CreateICmpULT(
- Add1, ConstantInt::getSigned(IntTy, -static_cast<int64_t>(BitWidth)));
+ Add1,
+ ConstantInt::getSigned(FloatIntTy, -static_cast<int64_t>(BitWidth)));
Builder.CreateCondBr(Cmp3, SaturateBB, CheckExpSizeBB);
// saturate:
@@ -598,15 +599,16 @@ static void expandFPToI(Instruction *FPToI) {
// if.end9:
Builder.SetInsertPoint(CheckExpSizeBB);
Value *ExpSmallerMantissaWidth = Builder.CreateICmpULT(
- BiasedExp, Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth),
+ BiasedExp, Builder.getIntN(FloatWidth, ExponentBias + FPMantissaWidth),
"exp.smaller.mantissa.width");
Builder.CreateCondBr(ExpSmallerMantissaWidth, ExpSmallBB, ExpLargeBB);
// exp.small:
Builder.SetInsertPoint(ExpSmallBB);
Value *Sub13 = Builder.CreateSub(
- Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth), BiasedExp);
- Value *Shr14 = Builder.CreateLShr(Significand, Sub13);
+ Builder.getIntN(FloatWidth, ExponentBias + FPMantissaWidth), BiasedExp);
+ Value *Shr14 =
+ Builder.CreateZExtOrTrunc(Builder.CreateLShr(Significand, Sub13), IntTy);
Value *Mul = Builder.CreateMul(Shr14, Sign);
Builder.CreateBr(End);
@@ -615,8 +617,10 @@ static void expandFPToI(Instruction *FPToI) {
Value *Sub15 = Builder.CreateAdd(
BiasedExp,
ConstantInt::getSigned(
- IntTy, -static_cast<int64_t>(ExponentBias + FPMantissaWidth)));
- Value *Shl = Builder.CreateShl(Significand, Sub15);
+ FloatIntTy, -static_cast<int64_t>(ExponentBias + FPMantissaWidth)));
+ Value *SignificandCast = Builder.CreateZExtOrTrunc(Significand, IntTy);
+ Value *Shl = Builder.CreateShl(SignificandCast,
+ Builder.CreateZExtOrTrunc(Sub15, IntTy));
Value *Mul16 = Builder.CreateMul(Shl, Sign);
Builder.CreateBr(End);
diff --git a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
index df905f4e816a5..200fbf5d220b4 100644
--- a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
@@ -11,8 +11,8 @@ define i128 @fptosi_f64_to_i128(double %x) {
; SDAG-NEXT: v_mov_b32_e32 v7, 0
; SDAG-NEXT: s_mov_b64 s[4:5], 0x3fe
; SDAG-NEXT: v_mov_b32_e32 v4, v0
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: v_cmp_lt_u64_e32 vcc, s[4:5], v[6:7]
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: v_mov_b32_e32 v2, 0
; SDAG-NEXT: v_mov_b32_e32 v1, 0
; SDAG-NEXT: v_mov_b32_e32 v3, 0
@@ -20,33 +20,28 @@ define i128 @fptosi_f64_to_i128(double %x) {
; SDAG-NEXT: s_cbranch_execz .LBB0_10
; SDAG-NEXT: ; %bb.1: ; %fp-to-i-if-check.saturate
; SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffffb81, v6
-; SDAG-NEXT: v_mov_b32_e32 v1, -1
-; SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
-; SDAG-NEXT: v_addc_co_u32_e32 v2, vcc, -1, v7, vcc
+; SDAG-NEXT: v_addc_co_u32_e64 v1, s[6:7], 0, -1, vcc
; SDAG-NEXT: s_movk_i32 s6, 0xff7f
-; SDAG-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v7, vcc
; SDAG-NEXT: s_mov_b32 s7, -1
-; SDAG-NEXT: v_cmp_eq_u64_e64 s[4:5], -1, v[2:3]
-; SDAG-NEXT: v_cmp_lt_u64_e64 s[6:7], s[6:7], v[0:1]
-; SDAG-NEXT: v_cmp_lt_i64_e32 vcc, -1, v[4:5]
-; SDAG-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
+; SDAG-NEXT: v_cmp_lt_i64_e64 s[4:5], -1, v[4:5]
+; SDAG-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1]
; SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
-; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
+; SDAG-NEXT: s_and_saveexec_b64 s[6:7], vcc
; SDAG-NEXT: s_xor_b64 s[10:11], exec, s[6:7]
; SDAG-NEXT: s_cbranch_execz .LBB0_7
; SDAG-NEXT: ; %bb.2: ; %fp-to-i-if-check.exp.size
-; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; SDAG-NEXT: v_add_co_u32_e64 v9, s[4:5], -1, v0
-; SDAG-NEXT: s_mov_b64 s[4:5], 0x432
+; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; SDAG-NEXT: v_add_co_u32_e32 v9, vcc, -1, v0
+; SDAG-NEXT: s_mov_b64 s[6:7], 0x432
; SDAG-NEXT: v_and_b32_e32 v0, 0xfffff, v5
-; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[4:5], v[6:7]
-; SDAG-NEXT: v_cndmask_b32_e64 v8, -1, 0, vcc
-; SDAG-NEXT: v_cndmask_b32_e64 v10, -1, 1, vcc
+; SDAG-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[6:7]
+; SDAG-NEXT: v_cndmask_b32_e64 v8, -1, 0, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v10, -1, 1, s[4:5]
; SDAG-NEXT: v_or_b32_e32 v5, 0x100000, v0
; SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
-; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
+; SDAG-NEXT: s_and_saveexec_b64 s[6:7], vcc
; SDAG-NEXT: s_xor_b64 s[12:13], exec, s[6:7]
; SDAG-NEXT: s_cbranch_execz .LBB0_4
; SDAG-NEXT: ; %bb.3: ; %fp-to-i-if-exp.large
@@ -55,75 +50,69 @@ define i128 @fptosi_f64_to_i128(double %x) {
; SDAG-NEXT: v_add_u32_e32 v3, 0xfffffbcd, v6
; SDAG-NEXT: v_lshrrev_b64 v[0:1], v0, v[4:5]
; SDAG-NEXT: v_lshlrev_b64 v[6:7], v2, v[4:5]
-; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v3
+; SDAG-NEXT: v_cmp_gt_u32_e32 vcc, 64, v3
; SDAG-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v3
; SDAG-NEXT: v_lshlrev_b64 v[3:4], v3, v[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v1, v7, v1, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[6:7]
-; SDAG-NEXT: v_cndmask_b32_e64 v0, v6, v0, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v7, 0, v3, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
+; SDAG-NEXT: v_cndmask_b32_e32 v7, 0, v3, vcc
; SDAG-NEXT: v_cndmask_b32_e64 v5, 0, v0, s[6:7]
; SDAG-NEXT: v_mul_lo_u32 v12, v10, v1
; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v7, v10, 0
; SDAG-NEXT: v_mov_b32_e32 v2, 0
-; SDAG-NEXT: v_cndmask_b32_e64 v13, 0, v4, s[4:5]
-; SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v13, v10, v[1:2]
+; SDAG-NEXT: v_cndmask_b32_e32 v13, 0, v4, vcc
+; SDAG-NEXT: v_mad_u64_u32 v[3:4], s[6:7], v13, v10, v[1:2]
; SDAG-NEXT: v_mul_lo_u32 v11, v8, v5
; SDAG-NEXT: v_mad_u64_u32 v[5:6], s[6:7], v10, v5, 0
; SDAG-NEXT: v_mov_b32_e32 v1, v3
-; SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v7, v8, v[1:2]
+; SDAG-NEXT: v_mad_u64_u32 v[1:2], s[6:7], v7, v8, v[1:2]
; SDAG-NEXT: v_add3_u32 v6, v6, v12, v11
-; SDAG-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v9, v7, v[5:6]
-; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v4, v2
-; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, s[4:5]
+; SDAG-NEXT: v_mad_u64_u32 v[5:6], s[6:7], v9, v7, v[5:6]
+; SDAG-NEXT: v_add_co_u32_e32 v2, vcc, v4, v2
+; SDAG-NEXT: v_addc_co_u32_e64 v3, s[6:7], 0, 0, vcc
; SDAG-NEXT: v_mul_lo_u32 v10, v9, v13
; SDAG-NEXT: v_mul_lo_u32 v7, v9, v7
-; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v13, v8, v[2:3]
+; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[6:7], v13, v8, v[2:3]
; SDAG-NEXT: ; implicit-def: $vgpr8
; SDAG-NEXT: ; implicit-def: $vgpr9
; SDAG-NEXT: v_add3_u32 v4, v7, v6, v10
-; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v2, v5
-; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], v3, v4, s[4:5]
+; SDAG-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5
+; SDAG-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v4, vcc
; SDAG-NEXT: ; implicit-def: $vgpr6_vgpr7
; SDAG-NEXT: ; implicit-def: $vgpr4_vgpr5
; SDAG-NEXT: ; implicit-def: $vgpr10
; SDAG-NEXT: .LBB0_4: ; %Flow
-; SDAG-NEXT: s_andn2_saveexec_b64 s[12:13], s[12:13]
+; SDAG-NEXT: s_andn2_saveexec_b64 s[6:7], s[12:13]
; SDAG-NEXT: s_cbranch_execz .LBB0_6
; SDAG-NEXT: ; %bb.5: ; %fp-to-i-if-exp.small
-; SDAG-NEXT: v_sub_u32_e32 v2, 0x433, v6
-; SDAG-NEXT: v_lshrrev_b64 v[0:1], v2, v[4:5]
-; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v2
-; SDAG-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v2
-; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v6, v0, v4, s[6:7]
-; SDAG-NEXT: v_cndmask_b32_e64 v5, v1, v5, s[6:7]
-; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v10, 0
+; SDAG-NEXT: v_sub_u32_e32 v0, 0x433, v6
+; SDAG-NEXT: v_lshrrev_b64 v[4:5], v0, v[4:5]
; SDAG-NEXT: v_mov_b32_e32 v2, 0
-; SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v5, v10, v[1:2]
-; SDAG-NEXT: v_mov_b32_e32 v1, v3
-; SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v6, v8, v[1:2]
-; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v4, v2
-; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, s[4:5]
-; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v8, v[2:3]
-; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v9, v6, v[2:3]
-; SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v9, v6, v[3:4]
+; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[12:13], v4, v10, 0
+; SDAG-NEXT: v_mad_u64_u32 v[6:7], s[12:13], v5, v10, v[1:2]
+; SDAG-NEXT: v_mov_b32_e32 v1, v6
+; SDAG-NEXT: v_mad_u64_u32 v[1:2], s[12:13], v4, v8, v[1:2]
+; SDAG-NEXT: v_add_co_u32_e32 v2, vcc, v7, v2
+; SDAG-NEXT: v_addc_co_u32_e64 v3, s[12:13], 0, 0, vcc
+; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[12:13], v5, v8, v[2:3]
+; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[12:13], v9, v4, v[2:3]
+; SDAG-NEXT: v_mad_u64_u32 v[3:4], s[12:13], v9, v4, v[3:4]
; SDAG-NEXT: v_mad_i32_i24 v3, v9, v5, v3
; SDAG-NEXT: .LBB0_6: ; %Flow1
-; SDAG-NEXT: s_or_b64 exec, exec, s[12:13]
+; SDAG-NEXT: s_or_b64 exec, exec, s[6:7]
; SDAG-NEXT: .LBB0_7: ; %Flow2
-; SDAG-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11]
+; SDAG-NEXT: s_andn2_saveexec_b64 s[6:7], s[10:11]
; SDAG-NEXT: ; %bb.8: ; %fp-to-i-if-saturate
; SDAG-NEXT: v_bfrev_b32_e32 v0, 1
; SDAG-NEXT: v_bfrev_b32_e32 v1, -2
-; SDAG-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
-; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
+; SDAG-NEXT: v_cndmask_b32_e64 v2, v0, v1, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5]
; SDAG-NEXT: v_mov_b32_e32 v3, v2
; SDAG-NEXT: v_mov_b32_e32 v0, v1
; SDAG-NEXT: v_mov_b32_e32 v2, v1
; SDAG-NEXT: ; %bb.9: ; %Flow3
-; SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
+; SDAG-NEXT: s_or_b64 exec, exec, s[6:7]
; SDAG-NEXT: .LBB0_10: ; %fp-to-i-cleanup
; SDAG-NEXT: s_or_b64 exec, exec, s[8:9]
; SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -131,15 +120,15 @@ define i128 @fptosi_f64_to_i128(double %x) {
; GISEL-LABEL: fptosi_f64_to_i128:
; GISEL: ; %bb.0: ; %fp-to-i-entry
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT: v_mov_b32_e32 v5, v1
; GISEL-NEXT: v_mov_b32_e32 v4, v0
-; GISEL-NEXT: v_lshrrev_b32_e32 v2, 20, v5
-; GISEL-NEXT: v_mov_b32_e32 v0, 0x3ff
-; GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GISEL-NEXT: v_mov_b32_e32 v5, v1
+; GISEL-NEXT: v_lshrrev_b64 v[0:1], 52, v[4:5]
+; GISEL-NEXT: v_mov_b32_e32 v1, 0x3ff
; GISEL-NEXT: v_mov_b32_e32 v7, 0
-; GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GISEL-NEXT: v_and_b32_e32 v6, 0x7ff, v2
-; GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[6:7], v[0:1]
+; GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GISEL-NEXT: v_bfe_u32 v6, v0, 0, 11
+; GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[6:7], v[1:2]
; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
; GISEL-NEXT: v_mov_b32_e32 v0, s4
; GISEL-NEXT: v_mov_b32_e32 v1, s5
@@ -150,19 +139,10 @@ define i128 @fptosi_f64_to_i128(double %x) {
; GISEL-NEXT: ; %bb.1: ; %fp-to-i-if-check.saturate
; GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffffb81, v6
; GISEL-NEXT: v_mov_b32_e32 v2, 0xffffff80
-; GISEL-NEXT: v_addc_co_u32_e64 v1, s[6:7], 0, -1, vcc
; GISEL-NEXT: v_mov_b32_e32 v3, -1
-; GISEL-NEXT: v_addc_co_u32_e64 v8, s[6:7], 0, -1, s[6:7]
-; GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3]
-; GISEL-NEXT: v_addc_co_u32_e64 v9, s[6:7], 0, -1, s[6:7]
-; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT: v_cmp_le_u64_e32 vcc, -1, v[8:9]
+; GISEL-NEXT: v_addc_co_u32_e64 v1, s[6:7], 0, -1, vcc
; GISEL-NEXT: v_cmp_lt_i64_e64 s[4:5], -1, v[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, -1, v[8:9]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GISEL-NEXT: v_and_b32_e32 v0, 1, v0
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3]
; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
; GISEL-NEXT: s_xor_b64 s[14:15], exec, s[6:7]
@@ -267,22 +247,16 @@ define i128 @fptosi_f64_to_i128(double %x) {
; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[16:17]
; GISEL-NEXT: s_cbranch_execz .LBB0_6
; GISEL-NEXT: ; %bb.5: ; %fp-to-i-if-exp.small
-; GISEL-NEXT: v_sub_co_u32_e32 v2, vcc, 0x433, v6
-; GISEL-NEXT: v_lshrrev_b64 v[0:1], v2, v[4:5]
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v6, v0, v4, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v7, v1, v5, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[6:7], v6, v9, 0
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v6, v8, 0
-; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[6:7], v7, v9, v[4:5]
-; GISEL-NEXT: v_mul_lo_u32 v10, v7, v9
-; GISEL-NEXT: v_mad_u64_u32 v[4:5], vcc, v6, v9, v[1:2]
-; GISEL-NEXT: v_mul_lo_u32 v6, v6, v9
-; GISEL-NEXT: v_mad_u64_u32 v[1:2], s[6:7], v7, v8, v[4:5]
-; GISEL-NEXT: v_addc_co_u32_e64 v3, s[6:7], v3, v6, s[6:7]
+; GISEL-NEXT: v_sub_co_u32_e32 v0, vcc, 0x433, v6
+; GISEL-NEXT: v_lshrrev_b64 v[4:5], v0, v[4:5]
+; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[6:7], v4, v9, 0
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v4, v8, 0
+; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[6:7], v5, v9, v[6:7]
+; GISEL-NEXT: v_mul_lo_u32 v10, v5, v9
+; GISEL-NEXT: v_mad_u64_u32 v[6:7], vcc, v4, v9, v[1:2]
+; GISEL-NEXT: v_mul_lo_u32 v4, v4, v9
+; GISEL-NEXT: v_mad_u64_u32 v[1:2], s[6:7], v5, v8, v[6:7]
+; GISEL-NEXT: v_addc_co_u32_e64 v3, s[6:7], v3, v4, s[6:7]
; GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v10, vcc
; GISEL-NEXT: .LBB0_6: ; %Flow1
; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
@@ -376,8 +350,8 @@ define i128 @fptoui_f64_to_i128(double %x) {
; SDAG-NEXT: v_mov_b32_e32 v7, 0
; SDAG-NEXT: s_mov_b64 s[4:5], 0x3fe
; SDAG-NEXT: v_mov_b32_e32 v4, v0
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: v_cmp_lt_u64_e32 vcc, s[4:5], v[6:7]
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: v_mov_b32_e32 v2, 0
; SDAG-NEXT: v_mov_b32_e32 v1, 0
; SDAG-NEXT: v_mov_b32_e32 v3, 0
@@ -385,33 +359,28 @@ define i128 @fptoui_f64_to_i128(double %x) {
; SDAG-NEXT: s_cbranch_execz .LBB1_10
; SDAG-NEXT: ; %bb.1: ; %fp-to-i-if-check.saturate
; SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffffb81, v6
-; SDAG-NEXT: v_mov_b32_e32 v1, -1
-; SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
-; SDAG-NEXT: v_addc_co_u32_e32 v2, vcc, -1, v7, vcc
+; SDAG-NEXT: v_addc_co_u32_e64 v1, s[6:7], 0, -1, vcc
; SDAG-NEXT: s_movk_i32 s6, 0xff7f
-; SDAG-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v7, vcc
; SDAG-NEXT: s_mov_b32 s7, -1
-; SDAG-NEXT: v_cmp_eq_u64_e64 s[4:5], -1, v[2:3]
-; SDAG-NEXT: v_cmp_lt_u64_e64 s[6:7], s[6:7], v[0:1]
-; SDAG-NEXT: v_cmp_lt_i64_e32 vcc, -1, v[4:5]
-; SDAG-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
+; SDAG-NEXT: v_cmp_lt_i64_e64 s[4:5], -1, v[4:5]
+; SDAG-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1]
; SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
-; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
+; SDAG-NEXT: s_and_saveexec_b64 s[6:7], vcc
; SDAG-NEXT: s_xor_b64 s[10:11], exec, s[6:7]
; SDAG-NEXT: s_cbranch_execz .LBB1_7
; SDAG-NEXT: ; %bb.2: ; %fp-to-i-if-check.exp.size
-; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; SDAG-NEXT: v_add_co_u32_e64 v9, s[4:5], -1, v0
-; SDAG-NEXT: s_mov_b64 s[4:5], 0x432
+; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; SDAG-NEXT: v_add_co_u32_e32 v9, vcc, -1, v0
+; SDAG-NEXT: s_mov_b64 s[6:7], 0x432
; SDAG-NEXT: v_and_b32_e32 v0, 0xfffff, v5
-; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[4:5], v[6:7]
-; SDAG-NEXT: v_cndmask_b32_e64 v8, -1, 0, vcc
-; SDAG-NEXT: v_cndmask_b32_e64 v10, -1, 1, vcc
+; SDAG-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[6:7]
+; SDAG-NEXT: v_cndmask_b32_e64 v8, -1, 0, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v10, -1, 1, s[4:5]
; SDAG-NEXT: v_or_b32_e32 v5, 0x100000, v0
; SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
-; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
+; SDAG-NEXT: s_and_saveexec_b64 s[6:7], vcc
; SDAG-NEXT: s_xor_b64 s[12:13], exec, s[6:7]
; SDAG-NEXT: s_cbranch_execz .LBB1_4
; SDAG-NEXT: ; %bb.3: ; %fp-to-i-if-exp.large
@@ -420,75 +389,69 @@ define i128 @fptoui_f64_to_i128(double %x) {
; SDAG-NEXT: v_add_u32_e32 v3, 0xfffffbcd, v6
; SDAG-NEXT: v_lshrrev_b64 v[0:1], v0, v[4:5]
; SDAG-NEXT: v_lshlrev_b64 v[6:7], v2, v[4:5]
-; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v3
+; SDAG-NEXT: v_cmp_gt_u32_e32 vcc, 64, v3
; SDAG-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v3
; SDAG-NEXT: v_lshlrev_b64 v[3:4], v3, v[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v1, v7, v1, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[6:7]
-; SDAG-NEXT: v_cndmask_b32_e64 v0, v6, v0, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v7, 0, v3, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
+; SDAG-NEXT: v_cndmask_b32_e32 v7, 0, v3, vcc
; SDAG-NEXT: v_cndmask_b32_e64 v5, 0, v0, s[6:7]
; SDAG-NEXT: v_mul_lo_u32 v12, v10, v1
; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v7, v10, 0
; SDAG-NEXT: v_mov_b32_e32 v2, 0
-; SDAG-NEXT: v_cndmask_b32_e64 v13, 0, v4, s[4:5]
-; SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v13, v10, v[1:2]
+; SDAG-NEXT: v_cndmask_b32_e32 v13, 0, v4, vcc
+; SDAG-NEXT: v_mad_u64_u32 v[3:4], s[6:7], v13, v10, v[1:2]
; SDAG-NEXT: v_mul_lo_u32 v11, v8, v5
; SDAG-NEXT: v_mad_u64_u32 v[5:6], s[6:7], v10, v5, 0
; SDAG-NEXT: v_mov_b32_e32 v1, v3
-; SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v7, v8, v[1:2]
+; SDAG-NEXT: v_mad_u64_u32 v[1:2], s[6:7], v7, v8, v[1:2]
; SDAG-NEXT: v_add3_u32 v6, v6, v12, v11
-; SDAG-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v9, v7, v[5:6]
-; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v4, v2
-; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, s[4:5]
+; SDAG-NEXT: v_mad_u64_u32 v[5:6], s[6:7], v9, v7, v[5:6]
+; SDAG-NEXT: v_add_co_u32_e32 v2, vcc, v4, v2
+; SDAG-NEXT: v_addc_co_u32_e64 v3, s[6:7], 0, 0, vcc
; SDAG-NEXT: v_mul_lo_u32 v10, v9, v13
; SDAG-NEXT: v_mul_lo_u32 v7, v9, v7
-; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v13, v8, v[2:3]
+; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[6:7], v13, v8, v[2:3]
; SDAG-NEXT: ; implicit-def: $vgpr8
; SDAG-NEXT: ; implicit-def: $vgpr9
; SDAG-NEXT: v_add3_u32 v4, v7, v6, v10
-; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v2, v5
-; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], v3, v4, s[4:5]
+; SDAG-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5
+; SDAG-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v4, vcc
; SDAG-NEXT: ; implicit-def: $vgpr6_vgpr7
; SDAG-NEXT: ; implicit-def: $vgpr4_vgpr5
; SDAG-NEXT: ; implicit-def: $vgpr10
; SDAG-NEXT: .LBB1_4: ; %Flow
-; SDAG-NEXT: s_andn2_saveexec_b64 s[12:13], s[12:13]
+; SDAG-NEXT: s_andn2_saveexec_b64 s[6:7], s[12:13]
; SDAG-NEXT: s_cbranch_execz .LBB1_6
; SDAG-NEXT: ; %bb.5: ; %fp-to-i-if-exp.small
-; SDAG-NEXT: v_sub_u32_e32 v2, 0x433, v6
-; SDAG-NEXT: v_lshrrev_b64 v[0:1], v2, v[4:5]
-; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v2
-; SDAG-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v2
-; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v6, v0, v4, s[6:7]
-; SDAG-NEXT: v_cndmask_b32_e64 v5, v1, v5, s[6:7]
-; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v10, 0
+; SDAG-NEXT: v_sub_u32_e32 v0, 0x433, v6
+; SDAG-NEXT: v_lshrrev_b64 v[4:5], v0, v[4:5]
; SDAG-NEXT: v_mov_b32_e32 v2, 0
-; SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v5, v10, v[1:2]
-; SDAG-NEXT: v_mov_b32_e32 v1, v3
-; SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v6, v8, v[1:2]
-; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v4, v2
-; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, s[4:5]
-; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v8, v[2:3]
-; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v9, v6, v[2:3]
-; SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v9, v6, v[3:4]
+; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[12:13], v4, v10, 0
+; SDAG-NEXT: v_mad_u64_u32 v[6:7], s[12:13], v5, v10, v[1:2]
+; SDAG-NEXT: v_mov_b32_e32 v1, v6
+; SDAG-NEXT: v_mad_u64_u32 v[1:2], s[12:13], v4, v8, v[1:2]
+; SDAG-NEXT: v_add_co_u32_e32 v2, vcc, v7, v2
+; SDAG-NEXT: v_addc_co_u32_e64 v3, s[12:13], 0, 0, vcc
+; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[12:13], v5, v8, v[2:3]
+; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[12:13], v9, v4, v[2:3]
+; SDAG-NEXT: v_mad_u64_u32 v[3:4], s[12:13], v9, v4, v[3:4]
; SDAG-NEXT: v_mad_i32_i24 v3, v9, v5, v3
; SDAG-NEXT: .LBB1_6: ; %Flow1
-; SDAG-NEXT: s_or_b64 exec, exec, s[12:13]
+; SDAG-NEXT: s_or_b64 exec, exec, s[6:7]
; SDAG-NEXT: .LBB1_7: ; %Flow2
-; SDAG-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11]
+; SDAG-NEXT: s_andn2_saveexec_b64 s[6:7], s[10:11]
; SDAG-NEXT: ; %bb.8: ; %fp-to-i-if-saturate
; SDAG-NEXT: v_bfrev_b32_e32 v0, 1
; SDAG-NEXT: v_bfrev_b32_e32 v1, -2
-; SDAG-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
-; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
+; SDAG-NEXT: v_cndmask_b32_e64 v2, v0, v1, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5]
; SDAG-NEXT: v_mov_b32_e32 v3, v2
; SDAG-NEXT: v_mov_b32_e32 v0, v1
; SDAG-NEXT: v_mov_b32_e32 v2, v1
; SDAG-NEXT: ; %bb.9: ; %Flow3
-; SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
+; SDAG-NEXT: s_or_b64 exec, exec, s[6:7]
; SDAG-NEXT: .LBB1_10: ; %fp-to-i-cleanup
; SDAG-NEXT: s_or_b64 exec, exec, s[8:9]
; SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -496,15 +459,15 @@ define i128 @fptoui_f64_to_i128(double %x) {
; GISEL-LABEL: fptoui_f64_to_i128:
; GISEL: ; %bb.0: ; %fp-to-i-entry
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT: v_mov_b32_e32 v5, v1
; GISEL-NEXT: v_mov_b32_e32 v4, v0
-; GISEL-NEXT: v_lshrrev_b32_e32 v2, 20, v5
-; GISEL-NEXT: v_mov_b32_e32 v0, 0x3ff
-; GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GISEL-NEXT: v_mov_b32_e32 v5, v1
+; GISEL-NEXT: v_lshrrev_b64 v[0:1], 52, v[4:5]
+; GISEL-NEXT: v_mov_b32_e32 v1, 0x3ff
; GISEL-NEXT: v_mov_b32_e32 v7, 0
-; GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GISEL-NEXT: v_and_b32_e32 v6, 0x7ff, v2
-; GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[6:7], v[0:1]
+; GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GISEL-NEXT: v_bfe_u32 v6, v0, 0, 11
+; GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[6:7], v[1:2]
; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
; GISEL-NEXT: v_mov_b32_e32 v0, s4
; GISEL-NEXT: v_mov_b32_e32 v1, s5
@@ -515,19 +478,10 @@ define i128 @fptoui_f64_to_i128(double %x) {
; GISEL-NEXT: ; %bb.1: ; %fp-to-i-if-check.saturate
; GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffffb81, v6
; GISEL-NEXT: v_mov_b32_e32 v2, 0xffffff80
-; GISEL-NEXT: v_addc_co_u32_e64 v1, s[6:7], 0, -1, vcc
; GISEL-NEXT: v_mov_b32_e32 v3, -1
-; GISEL-NEXT: v_addc_co_u32_e64 v8, s[6:7], 0, -1, s[6:7]
-; GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3]
-; GISEL-NEXT: v_addc_co_u32_e64 v9, s[6:7], 0, -1, s[6:7]
-; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT: v_cmp_le_u64_e32 vcc, -1, v[8:9]
+; GISEL-NEXT: v_addc_co_u32_e64 v1, s[6:7], 0, -1, vcc
; GISEL-NEXT: v_cmp_lt_i64_e64 s[4:5], -1, v[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, -1, v[8:9]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GISEL-NEXT: v_and_b32_e32 v0, 1, v0
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3]
; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
; GISEL-NEXT: s_xor_b64 s[14:15], exec, s[6:7]
@@ -632,22 +586,16 @@ define i128 @fptoui_f64_to_i128(double %x) {
; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[16:17]
; GISEL-NEXT: s_cbranch_execz .LBB1_6
; GISEL-NEXT: ; %bb.5: ; %fp-to-i-if-exp.small
-; GISEL-NEXT: v_sub_co_u32_e32 v2, vcc, 0x433, v6
-; GISEL-NEXT: v_lshrrev_b64 v[0:1], v2, v[4:5]
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v6, v0, v4, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v7, v1, v5, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[6:7], v6, v9, 0
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v6, v8, 0
-; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[6:7], v7, v9, v[4:5]
-; GISEL-NEXT: v_mul_lo_u32 v10, v7, v9
-; GISEL-NEXT: v_mad_u64_u32 v[4:5], vcc, v6, v9, v[1:2]
-; GISEL-NEXT: v_mul_lo_u32 v6, v6, v9
-; GISEL-NEXT: v_mad_u64_u32 v[1:2], s[6:7], v7, v8, v[4:5]
-; GISEL-NEXT: v_addc_co_u32_e64 v3, s[6:7], v3, v6, s[6:7]
+; GISEL-NEXT: v_sub_co_u32_e32 v0, vcc, 0x433, v6
+; GISEL-NEXT: v_lshrrev_b64 v[4:5], v0, v[4:5]
+; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[6:7], v4, v9, 0
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v4, v8, 0
+; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[6:7], v5, v9, v[6:7]
+; GISEL-NEXT: v_mul_lo_u32 v10, v5, v9
+; GISEL-NEXT: v_mad_u64_u32 v[6:7], vcc, v4, v9, v[1:2]
+; GISEL-NEXT: v_mul_lo_u32 v4, v4, v9
+; GISEL-NEXT: v_mad_u64_u32 v[1:2], s[6:7], v5, v8, v[6:7]
+; GISEL-NEXT: v_addc_co_u32_e64 v3, s[6:7], v3, v4, s[6:7]
; GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v10, vcc
; GISEL-NEXT: .LBB1_6: ; %Flow1
; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
@@ -737,28 +685,20 @@ define i128 @fptosi_f32_to_i128(float %x) {
; SDAG: ; %bb.0: ; %fp-to-i-entry
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: v_mov_b32_e32 v4, v0
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
-; SDAG-NEXT: v_bfe_u32 v5, v4, 23, 8
+; SDAG-NEXT: v_bfe_u32 v7, v4, 23, 8
; SDAG-NEXT: s_movk_i32 s4, 0x7e
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: v_mov_b32_e32 v2, 0
; SDAG-NEXT: v_mov_b32_e32 v1, 0
; SDAG-NEXT: v_mov_b32_e32 v3, 0
-; SDAG-NEXT: v_cmp_lt_u32_e32 vcc, s4, v5
+; SDAG-NEXT: v_cmp_lt_u32_e32 vcc, s4, v7
; SDAG-NEXT: s_and_saveexec_b64 s[8:9], vcc
; SDAG-NEXT: s_cbranch_execz .LBB2_10
; SDAG-NEXT: ; %bb.1: ; %fp-to-i-if-check.saturate
-; SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffff01, v5
-; SDAG-NEXT: v_mov_b32_e32 v1, -1
-; SDAG-NEXT: v_mov_b32_e32 v6, 0
-; SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
-; SDAG-NEXT: v_addc_co_u32_e32 v2, vcc, -1, v6, vcc
-; SDAG-NEXT: s_movk_i32 s6, 0xff7f
-; SDAG-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v6, vcc
-; SDAG-NEXT: s_mov_b32 s7, -1
-; SDAG-NEXT: v_cmp_eq_u64_e64 s[4:5], -1, v[2:3]
-; SDAG-NEXT: v_cmp_lt_u64_e64 s[6:7], s[6:7], v[0:1]
+; SDAG-NEXT: v_add_u32_e32 v0, 0xffffff01, v7
+; SDAG-NEXT: s_movk_i32 s4, 0xff7f
; SDAG-NEXT: v_cmp_lt_i32_e32 vcc, -1, v4
-; SDAG-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
+; SDAG-NEXT: v_cmp_lt_u32_e64 s[4:5], s4, v0
; SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
@@ -766,76 +706,68 @@ define i128 @fptosi_f32_to_i128(float %x) {
; SDAG-NEXT: s_cbranch_execz .LBB2_7
; SDAG-NEXT: ; %bb.2: ; %fp-to-i-if-check.exp.size
; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; SDAG-NEXT: v_add_co_u32_e64 v10, s[4:5], -1, v0
-; SDAG-NEXT: s_mov_b64 s[4:5], 0x95
+; SDAG-NEXT: v_add_co_u32_e64 v6, s[4:5], -1, v0
; SDAG-NEXT: v_and_b32_e32 v0, 0x7fffff, v4
-; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[4:5], v[5:6]
-; SDAG-NEXT: v_cndmask_b32_e64 v9, -1, 0, vcc
-; SDAG-NEXT: v_cndmask_b32_e64 v11, -1, 1, vcc
-; SDAG-NEXT: v_or_b32_e32 v7, 0x800000, v0
-; SDAG-NEXT: v_mov_b32_e32 v8, v6
-; SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
+; SDAG-NEXT: s_movk_i32 s4, 0x95
+; SDAG-NEXT: v_cndmask_b32_e64 v5, -1, 0, vcc
+; SDAG-NEXT: v_cndmask_b32_e64 v8, -1, 1, vcc
+; SDAG-NEXT: v_or_b32_e32 v1, 0x800000, v0
+; SDAG-NEXT: v_cmp_lt_u32_e64 s[4:5], s4, v7
; SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
; SDAG-NEXT: s_xor_b64 s[12:13], exec, s[6:7]
; SDAG-NEXT: s_cbranch_execz .LBB2_4
; SDAG-NEXT: ; %bb.3: ; %fp-to-i-if-exp.large
-; SDAG-NEXT: v_sub_u32_e32 v0, 0xd6, v5
-; SDAG-NEXT: v_add_u32_e32 v2, 0xffffff2a, v5
-; SDAG-NEXT: v_add_u32_e32 v4, 0xffffff6a, v5
-; SDAG-NEXT: v_lshrrev_b64 v[0:1], v0, v[7:8]
-; SDAG-NEXT: v_lshlrev_b64 v[2:3], v2, v[7:8]
-; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v4
-; SDAG-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[4:5]
-; SDAG-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v4
-; SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v1, s[6:7]
-; SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v0, s[4:5]
-; SDAG-NEXT: v_lshlrev_b64 v[0:1], v4, v[7:8]
-; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[6:7]
-; SDAG-NEXT: v_cndmask_b32_e64 v7, 0, v0, s[4:5]
-; SDAG-NEXT: v_mad_u64_u32 v[4:5], s[6:7], v7, v11, 0
-; SDAG-NEXT: v_cndmask_b32_e64 v13, 0, v1, s[4:5]
-; SDAG-NEXT: v_mul_lo_u32 v8, v9, v2
-; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v13, v11, v[5:6]
-; SDAG-NEXT: v_mul_lo_u32 v12, v11, v3
-; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[6:7], v11, v2, 0
-; SDAG-NEXT: v_mov_b32_e32 v5, v0
-; SDAG-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v7, v9, v[5:6]
-; SDAG-NEXT: v_add3_u32 v3, v3, v12, v8
-; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v7, v[2:3]
-; SDAG-NEXT: v_add_co_u32_e64 v0, s[4:5], v1, v6
-; SDAG-NEXT: v_addc_co_u32_e64 v1, s[4:5], 0, 0, s[4:5]
-; SDAG-NEXT: v_mul_lo_u32 v8, v10, v13
-; SDAG-NEXT: v_mul_lo_u32 v7, v10, v7
-; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v13, v9, v[0:1]
-; SDAG-NEXT: ; implicit-def: $vgpr11
-; SDAG-NEXT: ; implicit-def: $vgpr9
-; SDAG-NEXT: ; implicit-def: $vgpr10
-; SDAG-NEXT: v_add3_u32 v3, v7, v3, v8
-; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v0, v2
-; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], v1, v3, s[4:5]
-; SDAG-NEXT: v_mov_b32_e32 v0, v4
-; SDAG-NEXT: v_mov_b32_e32 v1, v5
-; SDAG-NEXT: ; implicit-def: $vgpr5_vgpr6
-; SDAG-NEXT: ; implicit-def: $vgpr7_vgpr8
+; SDAG-NEXT: v_add_u32_e32 v0, 0xffffff6a, v7
+; SDAG-NEXT: v_mov_b32_e32 v2, 0
+; SDAG-NEXT: v_sub_u32_e32 v3, 0xd6, v7
+; SDAG-NEXT: v_add_u32_e32 v7, 0xffffff2a, v7
+; SDAG-NEXT: v_lshrrev_b64 v[3:4], v3, v[1:2]
+; SDAG-NEXT: v_lshlrev_b64 v[9:10], v7, v[1:2]
+; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v0
+; SDAG-NEXT: v_cndmask_b32_e64 v4, v10, v4, s[4:5]
+; SDAG-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v0
+; SDAG-NEXT: v_cndmask_b32_e64 v7, 0, v4, s[6:7]
+; SDAG-NEXT: v_cndmask_b32_e64 v9, v9, v3, s[4:5]
+; SDAG-NEXT: v_lshlrev_b64 v[3:4], v0, v[1:2]
+; SDAG-NEXT: v_cndmask_b32_e64 v9, 0, v9, s[6:7]
+; SDAG-NEXT: v_cndmask_b32_e64 v11, 0, v3, s[4:5]
+; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v11, v8, 0
+; SDAG-NEXT: v_cndmask_b32_e64 v13, 0, v4, s[4:5]
+; SDAG-NEXT: v_mul_lo_u32 v12, v5, v9
+; SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v13, v8, v[1:2]
+; SDAG-NEXT: v_mul_lo_u32 v7, v8, v7
+; SDAG-NEXT: v_mad_u64_u32 v[9:10], s[6:7], v8, v9, 0
+; SDAG-NEXT: v_mov_b32_e32 v1, v3
+; SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v11, v5, v[1:2]
+; SDAG-NEXT: v_add3_u32 v10, v10, v7, v12
+; SDAG-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v6, v11, v[9:10]
+; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v4, v2
+; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, s[4:5]
+; SDAG-NEXT: v_mul_lo_u32 v9, v6, v13
+; SDAG-NEXT: v_mul_lo_u32 v6, v6, v11
+; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v13, v5, v[2:3]
+; SDAG-NEXT: ; implicit-def: $vgpr5
+; SDAG-NEXT: v_add3_u32 v4, v6, v8, v9
+; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v2, v7
+; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], v3, v4, s[4:5]
+; SDAG-NEXT: ; implicit-def: $vgpr7
+; SDAG-NEXT: ; implicit-def: $vgpr8
+; SDAG-NEXT: ; implicit-def: $vgpr6
; SDAG-NEXT: .LBB2_4: ; %Flow
-; SDAG-NEXT: s_andn2_saveexec_b64 s[6:7], s[12:13]
+; SDAG-NEXT: s_andn2_saveexec_b64 s[4:5], s[12:13]
; SDAG-NEXT: s_cbranch_execz .LBB2_6
; SDAG-NEXT: ; %bb.5: ; %fp-to-i-if-exp.small
-; SDAG-NEXT: v_sub_u32_e32 v2, 0x96, v5
-; SDAG-NEXT: v_lshrrev_b64 v[0:1], v2, v[7:8]
-; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v2
-; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[4:5]
-; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v2
-; SDAG-NEXT: v_cndmask_b32_e64 v3, v0, v7, s[4:5]
-; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v11, 0
+; SDAG-NEXT: v_sub_u32_e32 v0, 0x96, v7
+; SDAG-NEXT: v_lshrrev_b32_e32 v3, v0, v1
+; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v3, v8, 0
; SDAG-NEXT: v_mov_b32_e32 v2, 0
-; SDAG-NEXT: v_mov_b32_e32 v6, v2
-; SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v3, v9, v[1:2]
-; SDAG-NEXT: v_mad_i64_i32 v[2:3], s[4:5], v10, v3, v[5:6]
-; SDAG-NEXT: v_mov_b32_e32 v1, v4
+; SDAG-NEXT: v_mov_b32_e32 v9, v2
+; SDAG-NEXT: v_mad_u64_u32 v[7:8], s[6:7], v3, v5, v[1:2]
+; SDAG-NEXT: v_mad_i64_i32 v[2:3], s[6:7], v6, v3, v[8:9]
+; SDAG-NEXT: v_mov_b32_e32 v1, v7
; SDAG-NEXT: .LBB2_6: ; %Flow1
-; SDAG-NEXT: s_or_b64 exec, exec, s[6:7]
+; SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
; SDAG-NEXT: .LBB2_7: ; %Flow2
; SDAG-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11]
; SDAG-NEXT: ; %bb.8: ; %fp-to-i-if-saturate
@@ -856,14 +788,10 @@ define i128 @fptosi_f32_to_i128(float %x) {
; GISEL: ; %bb.0: ; %fp-to-i-entry
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_mov_b32_e32 v4, v0
-; GISEL-NEXT: v_mov_b32_e32 v5, 0
-; GISEL-NEXT: v_lshrrev_b64 v[0:1], 23, v[4:5]
-; GISEL-NEXT: s_mov_b64 s[4:5], 0
-; GISEL-NEXT: v_bfe_u32 v6, v0, 0, 8
+; GISEL-NEXT: v_bfe_u32 v6, v4, 23, 8
; GISEL-NEXT: v_mov_b32_e32 v0, 0x7f
-; GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GISEL-NEXT: v_mov_b32_e32 v7, v5
-; GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[6:7], v[0:1]
+; GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v6, v0
; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
; GISEL-NEXT: v_mov_b32_e32 v0, s4
; GISEL-NEXT: v_mov_b32_e32 v1, s5
@@ -872,146 +800,132 @@ define i128 @fptosi_f32_to_i128(float %x) {
; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc
; GISEL-NEXT: s_cbranch_execz .LBB2_10
; GISEL-NEXT: ; %bb.1: ; %fp-to-i-if-check.saturate
-; GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffff01, v6
-; GISEL-NEXT: v_mov_b32_e32 v2, 0xffffff80
-; GISEL-NEXT: v_addc_co_u32_e64 v1, s[6:7], 0, -1, vcc
-; GISEL-NEXT: v_mov_b32_e32 v3, -1
-; GISEL-NEXT: v_addc_co_u32_e64 v8, s[6:7], 0, -1, s[6:7]
-; GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3]
-; GISEL-NEXT: v_addc_co_u32_e64 v9, s[6:7], 0, -1, s[6:7]
-; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT: v_cmp_le_u64_e32 vcc, -1, v[8:9]
-; GISEL-NEXT: v_cmp_lt_i32_e64 s[4:5], -1, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, -1, v[8:9]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GISEL-NEXT: v_and_b32_e32 v0, 1, v0
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT: v_add_u32_e32 v0, 0xffffff01, v6
+; GISEL-NEXT: v_mov_b32_e32 v1, 0xffffff80
+; GISEL-NEXT: v_cmp_lt_i32_e32 vcc, -1, v4
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v1
; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
-; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
; GISEL-NEXT: s_xor_b64 s[14:15], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB2_7
; GISEL-NEXT: ; %bb.2: ; %fp-to-i-if-check.exp.size
-; GISEL-NEXT: s_xor_b64 s[6:7], s[4:5], -1
-; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[6:7]
+; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1
+; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5]
; GISEL-NEXT: v_and_b32_e32 v0, 1, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[6:7]
+; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
; GISEL-NEXT: v_lshlrev_b16_e32 v2, 1, v0
; GISEL-NEXT: v_or_b32_e32 v1, v1, v2
; GISEL-NEXT: v_lshlrev_b16_e32 v3, 2, v0
; GISEL-NEXT: v_or_b32_e32 v2, v0, v2
; GISEL-NEXT: v_or_b32_e32 v1, v1, v3
-; GISEL-NEXT: v_lshlrev_b16_e32 v8, 3, v0
+; GISEL-NEXT: v_lshlrev_b16_e32 v7, 3, v0
; GISEL-NEXT: v_or_b32_e32 v2, v2, v3
+; GISEL-NEXT: v_or_b32_e32 v1, v1, v7
+; GISEL-NEXT: v_lshlrev_b16_e32 v8, 4, v0
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v7
; GISEL-NEXT: v_or_b32_e32 v1, v1, v8
-; GISEL-NEXT: v_lshlrev_b16_e32 v9, 4, v0
+; GISEL-NEXT: v_lshlrev_b16_e32 v9, 5, v0
; GISEL-NEXT: v_or_b32_e32 v2, v2, v8
; GISEL-NEXT: v_or_b32_e32 v1, v1, v9
-; GISEL-NEXT: v_lshlrev_b16_e32 v10, 5, v0
+; GISEL-NEXT: v_lshlrev_b16_e32 v10, 6, v0
; GISEL-NEXT: v_or_b32_e32 v2, v2, v9
; GISEL-NEXT: v_or_b32_e32 v1, v1, v10
-; GISEL-NEXT: v_lshlrev_b16_e32 v11, 6, v0
+; GISEL-NEXT: v_lshlrev_b16_e32 v11, 7, v0
; GISEL-NEXT: v_or_b32_e32 v2, v2, v10
; GISEL-NEXT: v_or_b32_e32 v1, v1, v11
-; GISEL-NEXT: v_lshlrev_b16_e32 v12, 7, v0
+; GISEL-NEXT: v_lshlrev_b16_e32 v12, 8, v0
; GISEL-NEXT: v_or_b32_e32 v2, v2, v11
; GISEL-NEXT: v_or_b32_e32 v1, v1, v12
-; GISEL-NEXT: v_lshlrev_b16_e32 v13, 8, v0
+; GISEL-NEXT: v_lshlrev_b16_e32 v13, 9, v0
; GISEL-NEXT: v_or_b32_e32 v2, v2, v12
; GISEL-NEXT: v_or_b32_e32 v1, v1, v13
-; GISEL-NEXT: v_lshlrev_b16_e32 v14, 9, v0
+; GISEL-NEXT: v_lshlrev_b16_e32 v14, 10, v0
; GISEL-NEXT: v_or_b32_e32 v2, v2, v13
; GISEL-NEXT: v_or_b32_e32 v1, v1, v14
-; GISEL-NEXT: v_lshlrev_b16_e32 v15, 10, v0
+; GISEL-NEXT: v_lshlrev_b16_e32 v15, 11, v0
; GISEL-NEXT: v_or_b32_e32 v2, v2, v14
; GISEL-NEXT: v_or_b32_e32 v1, v1, v15
-; GISEL-NEXT: v_lshlrev_b16_e32 v16, 11, v0
+; GISEL-NEXT: v_lshlrev_b16_e32 v16, 12, v0
; GISEL-NEXT: v_or_b32_e32 v2, v2, v15
; GISEL-NEXT: v_or_b32_e32 v1, v1, v16
-; GISEL-NEXT: v_lshlrev_b16_e32 v17, 12, v0
+; GISEL-NEXT: v_lshlrev_b16_e32 v17, 13, v0
; GISEL-NEXT: v_or_b32_e32 v2, v2, v16
; GISEL-NEXT: v_or_b32_e32 v1, v1, v17
-; GISEL-NEXT: v_lshlrev_b16_e32 v18, 13, v0
+; GISEL-NEXT: v_lshlrev_b16_e32 v18, 14, v0
; GISEL-NEXT: v_or_b32_e32 v2, v2, v17
; GISEL-NEXT: v_or_b32_e32 v1, v1, v18
-; GISEL-NEXT: v_lshlrev_b16_e32 v19, 14, v0
-; GISEL-NEXT: v_or_b32_e32 v2, v2, v18
-; GISEL-NEXT: v_or_b32_e32 v1, v1, v19
; GISEL-NEXT: v_lshlrev_b16_e32 v0, 15, v0
-; GISEL-NEXT: v_or_b32_e32 v2, v2, v19
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v18
; GISEL-NEXT: v_or_b32_e32 v1, v1, v0
; GISEL-NEXT: v_or_b32_e32 v0, v2, v0
; GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v0
-; GISEL-NEXT: v_lshl_or_b32 v8, v0, 16, v0
-; GISEL-NEXT: v_or3_b32 v9, v1, v2, 1
+; GISEL-NEXT: v_lshl_or_b32 v7, v0, 16, v0
+; GISEL-NEXT: v_or3_b32 v8, v1, v2, 1
+; GISEL-NEXT: v_mov_b32_e32 v0, 0x7fffff
+; GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GISEL-NEXT: v_and_or_b32 v4, v4, v0, v1
; GISEL-NEXT: v_mov_b32_e32 v0, 0x96
-; GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GISEL-NEXT: v_and_b32_e32 v2, 0x7fffff, v4
-; GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[6:7], v[0:1]
-; GISEL-NEXT: v_or_b32_e32 v4, 0x800000, v2
+; GISEL-NEXT: v_mov_b32_e32 v5, 0
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v0
; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
-; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB2_4
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-exp.large
; GISEL-NEXT: v_add_u32_e32 v2, 0xffffff6a, v6
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v2, v[4:5]
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v1, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v10, v8, 0
+; GISEL-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v2
+; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, v0, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, v1, s[4:5]
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v11, v7, 0
; GISEL-NEXT: v_add_u32_e32 v3, 0xffffff2a, v6
; GISEL-NEXT: v_sub_u32_e32 v6, 64, v2
-; GISEL-NEXT: v_lshrrev_b64 v[6:7], v6, v[4:5]
+; GISEL-NEXT: v_lshrrev_b64 v[9:10], v6, v[4:5]
; GISEL-NEXT: v_lshlrev_b64 v[3:4], v3, v[4:5]
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[8:9], v12, v7, v[0:1]
+; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[4:5]
; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[8:9], v11, v8, v[0:1]
-; GISEL-NEXT: v_cndmask_b32_e64 v12, v3, 0, s[6:7]
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[8:9], v10, v9, 0
-; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[8:9], v12, v9, v[5:6]
-; GISEL-NEXT: v_mul_lo_u32 v13, v11, v8
-; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[8:9], v10, v8, v[1:2]
-; GISEL-NEXT: v_mul_lo_u32 v10, v10, v8
-; GISEL-NEXT: v_mad_u64_u32 v[1:2], s[10:11], v11, v9, v[5:6]
-; GISEL-NEXT: v_addc_co_u32_e64 v3, s[10:11], v3, v10, s[10:11]
+; GISEL-NEXT: v_cndmask_b32_e64 v9, v3, 0, s[6:7]
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[8:9], v11, v8, 0
+; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[8:9], v9, v8, v[5:6]
+; GISEL-NEXT: v_mul_lo_u32 v13, v12, v7
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[8:9], v11, v7, v[1:2]
+; GISEL-NEXT: v_mul_lo_u32 v11, v11, v7
+; GISEL-NEXT: v_mad_u64_u32 v[1:2], s[10:11], v12, v8, v[5:6]
+; GISEL-NEXT: v_addc_co_u32_e64 v3, s[10:11], v3, v11, s[10:11]
; GISEL-NEXT: v_addc_co_u32_e64 v3, s[8:9], v3, v13, s[8:9]
-; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[8:9], v12, v8, v[3:4]
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v4, v7, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[8:9], v9, v7, v[3:4]
+; GISEL-NEXT: v_cndmask_b32_e64 v3, v4, v10, s[4:5]
; GISEL-NEXT: v_cndmask_b32_e64 v7, v3, 0, s[6:7]
-; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[6:7], v7, v9, v[5:6]
+; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v7, v8, v[5:6]
; GISEL-NEXT: ; implicit-def: $vgpr6
-; GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
-; GISEL-NEXT: ; implicit-def: $vgpr9
+; GISEL-NEXT: ; implicit-def: $vgpr4
; GISEL-NEXT: ; implicit-def: $vgpr8
+; GISEL-NEXT: ; implicit-def: $vgpr7
; GISEL-NEXT: .LBB2_4: ; %Flow
; GISEL-NEXT: s_andn2_saveexec_b64 s[6:7], s[16:17]
; GISEL-NEXT: s_cbranch_execz .LBB2_6
; GISEL-NEXT: ; %bb.5: ; %fp-to-i-if-exp.small
-; GISEL-NEXT: v_sub_co_u32_e32 v2, vcc, 0x96, v6
-; GISEL-NEXT: v_lshrrev_b64 v[0:1], v2, v[4:5]
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v6, v0, v4, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[8:9], v6, v9, 0
-; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[8:9], v6, v8, 0
-; GISEL-NEXT: v_mul_lo_u32 v7, v6, v8
-; GISEL-NEXT: v_mad_u64_u32 v[4:5], vcc, v6, v8, v[1:2]
-; GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v7, vcc
+; GISEL-NEXT: v_sub_u32_e32 v0, 0x96, v6
+; GISEL-NEXT: v_lshrrev_b32_e32 v6, v0, v4
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, 0
+; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v7, 0
+; GISEL-NEXT: v_mul_lo_u32 v8, v6, v7
+; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v7, v[1:2]
+; GISEL-NEXT: v_addc_co_u32_e64 v3, s[4:5], v3, v8, s[4:5]
; GISEL-NEXT: v_mov_b32_e32 v1, v4
; GISEL-NEXT: v_mov_b32_e32 v2, v5
; GISEL-NEXT: .LBB2_6: ; %Flow1
; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
; GISEL-NEXT: .LBB2_7: ; %Flow2
-; GISEL-NEXT: s_andn2_saveexec_b64 s[6:7], s[14:15]
+; GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[14:15]
; GISEL-NEXT: s_cbranch_execz .LBB2_9
; GISEL-NEXT: ; %bb.8: ; %fp-to-i-if-saturate
-; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
; GISEL-NEXT: v_and_b32_e32 v1, 1, v1
-; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GISEL-NEXT: v_lshlrev_b32_e32 v2, 1, v1
; GISEL-NEXT: v_or_b32_e32 v0, v0, v2
; GISEL-NEXT: v_lshlrev_b32_e32 v3, 2, v1
@@ -1078,7 +992,7 @@ define i128 @fptosi_f32_to_i128(float %x) {
; GISEL-NEXT: v_add_u32_e32 v3, 0x80000000, v1
; GISEL-NEXT: v_mov_b32_e32 v2, v1
; GISEL-NEXT: .LBB2_9: ; %Flow3
-; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
+; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
; GISEL-NEXT: .LBB2_10: ; %fp-to-i-cleanup
; GISEL-NEXT: s_or_b64 exec, exec, s[12:13]
; GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1091,28 +1005,20 @@ define i128 @fptoui_f32_to_i128(float %x) {
; SDAG: ; %bb.0: ; %fp-to-i-entry
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: v_mov_b32_e32 v4, v0
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
-; SDAG-NEXT: v_bfe_u32 v5, v4, 23, 8
+; SDAG-NEXT: v_bfe_u32 v7, v4, 23, 8
; SDAG-NEXT: s_movk_i32 s4, 0x7e
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: v_mov_b32_e32 v2, 0
; SDAG-NEXT: v_mov_b32_e32 v1, 0
; SDAG-NEXT: v_mov_b32_e32 v3, 0
-; SDAG-NEXT: v_cmp_lt_u32_e32 vcc, s4, v5
+; SDAG-NEXT: v_cmp_lt_u32_e32 vcc, s4, v7
; SDAG-NEXT: s_and_saveexec_b64 s[8:9], vcc
; SDAG-NEXT: s_cbranch_execz .LBB3_10
; SDAG-NEXT: ; %bb.1: ; %fp-to-i-if-check.saturate
-; SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffff01, v5
-; SDAG-NEXT: v_mov_b32_e32 v1, -1
-; SDAG-NEXT: v_mov_b32_e32 v6, 0
-; SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
-; SDAG-NEXT: v_addc_co_u32_e32 v2, vcc, -1, v6, vcc
-; SDAG-NEXT: s_movk_i32 s6, 0xff7f
-; SDAG-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v6, vcc
-; SDAG-NEXT: s_mov_b32 s7, -1
-; SDAG-NEXT: v_cmp_eq_u64_e64 s[4:5], -1, v[2:3]
-; SDAG-NEXT: v_cmp_lt_u64_e64 s[6:7], s[6:7], v[0:1]
+; SDAG-NEXT: v_add_u32_e32 v0, 0xffffff01, v7
+; SDAG-NEXT: s_movk_i32 s4, 0xff7f
; SDAG-NEXT: v_cmp_lt_i32_e32 vcc, -1, v4
-; SDAG-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
+; SDAG-NEXT: v_cmp_lt_u32_e64 s[4:5], s4, v0
; SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
@@ -1120,76 +1026,68 @@ define i128 @fptoui_f32_to_i128(float %x) {
; SDAG-NEXT: s_cbranch_execz .LBB3_7
; SDAG-NEXT: ; %bb.2: ; %fp-to-i-if-check.exp.size
; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; SDAG-NEXT: v_add_co_u32_e64 v10, s[4:5], -1, v0
-; SDAG-NEXT: s_mov_b64 s[4:5], 0x95
+; SDAG-NEXT: v_add_co_u32_e64 v6, s[4:5], -1, v0
; SDAG-NEXT: v_and_b32_e32 v0, 0x7fffff, v4
-; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[4:5], v[5:6]
-; SDAG-NEXT: v_cndmask_b32_e64 v9, -1, 0, vcc
-; SDAG-NEXT: v_cndmask_b32_e64 v11, -1, 1, vcc
-; SDAG-NEXT: v_or_b32_e32 v7, 0x800000, v0
-; SDAG-NEXT: v_mov_b32_e32 v8, v6
-; SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
+; SDAG-NEXT: s_movk_i32 s4, 0x95
+; SDAG-NEXT: v_cndmask_b32_e64 v5, -1, 0, vcc
+; SDAG-NEXT: v_cndmask_b32_e64 v8, -1, 1, vcc
+; SDAG-NEXT: v_or_b32_e32 v1, 0x800000, v0
+; SDAG-NEXT: v_cmp_lt_u32_e64 s[4:5], s4, v7
; SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
; SDAG-NEXT: s_xor_b64 s[12:13], exec, s[6:7]
; SDAG-NEXT: s_cbranch_execz .LBB3_4
; SDAG-NEXT: ; %bb.3: ; %fp-to-i-if-exp.large
-; SDAG-NEXT: v_sub_u32_e32 v0, 0xd6, v5
-; SDAG-NEXT: v_add_u32_e32 v2, 0xffffff2a, v5
-; SDAG-NEXT: v_add_u32_e32 v4, 0xffffff6a, v5
-; SDAG-NEXT: v_lshrrev_b64 v[0:1], v0, v[7:8]
-; SDAG-NEXT: v_lshlrev_b64 v[2:3], v2, v[7:8]
-; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v4
-; SDAG-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[4:5]
-; SDAG-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v4
-; SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v1, s[6:7]
-; SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v0, s[4:5]
-; SDAG-NEXT: v_lshlrev_b64 v[0:1], v4, v[7:8]
-; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[6:7]
-; SDAG-NEXT: v_cndmask_b32_e64 v7, 0, v0, s[4:5]
-; SDAG-NEXT: v_mad_u64_u32 v[4:5], s[6:7], v7, v11, 0
-; SDAG-NEXT: v_cndmask_b32_e64 v13, 0, v1, s[4:5]
-; SDAG-NEXT: v_mul_lo_u32 v8, v9, v2
-; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v13, v11, v[5:6]
-; SDAG-NEXT: v_mul_lo_u32 v12, v11, v3
-; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[6:7], v11, v2, 0
-; SDAG-NEXT: v_mov_b32_e32 v5, v0
-; SDAG-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v7, v9, v[5:6]
-; SDAG-NEXT: v_add3_u32 v3, v3, v12, v8
-; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v7, v[2:3]
-; SDAG-NEXT: v_add_co_u32_e64 v0, s[4:5], v1, v6
-; SDAG-NEXT: v_addc_co_u32_e64 v1, s[4:5], 0, 0, s[4:5]
-; SDAG-NEXT: v_mul_lo_u32 v8, v10, v13
-; SDAG-NEXT: v_mul_lo_u32 v7, v10, v7
-; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v13, v9, v[0:1]
-; SDAG-NEXT: ; implicit-def: $vgpr11
-; SDAG-NEXT: ; implicit-def: $vgpr9
-; SDAG-NEXT: ; implicit-def: $vgpr10
-; SDAG-NEXT: v_add3_u32 v3, v7, v3, v8
-; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v0, v2
-; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], v1, v3, s[4:5]
-; SDAG-NEXT: v_mov_b32_e32 v0, v4
-; SDAG-NEXT: v_mov_b32_e32 v1, v5
-; SDAG-NEXT: ; implicit-def: $vgpr5_vgpr6
-; SDAG-NEXT: ; implicit-def: $vgpr7_vgpr8
+; SDAG-NEXT: v_add_u32_e32 v0, 0xffffff6a, v7
+; SDAG-NEXT: v_mov_b32_e32 v2, 0
+; SDAG-NEXT: v_sub_u32_e32 v3, 0xd6, v7
+; SDAG-NEXT: v_add_u32_e32 v7, 0xffffff2a, v7
+; SDAG-NEXT: v_lshrrev_b64 v[3:4], v3, v[1:2]
+; SDAG-NEXT: v_lshlrev_b64 v[9:10], v7, v[1:2]
+; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v0
+; SDAG-NEXT: v_cndmask_b32_e64 v4, v10, v4, s[4:5]
+; SDAG-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v0
+; SDAG-NEXT: v_cndmask_b32_e64 v7, 0, v4, s[6:7]
+; SDAG-NEXT: v_cndmask_b32_e64 v9, v9, v3, s[4:5]
+; SDAG-NEXT: v_lshlrev_b64 v[3:4], v0, v[1:2]
+; SDAG-NEXT: v_cndmask_b32_e64 v9, 0, v9, s[6:7]
+; SDAG-NEXT: v_cndmask_b32_e64 v11, 0, v3, s[4:5]
+; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v11, v8, 0
+; SDAG-NEXT: v_cndmask_b32_e64 v13, 0, v4, s[4:5]
+; SDAG-NEXT: v_mul_lo_u32 v12, v5, v9
+; SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v13, v8, v[1:2]
+; SDAG-NEXT: v_mul_lo_u32 v7, v8, v7
+; SDAG-NEXT: v_mad_u64_u32 v[9:10], s[6:7], v8, v9, 0
+; SDAG-NEXT: v_mov_b32_e32 v1, v3
+; SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v11, v5, v[1:2]
+; SDAG-NEXT: v_add3_u32 v10, v10, v7, v12
+; SDAG-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v6, v11, v[9:10]
+; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v4, v2
+; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, s[4:5]
+; SDAG-NEXT: v_mul_lo_u32 v9, v6, v13
+; SDAG-NEXT: v_mul_lo_u32 v6, v6, v11
+; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v13, v5, v[2:3]
+; SDAG-NEXT: ; implicit-def: $vgpr5
+; SDAG-NEXT: v_add3_u32 v4, v6, v8, v9
+; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v2, v7
+; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], v3, v4, s[4:5]
+; SDAG-NEXT: ; implicit-def: $vgpr7
+; SDAG-NEXT: ; implicit-def: $vgpr8
+; SDAG-NEXT: ; implicit-def: $vgpr6
; SDAG-NEXT: .LBB3_4: ; %Flow
-; SDAG-NEXT: s_andn2_saveexec_b64 s[6:7], s[12:13]
+; SDAG-NEXT: s_andn2_saveexec_b64 s[4:5], s[12:13]
; SDAG-NEXT: s_cbranch_execz .LBB3_6
; SDAG-NEXT: ; %bb.5: ; %fp-to-i-if-exp.small
-; SDAG-NEXT: v_sub_u32_e32 v2, 0x96, v5
-; SDAG-NEXT: v_lshrrev_b64 v[0:1], v2, v[7:8]
-; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v2
-; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[4:5]
-; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v2
-; SDAG-NEXT: v_cndmask_b32_e64 v3, v0, v7, s[4:5]
-; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v11, 0
+; SDAG-NEXT: v_sub_u32_e32 v0, 0x96, v7
+; SDAG-NEXT: v_lshrrev_b32_e32 v3, v0, v1
+; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v3, v8, 0
; SDAG-NEXT: v_mov_b32_e32 v2, 0
-; SDAG-NEXT: v_mov_b32_e32 v6, v2
-; SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v3, v9, v[1:2]
-; SDAG-NEXT: v_mad_i64_i32 v[2:3], s[4:5], v10, v3, v[5:6]
-; SDAG-NEXT: v_mov_b32_e32 v1, v4
+; SDAG-NEXT: v_mov_b32_e32 v9, v2
+; SDAG-NEXT: v_mad_u64_u32 v[7:8], s[6:7], v3, v5, v[1:2]
+; SDAG-NEXT: v_mad_i64_i32 v[2:3], s[6:7], v6, v3, v[8:9]
+; SDAG-NEXT: v_mov_b32_e32 v1, v7
; SDAG-NEXT: .LBB3_6: ; %Flow1
-; SDAG-NEXT: s_or_b64 exec, exec, s[6:7]
+; SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
; SDAG-NEXT: .LBB3_7: ; %Flow2
; SDAG-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11]
; SDAG-NEXT: ; %bb.8: ; %fp-to-i-if-saturate
@@ -1210,14 +1108,10 @@ define i128 @fptoui_f32_to_i128(float %x) {
; GISEL: ; %bb.0: ; %fp-to-i-entry
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_mov_b32_e32 v4, v0
-; GISEL-NEXT: v_mov_b32_e32 v5, 0
-; GISEL-NEXT: v_lshrrev_b64 v[0:1], 23, v[4:5]
-; GISEL-NEXT: s_mov_b64 s[4:5], 0
-; GISEL-NEXT: v_bfe_u32 v6, v0, 0, 8
+; GISEL-NEXT: v_bfe_u32 v6, v4, 23, 8
; GISEL-NEXT: v_mov_b32_e32 v0, 0x7f
-; GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GISEL-NEXT: v_mov_b32_e32 v7, v5
-; GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[6:7], v[0:1]
+; GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v6, v0
; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
; GISEL-NEXT: v_mov_b32_e32 v0, s4
; GISEL-NEXT: v_mov_b32_e32 v1, s5
@@ -1226,146 +1120,132 @@ define i128 @fptoui_f32_to_i128(float %x) {
; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc
; GISEL-NEXT: s_cbranch_execz .LBB3_10
; GISEL-NEXT: ; %bb.1: ; %fp-to-i-if-check.saturate
-; GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffff01, v6
-; GISEL-NEXT: v_mov_b32_e32 v2, 0xffffff80
-; GISEL-NEXT: v_addc_co_u32_e64 v1, s[6:7], 0, -1, vcc
-; GISEL-NEXT: v_mov_b32_e32 v3, -1
-; GISEL-NEXT: v_addc_co_u32_e64 v8, s[6:7], 0, -1, s[6:7]
-; GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3]
-; GISEL-NEXT: v_addc_co_u32_e64 v9, s[6:7], 0, -1, s[6:7]
-; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT: v_cmp_le_u64_e32 vcc, -1, v[8:9]
-; GISEL-NEXT: v_cmp_lt_i32_e64 s[4:5], -1, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, -1, v[8:9]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GISEL-NEXT: v_and_b32_e32 v0, 1, v0
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT: v_add_u32_e32 v0, 0xffffff01, v6
+; GISEL-NEXT: v_mov_b32_e32 v1, 0xffffff80
+; GISEL-NEXT: v_cmp_lt_i32_e32 vcc, -1, v4
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v1
; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
-; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
; GISEL-NEXT: s_xor_b64 s[14:15], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB3_7
; GISEL-NEXT: ; %bb.2: ; %fp-to-i-if-check.exp.size
-; GISEL-NEXT: s_xor_b64 s[6:7], s[4:5], -1
-; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[6:7]
+; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1
+; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5]
; GISEL-NEXT: v_and_b32_e32 v0, 1, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[6:7]
+; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
; GISEL-NEXT: v_lshlrev_b16_e32 v2, 1, v0
; GISEL-NEXT: v_or_b32_e32 v1, v1, v2
; GISEL-NEXT: v_lshlrev_b16_e32 v3, 2, v0
; GISEL-NEXT: v_or_b32_e32 v2, v0, v2
; GISEL-NEXT: v_or_b32_e32 v1, v1, v3
-; GISEL-NEXT: v_lshlrev_b16_e32 v8, 3, v0
+; GISEL-NEXT: v_lshlrev_b16_e32 v7, 3, v0
; GISEL-NEXT: v_or_b32_e32 v2, v2, v3
+; GISEL-NEXT: v_or_b32_e32 v1, v1, v7
+; GISEL-NEXT: v_lshlrev_b16_e32 v8, 4, v0
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v7
; GISEL-NEXT: v_or_b32_e32 v1, v1, v8
-; GISEL-NEXT: v_lshlrev_b16_e32 v9, 4, v0
+; GISEL-NEXT: v_lshlrev_b16_e32 v9, 5, v0
; GISEL-NEXT: v_or_b32_e32 v2, v2, v8
; GISEL-NEXT: v_or_b32_e32 v1, v1, v9
-; GISEL-NEXT: v_lshlrev_b16_e32 v10, 5, v0
+; GISEL-NEXT: v_lshlrev_b16_e32 v10, 6, v0
; GISEL-NEXT: v_or_b32_e32 v2, v2, v9
; GISEL-NEXT: v_or_b32_e32 v1, v1, v10
-; GISEL-NEXT: v_lshlrev_b16_e32 v11, 6, v0
+; GISEL-NEXT: v_lshlrev_b16_e32 v11, 7, v0
; GISEL-NEXT: v_or_b32_e32 v2, v2, v10
; GISEL-NEXT: v_or_b32_e32 v1, v1, v11
-; GISEL-NEXT: v_lshlrev_b16_e32 v12, 7, v0
+; GISEL-NEXT: v_lshlrev_b16_e32 v12, 8, v0
; GISEL-NEXT: v_or_b32_e32 v2, v2, v11
; GISEL-NEXT: v_or_b32_e32 v1, v1, v12
-; GISEL-NEXT: v_lshlrev_b16_e32 v13, 8, v0
+; GISEL-NEXT: v_lshlrev_b16_e32 v13, 9, v0
; GISEL-NEXT: v_or_b32_e32 v2, v2, v12
; GISEL-NEXT: v_or_b32_e32 v1, v1, v13
-; GISEL-NEXT: v_lshlrev_b16_e32 v14, 9, v0
+; GISEL-NEXT: v_lshlrev_b16_e32 v14, 10, v0
; GISEL-NEXT: v_or_b32_e32 v2, v2, v13
; GISEL-NEXT: v_or_b32_e32 v1, v1, v14
-; GISEL-NEXT: v_lshlrev_b16_e32 v15, 10, v0
+; GISEL-NEXT: v_lshlrev_b16_e32 v15, 11, v0
; GISEL-NEXT: v_or_b32_e32 v2, v2, v14
; GISEL-NEXT: v_or_b32_e32 v1, v1, v15
-; GISEL-NEXT: v_lshlrev_b16_e32 v16, 11, v0
+; GISEL-NEXT: v_lshlrev_b16_e32 v16, 12, v0
; GISEL-NEXT: v_or_b32_e32 v2, v2, v15
; GISEL-NEXT: v_or_b32_e32 v1, v1, v16
-; GISEL-NEXT: v_lshlrev_b16_e32 v17, 12, v0
+; GISEL-NEXT: v_lshlrev_b16_e32 v17, 13, v0
; GISEL-NEXT: v_or_b32_e32 v2, v2, v16
; GISEL-NEXT: v_or_b32_e32 v1, v1, v17
-; GISEL-NEXT: v_lshlrev_b16_e32 v18, 13, v0
+; GISEL-NEXT: v_lshlrev_b16_e32 v18, 14, v0
; GISEL-NEXT: v_or_b32_e32 v2, v2, v17
; GISEL-NEXT: v_or_b32_e32 v1, v1, v18
-; GISEL-NEXT: v_lshlrev_b16_e32 v19, 14, v0
-; GISEL-NEXT: v_or_b32_e32 v2, v2, v18
-; GISEL-NEXT: v_or_b32_e32 v1, v1, v19
; GISEL-NEXT: v_lshlrev_b16_e32 v0, 15, v0
-; GISEL-NEXT: v_or_b32_e32 v2, v2, v19
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v18
; GISEL-NEXT: v_or_b32_e32 v1, v1, v0
; GISEL-NEXT: v_or_b32_e32 v0, v2, v0
; GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v0
-; GISEL-NEXT: v_lshl_or_b32 v8, v0, 16, v0
-; GISEL-NEXT: v_or3_b32 v9, v1, v2, 1
+; GISEL-NEXT: v_lshl_or_b32 v7, v0, 16, v0
+; GISEL-NEXT: v_or3_b32 v8, v1, v2, 1
+; GISEL-NEXT: v_mov_b32_e32 v0, 0x7fffff
+; GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GISEL-NEXT: v_and_or_b32 v4, v4, v0, v1
; GISEL-NEXT: v_mov_b32_e32 v0, 0x96
-; GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GISEL-NEXT: v_and_b32_e32 v2, 0x7fffff, v4
-; GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[6:7], v[0:1]
-; GISEL-NEXT: v_or_b32_e32 v4, 0x800000, v2
+; GISEL-NEXT: v_mov_b32_e32 v5, 0
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v0
; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
-; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB3_4
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-exp.large
; GISEL-NEXT: v_add_u32_e32 v2, 0xffffff6a, v6
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v2, v[4:5]
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v1, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v10, v8, 0
+; GISEL-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v2
+; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, v0, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, v1, s[4:5]
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v11, v7, 0
; GISEL-NEXT: v_add_u32_e32 v3, 0xffffff2a, v6
; GISEL-NEXT: v_sub_u32_e32 v6, 64, v2
-; GISEL-NEXT: v_lshrrev_b64 v[6:7], v6, v[4:5]
+; GISEL-NEXT: v_lshrrev_b64 v[9:10], v6, v[4:5]
; GISEL-NEXT: v_lshlrev_b64 v[3:4], v3, v[4:5]
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[8:9], v12, v7, v[0:1]
+; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[4:5]
; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[8:9], v11, v8, v[0:1]
-; GISEL-NEXT: v_cndmask_b32_e64 v12, v3, 0, s[6:7]
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[8:9], v10, v9, 0
-; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[8:9], v12, v9, v[5:6]
-; GISEL-NEXT: v_mul_lo_u32 v13, v11, v8
-; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[8:9], v10, v8, v[1:2]
-; GISEL-NEXT: v_mul_lo_u32 v10, v10, v8
-; GISEL-NEXT: v_mad_u64_u32 v[1:2], s[10:11], v11, v9, v[5:6]
-; GISEL-NEXT: v_addc_co_u32_e64 v3, s[10:11], v3, v10, s[10:11]
+; GISEL-NEXT: v_cndmask_b32_e64 v9, v3, 0, s[6:7]
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[8:9], v11, v8, 0
+; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[8:9], v9, v8, v[5:6]
+; GISEL-NEXT: v_mul_lo_u32 v13, v12, v7
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[8:9], v11, v7, v[1:2]
+; GISEL-NEXT: v_mul_lo_u32 v11, v11, v7
+; GISEL-NEXT: v_mad_u64_u32 v[1:2], s[10:11], v12, v8, v[5:6]
+; GISEL-NEXT: v_addc_co_u32_e64 v3, s[10:11], v3, v11, s[10:11]
; GISEL-NEXT: v_addc_co_u32_e64 v3, s[8:9], v3, v13, s[8:9]
-; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[8:9], v12, v8, v[3:4]
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v4, v7, vcc
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[8:9], v9, v7, v[3:4]
+; GISEL-NEXT: v_cndmask_b32_e64 v3, v4, v10, s[4:5]
; GISEL-NEXT: v_cndmask_b32_e64 v7, v3, 0, s[6:7]
-; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[6:7], v7, v9, v[5:6]
+; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v7, v8, v[5:6]
; GISEL-NEXT: ; implicit-def: $vgpr6
-; GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
-; GISEL-NEXT: ; implicit-def: $vgpr9
+; GISEL-NEXT: ; implicit-def: $vgpr4
; GISEL-NEXT: ; implicit-def: $vgpr8
+; GISEL-NEXT: ; implicit-def: $vgpr7
; GISEL-NEXT: .LBB3_4: ; %Flow
; GISEL-NEXT: s_andn2_saveexec_b64 s[6:7], s[16:17]
; GISEL-NEXT: s_cbranch_execz .LBB3_6
; GISEL-NEXT: ; %bb.5: ; %fp-to-i-if-exp.small
-; GISEL-NEXT: v_sub_co_u32_e32 v2, vcc, 0x96, v6
-; GISEL-NEXT: v_lshrrev_b64 v[0:1], v2, v[4:5]
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v6, v0, v4, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[8:9], v6, v9, 0
-; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[8:9], v6, v8, 0
-; GISEL-NEXT: v_mul_lo_u32 v7, v6, v8
-; GISEL-NEXT: v_mad_u64_u32 v[4:5], vcc, v6, v8, v[1:2]
-; GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v7, vcc
+; GISEL-NEXT: v_sub_u32_e32 v0, 0x96, v6
+; GISEL-NEXT: v_lshrrev_b32_e32 v6, v0, v4
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, 0
+; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v7, 0
+; GISEL-NEXT: v_mul_lo_u32 v8, v6, v7
+; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v7, v[1:2]
+; GISEL-NEXT: v_addc_co_u32_e64 v3, s[4:5], v3, v8, s[4:5]
; GISEL-NEXT: v_mov_b32_e32 v1, v4
; GISEL-NEXT: v_mov_b32_e32 v2, v5
; GISEL-NEXT: .LBB3_6: ; %Flow1
; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
; GISEL-NEXT: .LBB3_7: ; %Flow2
-; GISEL-NEXT: s_andn2_saveexec_b64 s[6:7], s[14:15]
+; GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[14:15]
; GISEL-NEXT: s_cbranch_execz .LBB3_9
; GISEL-NEXT: ; %bb.8: ; %fp-to-i-if-saturate
-; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
; GISEL-NEXT: v_and_b32_e32 v1, 1, v1
-; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GISEL-NEXT: v_lshlrev_b32_e32 v2, 1, v1
; GISEL-NEXT: v_or_b32_e32 v0, v0, v2
; GISEL-NEXT: v_lshlrev_b32_e32 v3, 2, v1
@@ -1432,7 +1312,7 @@ define i128 @fptoui_f32_to_i128(float %x) {
; GISEL-NEXT: v_add_u32_e32 v3, 0x80000000, v1
; GISEL-NEXT: v_mov_b32_e32 v2, v1
; GISEL-NEXT: .LBB3_9: ; %Flow3
-; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
+; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
; GISEL-NEXT: .LBB3_10: ; %fp-to-i-cleanup
; GISEL-NEXT: s_or_b64 exec, exec, s[12:13]
; GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1483,102 +1363,92 @@ define i128 @fptosi_bf16_to_i128(bfloat %x) {
; SDAG: ; %bb.0: ; %fp-to-i-entry
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: v_mov_b32_e32 v4, v0
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
-; SDAG-NEXT: v_bfe_u32 v5, v4, 7, 8
+; SDAG-NEXT: v_lshrrev_b16_e32 v7, 7, v4
; SDAG-NEXT: s_movk_i32 s4, 0x7e
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: v_mov_b32_e32 v2, 0
; SDAG-NEXT: v_mov_b32_e32 v1, 0
; SDAG-NEXT: v_mov_b32_e32 v3, 0
-; SDAG-NEXT: v_cmp_lt_u32_e32 vcc, s4, v5
-; SDAG-NEXT: s_and_saveexec_b64 s[8:9], vcc
+; SDAG-NEXT: v_cmp_gt_u16_sdwa s[4:5], v7, s4 src0_sel:BYTE_0 src1_sel:DWORD
+; SDAG-NEXT: s_and_saveexec_b64 s[8:9], s[4:5]
; SDAG-NEXT: s_cbranch_execz .LBB6_10
; SDAG-NEXT: ; %bb.1: ; %fp-to-i-if-check.saturate
-; SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffff01, v5
-; SDAG-NEXT: v_mov_b32_e32 v1, -1
-; SDAG-NEXT: v_mov_b32_e32 v6, 0
-; SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
-; SDAG-NEXT: v_addc_co_u32_e32 v2, vcc, -1, v6, vcc
-; SDAG-NEXT: s_movk_i32 s6, 0xff7f
-; SDAG-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v6, vcc
-; SDAG-NEXT: s_mov_b32 s7, -1
-; SDAG-NEXT: v_cmp_eq_u64_e64 s[4:5], -1, v[2:3]
-; SDAG-NEXT: v_cmp_lt_u64_e64 s[6:7], s[6:7], v[0:1]
+; SDAG-NEXT: s_movk_i32 s4, 0xff01
+; SDAG-NEXT: v_add_u16_sdwa v0, v7, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; SDAG-NEXT: s_movk_i32 s4, 0xff7f
; SDAG-NEXT: v_cmp_lt_i16_e32 vcc, -1, v4
-; SDAG-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
+; SDAG-NEXT: v_cmp_lt_u16_e64 s[4:5], s4, v0
; SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
; SDAG-NEXT: s_xor_b64 s[10:11], exec, s[6:7]
; SDAG-NEXT: s_cbranch_execz .LBB6_7
; SDAG-NEXT: ; %bb.2: ; %fp-to-i-if-check.exp.size
-; SDAG-NEXT: s_movk_i32 s4, 0x7f
-; SDAG-NEXT: v_and_b32_sdwa v0, v4, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; SDAG-NEXT: s_mov_b64 s[4:5], 0x85
-; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[4:5], v[5:6]
-; SDAG-NEXT: v_cndmask_b32_e64 v10, -1, 0, vcc
-; SDAG-NEXT: v_cndmask_b32_e64 v9, -1, 1, vcc
-; SDAG-NEXT: v_or_b32_e32 v7, 0x80, v0
-; SDAG-NEXT: v_mov_b32_e32 v8, v6
+; SDAG-NEXT: v_and_b32_e32 v0, 0x7f, v4
+; SDAG-NEXT: s_movk_i32 s4, 0x85
+; SDAG-NEXT: s_mov_b32 s6, 0
+; SDAG-NEXT: v_cndmask_b32_e64 v6, -1, 0, vcc
+; SDAG-NEXT: v_cndmask_b32_e64 v5, -1, 1, vcc
+; SDAG-NEXT: v_or_b32_e32 v4, 0x80, v0
+; SDAG-NEXT: v_cmp_gt_u16_sdwa s[4:5], v7, s4 src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
-; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
-; SDAG-NEXT: s_xor_b64 s[12:13], exec, s[6:7]
+; SDAG-NEXT: s_and_saveexec_b64 s[12:13], s[4:5]
+; SDAG-NEXT: s_xor_b64 s[12:13], exec, s[12:13]
; SDAG-NEXT: s_cbranch_execz .LBB6_4
; SDAG-NEXT: ; %bb.3: ; %fp-to-i-if-exp.large
; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; SDAG-NEXT: v_add_co_u32_e64 v11, s[4:5], -1, v0
-; SDAG-NEXT: v_sub_u32_e32 v0, 0xc6, v5
-; SDAG-NEXT: v_add_u32_e32 v2, 0xffffff3a, v5
-; SDAG-NEXT: v_add_u32_e32 v4, 0xffffff7a, v5
-; SDAG-NEXT: v_lshrrev_b64 v[0:1], v0, v[7:8]
-; SDAG-NEXT: v_lshlrev_b64 v[2:3], v2, v[7:8]
-; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v4
-; SDAG-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[4:5]
-; SDAG-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v4
-; SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v1, s[6:7]
-; SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v0, s[4:5]
-; SDAG-NEXT: v_lshlrev_b64 v[0:1], v4, v[7:8]
-; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[6:7]
-; SDAG-NEXT: v_cndmask_b32_e64 v7, 0, v0, s[4:5]
-; SDAG-NEXT: v_mad_u64_u32 v[4:5], s[6:7], v7, v9, 0
-; SDAG-NEXT: v_cndmask_b32_e64 v13, 0, v1, s[4:5]
-; SDAG-NEXT: v_mul_lo_u32 v8, v10, v2
-; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v13, v9, v[5:6]
-; SDAG-NEXT: v_mul_lo_u32 v12, v9, v3
-; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[6:7], v9, v2, 0
-; SDAG-NEXT: v_mov_b32_e32 v5, v0
-; SDAG-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v7, v10, v[5:6]
-; SDAG-NEXT: v_add3_u32 v3, v3, v12, v8
-; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v11, v7, v[2:3]
-; SDAG-NEXT: v_add_co_u32_e64 v0, s[4:5], v1, v6
-; SDAG-NEXT: v_addc_co_u32_e64 v1, s[4:5], 0, 0, s[4:5]
-; SDAG-NEXT: v_mul_lo_u32 v8, v11, v13
-; SDAG-NEXT: v_mul_lo_u32 v7, v11, v7
-; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v13, v10, v[0:1]
-; SDAG-NEXT: ; implicit-def: $vgpr9
-; SDAG-NEXT: v_add3_u32 v3, v7, v3, v8
-; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v0, v2
-; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], v1, v3, s[4:5]
-; SDAG-NEXT: v_mov_b32_e32 v0, v4
-; SDAG-NEXT: v_mov_b32_e32 v1, v5
-; SDAG-NEXT: ; implicit-def: $vgpr5_vgpr6
-; SDAG-NEXT: ; implicit-def: $vgpr7_vgpr8
+; SDAG-NEXT: v_add_co_u32_e64 v9, s[4:5], -1, v0
+; SDAG-NEXT: s_movk_i32 s4, 0xff7a
+; SDAG-NEXT: v_add_u16_sdwa v10, v7, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v4
+; SDAG-NEXT: v_mov_b32_e32 v1, s6
+; SDAG-NEXT: v_sub_u32_e32 v2, 64, v10
+; SDAG-NEXT: v_lshrrev_b64 v[3:4], v2, v[0:1]
+; SDAG-NEXT: v_subrev_u32_e32 v2, 64, v10
+; SDAG-NEXT: v_lshlrev_b64 v[7:8], v2, v[0:1]
+; SDAG-NEXT: v_cmp_gt_u16_e64 s[4:5], 64, v10
+; SDAG-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[4:5]
+; SDAG-NEXT: v_cmp_ne_u16_e64 s[6:7], 0, v10
+; SDAG-NEXT: v_cndmask_b32_e64 v8, 0, v4, s[6:7]
+; SDAG-NEXT: v_cndmask_b32_e64 v7, v7, v3, s[4:5]
+; SDAG-NEXT: v_lshlrev_b64 v[3:4], v10, v[0:1]
+; SDAG-NEXT: v_cndmask_b32_e64 v7, 0, v7, s[6:7]
+; SDAG-NEXT: v_cndmask_b32_e64 v10, 0, v3, s[4:5]
+; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v10, v5, 0
+; SDAG-NEXT: v_mov_b32_e32 v2, 0
+; SDAG-NEXT: v_cndmask_b32_e64 v13, 0, v4, s[4:5]
+; SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v13, v5, v[1:2]
+; SDAG-NEXT: v_mul_lo_u32 v11, v6, v7
+; SDAG-NEXT: v_mul_lo_u32 v12, v5, v8
+; SDAG-NEXT: v_mad_u64_u32 v[7:8], s[6:7], v5, v7, 0
+; SDAG-NEXT: v_mov_b32_e32 v1, v3
+; SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v10, v6, v[1:2]
+; SDAG-NEXT: v_add3_u32 v8, v8, v12, v11
+; SDAG-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v9, v10, v[7:8]
+; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v4, v2
+; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, s[4:5]
+; SDAG-NEXT: v_mul_lo_u32 v5, v9, v13
+; SDAG-NEXT: v_mul_lo_u32 v9, v9, v10
+; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v13, v6, v[2:3]
+; SDAG-NEXT: v_add3_u32 v4, v9, v8, v5
+; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v2, v7
+; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], v3, v4, s[4:5]
+; SDAG-NEXT: ; implicit-def: $vgpr7
+; SDAG-NEXT: ; implicit-def: $vgpr4
+; SDAG-NEXT: ; implicit-def: $vgpr5
; SDAG-NEXT: .LBB6_4: ; %Flow
-; SDAG-NEXT: s_andn2_saveexec_b64 s[6:7], s[12:13]
-; SDAG-NEXT: s_cbranch_execz .LBB6_6
+; SDAG-NEXT: s_andn2_saveexec_b64 s[4:5], s[12:13]
; SDAG-NEXT: ; %bb.5: ; %fp-to-i-if-exp.small
-; SDAG-NEXT: v_sub_u32_e32 v2, 0x86, v5
-; SDAG-NEXT: v_lshrrev_b64 v[0:1], v2, v[7:8]
-; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v2
-; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[4:5]
-; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v2
-; SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v7, s[4:5]
-; SDAG-NEXT: v_mul_hi_i32_i24_e32 v1, v0, v9
+; SDAG-NEXT: s_movk_i32 s6, 0x86
+; SDAG-NEXT: v_sub_u16_sdwa v0, s6, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; SDAG-NEXT: v_lshrrev_b16_e32 v0, v0, v4
+; SDAG-NEXT: v_mul_hi_i32_i24_e32 v1, v0, v5
; SDAG-NEXT: v_ashrrev_i32_e32 v2, 31, v1
-; SDAG-NEXT: v_mul_i32_i24_e32 v0, v0, v9
+; SDAG-NEXT: v_mul_i32_i24_e32 v0, v0, v5
; SDAG-NEXT: v_mov_b32_e32 v3, v2
-; SDAG-NEXT: .LBB6_6: ; %Flow1
-; SDAG-NEXT: s_or_b64 exec, exec, s[6:7]
+; SDAG-NEXT: ; %bb.6: ; %Flow1
+; SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
; SDAG-NEXT: .LBB6_7: ; %Flow2
; SDAG-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11]
; SDAG-NEXT: ; %bb.8: ; %fp-to-i-if-saturate
@@ -1599,159 +1469,142 @@ define i128 @fptosi_bf16_to_i128(bfloat %x) {
; GISEL: ; %bb.0: ; %fp-to-i-entry
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_mov_b32_e32 v4, v0
-; GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v4
-; GISEL-NEXT: v_mov_b32_e32 v6, 0
-; GISEL-NEXT: v_lshrrev_b64 v[0:1], 7, v[5:6]
-; GISEL-NEXT: v_mov_b32_e32 v1, 0x7f
+; GISEL-NEXT: v_lshrrev_b16_e32 v6, 7, v4
+; GISEL-NEXT: v_mov_b32_e32 v0, 0x7f
; GISEL-NEXT: s_mov_b64 s[4:5], 0
-; GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GISEL-NEXT: v_bfe_u32 v5, v0, 0, 8
-; GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[5:6], v[1:2]
+; GISEL-NEXT: v_cmp_ge_u16_sdwa s[8:9], v6, v0 src0_sel:BYTE_0 src1_sel:DWORD
; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
; GISEL-NEXT: v_mov_b32_e32 v0, s4
; GISEL-NEXT: v_mov_b32_e32 v1, s5
; GISEL-NEXT: v_mov_b32_e32 v2, s6
; GISEL-NEXT: v_mov_b32_e32 v3, s7
-; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[8:9]
; GISEL-NEXT: s_cbranch_execz .LBB6_10
; GISEL-NEXT: ; %bb.1: ; %fp-to-i-if-check.saturate
-; GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffff01, v5
-; GISEL-NEXT: v_mov_b32_e32 v2, 0xffffff80
-; GISEL-NEXT: v_addc_co_u32_e64 v1, s[6:7], 0, -1, vcc
-; GISEL-NEXT: v_mov_b32_e32 v3, -1
-; GISEL-NEXT: v_addc_co_u32_e64 v7, s[6:7], 0, -1, s[6:7]
-; GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3]
-; GISEL-NEXT: v_addc_co_u32_e64 v8, s[6:7], 0, -1, s[6:7]
-; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT: v_cmp_le_u64_e32 vcc, -1, v[7:8]
-; GISEL-NEXT: v_cmp_lt_i16_e64 s[4:5], -1, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, -1, v[7:8]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GISEL-NEXT: v_and_b32_e32 v0, 1, v0
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT: v_mov_b32_e32 v0, 0xffffff01
+; GISEL-NEXT: v_add_u16_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GISEL-NEXT: v_mov_b32_e32 v1, 0xffffff80
+; GISEL-NEXT: v_cmp_lt_i16_e32 vcc, -1, v4
+; GISEL-NEXT: v_cmp_ge_u16_e64 s[4:5], v0, v1
; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
-; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
; GISEL-NEXT: s_xor_b64 s[14:15], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB6_7
; GISEL-NEXT: ; %bb.2: ; %fp-to-i-if-check.exp.size
-; GISEL-NEXT: s_xor_b64 s[6:7], s[4:5], -1
-; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[6:7]
+; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1
+; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5]
; GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
; GISEL-NEXT: v_lshlrev_b16_e32 v2, 1, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[6:7]
-; GISEL-NEXT: v_lshlrev_b16_e32 v3, 2, v0
-; GISEL-NEXT: v_lshlrev_b16_e32 v7, 3, v0
-; GISEL-NEXT: v_lshlrev_b16_e32 v8, 4, v0
-; GISEL-NEXT: v_lshlrev_b16_e32 v9, 5, v0
-; GISEL-NEXT: v_lshlrev_b16_e32 v10, 6, v0
-; GISEL-NEXT: v_lshlrev_b16_e32 v11, 7, v0
-; GISEL-NEXT: v_lshlrev_b16_e32 v12, 8, v0
-; GISEL-NEXT: v_lshlrev_b16_e32 v13, 9, v0
-; GISEL-NEXT: v_lshlrev_b16_e32 v14, 10, v0
-; GISEL-NEXT: v_lshlrev_b16_e32 v15, 11, v0
-; GISEL-NEXT: v_lshlrev_b16_e32 v16, 12, v0
-; GISEL-NEXT: v_lshlrev_b16_e32 v17, 13, v0
-; GISEL-NEXT: v_lshlrev_b16_e32 v18, 14, v0
-; GISEL-NEXT: v_lshlrev_b16_e32 v19, 15, v0
-; GISEL-NEXT: v_or_b32_e32 v0, v0, v2
; GISEL-NEXT: v_or_b32_e32 v1, v1, v2
-; GISEL-NEXT: v_or_b32_e32 v0, v0, v3
+; GISEL-NEXT: v_lshlrev_b16_e32 v3, 2, v0
+; GISEL-NEXT: v_or_b32_e32 v2, v0, v2
; GISEL-NEXT: v_or_b32_e32 v1, v1, v3
-; GISEL-NEXT: v_or_b32_e32 v0, v0, v7
+; GISEL-NEXT: v_lshlrev_b16_e32 v7, 3, v0
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v3
; GISEL-NEXT: v_or_b32_e32 v1, v1, v7
-; GISEL-NEXT: v_or_b32_e32 v0, v0, v8
+; GISEL-NEXT: v_lshlrev_b16_e32 v8, 4, v0
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v7
; GISEL-NEXT: v_or_b32_e32 v1, v1, v8
-; GISEL-NEXT: v_or_b32_e32 v0, v0, v9
+; GISEL-NEXT: v_lshlrev_b16_e32 v9, 5, v0
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v8
; GISEL-NEXT: v_or_b32_e32 v1, v1, v9
-; GISEL-NEXT: v_or_b32_e32 v0, v0, v10
+; GISEL-NEXT: v_lshlrev_b16_e32 v10, 6, v0
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v9
; GISEL-NEXT: v_or_b32_e32 v1, v1, v10
-; GISEL-NEXT: v_or_b32_e32 v0, v0, v11
+; GISEL-NEXT: v_lshlrev_b16_e32 v11, 7, v0
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v10
; GISEL-NEXT: v_or_b32_e32 v1, v1, v11
-; GISEL-NEXT: v_or_b32_e32 v0, v0, v12
+; GISEL-NEXT: v_lshlrev_b16_e32 v12, 8, v0
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v11
; GISEL-NEXT: v_or_b32_e32 v1, v1, v12
-; GISEL-NEXT: v_or_b32_e32 v0, v0, v13
+; GISEL-NEXT: v_lshlrev_b16_e32 v13, 9, v0
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v12
; GISEL-NEXT: v_or_b32_e32 v1, v1, v13
-; GISEL-NEXT: v_or_b32_e32 v0, v0, v14
+; GISEL-NEXT: v_lshlrev_b16_e32 v14, 10, v0
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v13
; GISEL-NEXT: v_or_b32_e32 v1, v1, v14
-; GISEL-NEXT: v_or_b32_e32 v0, v0, v15
+; GISEL-NEXT: v_lshlrev_b16_e32 v15, 11, v0
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v14
; GISEL-NEXT: v_or_b32_e32 v1, v1, v15
-; GISEL-NEXT: v_or_b32_e32 v0, v0, v16
+; GISEL-NEXT: v_lshlrev_b16_e32 v16, 12, v0
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v15
; GISEL-NEXT: v_or_b32_e32 v1, v1, v16
-; GISEL-NEXT: v_or_b32_e32 v0, v0, v17
+; GISEL-NEXT: v_lshlrev_b16_e32 v17, 13, v0
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v16
; GISEL-NEXT: v_or_b32_e32 v1, v1, v17
-; GISEL-NEXT: v_or_b32_e32 v0, v0, v18
+; GISEL-NEXT: v_lshlrev_b16_e32 v18, 14, v0
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v17
; GISEL-NEXT: v_or_b32_e32 v1, v1, v18
-; GISEL-NEXT: v_or_b32_e32 v0, v0, v19
-; GISEL-NEXT: v_or_b32_e32 v1, v1, v19
+; GISEL-NEXT: v_lshlrev_b16_e32 v0, 15, v0
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v18
+; GISEL-NEXT: v_or_b32_e32 v1, v1, v0
+; GISEL-NEXT: v_or_b32_e32 v0, v2, v0
; GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v0
-; GISEL-NEXT: v_lshl_or_b32 v9, v0, 16, v0
-; GISEL-NEXT: v_or3_b32 v8, v1, v2, 1
+; GISEL-NEXT: v_lshl_or_b32 v8, v0, 16, v0
+; GISEL-NEXT: v_and_b32_e32 v0, 0x7f, v4
+; GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GISEL-NEXT: v_or_b32_e32 v4, 0x80, v0
; GISEL-NEXT: v_mov_b32_e32 v0, 0x86
-; GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GISEL-NEXT: v_and_b32_e32 v2, 0x7f, v4
-; GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[5:6], v[0:1]
-; GISEL-NEXT: v_mov_b32_e32 v7, 0
-; GISEL-NEXT: v_or_b32_e32 v6, 0x80, v2
+; GISEL-NEXT: v_mov_b32_e32 v5, 0
+; GISEL-NEXT: v_or3_b32 v7, v1, v2, 1
+; GISEL-NEXT: v_cmp_ge_u16_sdwa s[4:5], v6, v0 src0_sel:BYTE_0 src1_sel:DWORD
; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
-; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB6_4
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-exp.large
-; GISEL-NEXT: v_add_u32_e32 v2, 0xffffff7a, v5
-; GISEL-NEXT: v_lshlrev_b64 v[0:1], v2, v[6:7]
-; GISEL-NEXT: v_add_u32_e32 v5, 0xffffff3a, v5
-; GISEL-NEXT: v_sub_u32_e32 v3, 64, v2
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
-; GISEL-NEXT: v_lshrrev_b64 v[3:4], v3, v[6:7]
-; GISEL-NEXT: v_lshlrev_b64 v[5:6], v5, v[6:7]
-; GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v12, 0, v1, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v7, v9, 0
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
+; GISEL-NEXT: v_mov_b32_e32 v0, 0xffffff7a
+; GISEL-NEXT: v_add_u16_sdwa v2, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
+; GISEL-NEXT: v_lshlrev_b64 v[0:1], v2, v[4:5]
+; GISEL-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v2
+; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, v0, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, v1, s[4:5]
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v11, v8, 0
+; GISEL-NEXT: v_add_u32_e32 v3, 0xffffffc0, v2
+; GISEL-NEXT: v_sub_u32_e32 v6, 64, v2
+; GISEL-NEXT: v_lshrrev_b64 v[9:10], v6, v[4:5]
+; GISEL-NEXT: v_lshlrev_b64 v[3:4], v3, v[4:5]
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[8:9], v12, v8, v[0:1]
+; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[4:5]
; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v2
-; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[8:9], v12, v9, v[0:1]
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v3, 0, s[6:7]
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[8:9], v7, v8, 0
-; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[8:9], v5, v8, v[10:11]
-; GISEL-NEXT: v_mul_lo_u32 v13, v12, v9
-; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[8:9], v7, v9, v[1:2]
-; GISEL-NEXT: v_mul_lo_u32 v7, v7, v9
-; GISEL-NEXT: v_mad_u64_u32 v[1:2], s[10:11], v12, v8, v[10:11]
-; GISEL-NEXT: v_addc_co_u32_e64 v3, s[10:11], v3, v7, s[10:11]
+; GISEL-NEXT: v_cndmask_b32_e64 v9, v3, 0, s[6:7]
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[8:9], v11, v7, 0
+; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[8:9], v9, v7, v[5:6]
+; GISEL-NEXT: v_mul_lo_u32 v13, v12, v8
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[8:9], v11, v8, v[1:2]
+; GISEL-NEXT: v_mul_lo_u32 v11, v11, v8
+; GISEL-NEXT: v_mad_u64_u32 v[1:2], s[10:11], v12, v7, v[5:6]
+; GISEL-NEXT: v_addc_co_u32_e64 v3, s[10:11], v3, v11, s[10:11]
; GISEL-NEXT: v_addc_co_u32_e64 v3, s[8:9], v3, v13, s[8:9]
-; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[8:9], v5, v9, v[3:4]
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v6, v4, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v3, 0, s[6:7]
-; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[6:7], v5, v8, v[10:11]
-; GISEL-NEXT: ; implicit-def: $vgpr5
-; GISEL-NEXT: ; implicit-def: $vgpr6_vgpr7
-; GISEL-NEXT: ; implicit-def: $vgpr8
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[8:9], v9, v8, v[3:4]
+; GISEL-NEXT: v_cndmask_b32_e64 v3, v4, v10, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v8, v3, 0, s[6:7]
+; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v8, v7, v[5:6]
+; GISEL-NEXT: ; implicit-def: $vgpr6
+; GISEL-NEXT: ; implicit-def: $vgpr4
+; GISEL-NEXT: ; implicit-def: $vgpr7
; GISEL-NEXT: .LBB6_4: ; %Flow
-; GISEL-NEXT: s_andn2_saveexec_b64 s[6:7], s[16:17]
-; GISEL-NEXT: s_cbranch_execz .LBB6_6
+; GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[16:17]
; GISEL-NEXT: ; %bb.5: ; %fp-to-i-if-exp.small
-; GISEL-NEXT: v_sub_co_u32_e32 v2, vcc, 0x86, v5
-; GISEL-NEXT: v_lshrrev_b64 v[0:1], v2, v[6:7]
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
-; GISEL-NEXT: v_mul_hi_i32_i24_e32 v1, v0, v8
+; GISEL-NEXT: v_mov_b32_e32 v0, 0x86
+; GISEL-NEXT: v_sub_u16_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GISEL-NEXT: v_lshrrev_b16_e32 v0, v0, v4
+; GISEL-NEXT: v_mul_hi_i32_i24_e32 v1, v0, v7
; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v1
-; GISEL-NEXT: v_mul_i32_i24_e32 v0, v0, v8
+; GISEL-NEXT: v_mul_i32_i24_e32 v0, v0, v7
; GISEL-NEXT: v_mov_b32_e32 v3, v2
-; GISEL-NEXT: .LBB6_6: ; %Flow1
-; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
+; GISEL-NEXT: ; %bb.6: ; %Flow1
+; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
; GISEL-NEXT: .LBB6_7: ; %Flow2
-; GISEL-NEXT: s_andn2_saveexec_b64 s[6:7], s[14:15]
+; GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[14:15]
; GISEL-NEXT: s_cbranch_execz .LBB6_9
; GISEL-NEXT: ; %bb.8: ; %fp-to-i-if-saturate
-; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
; GISEL-NEXT: v_and_b32_e32 v1, 1, v1
-; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GISEL-NEXT: v_lshlrev_b32_e32 v2, 1, v1
; GISEL-NEXT: v_or_b32_e32 v0, v0, v2
; GISEL-NEXT: v_lshlrev_b32_e32 v3, 2, v1
@@ -1818,7 +1671,7 @@ define i128 @fptosi_bf16_to_i128(bfloat %x) {
; GISEL-NEXT: v_add_u32_e32 v3, 0x80000000, v1
; GISEL-NEXT: v_mov_b32_e32 v2, v1
; GISEL-NEXT: .LBB6_9: ; %Flow3
-; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
+; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
; GISEL-NEXT: .LBB6_10: ; %fp-to-i-cleanup
; GISEL-NEXT: s_or_b64 exec, exec, s[12:13]
; GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1831,102 +1684,92 @@ define i128 @fptoui_bf16_to_i128(bfloat %x) {
; SDAG: ; %bb.0: ; %fp-to-i-entry
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: v_mov_b32_e32 v4, v0
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
-; SDAG-NEXT: v_bfe_u32 v5, v4, 7, 8
+; SDAG-NEXT: v_lshrrev_b16_e32 v7, 7, v4
; SDAG-NEXT: s_movk_i32 s4, 0x7e
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: v_mov_b32_e32 v2, 0
; SDAG-NEXT: v_mov_b32_e32 v1, 0
; SDAG-NEXT: v_mov_b32_e32 v3, 0
-; SDAG-NEXT: v_cmp_lt_u32_e32 vcc, s4, v5
-; SDAG-NEXT: s_and_saveexec_b64 s[8:9], vcc
+; SDAG-NEXT: v_cmp_gt_u16_sdwa s[4:5], v7, s4 src0_sel:BYTE_0 src1_sel:DWORD
+; SDAG-NEXT: s_and_saveexec_b64 s[8:9], s[4:5]
; SDAG-NEXT: s_cbranch_execz .LBB7_10
; SDAG-NEXT: ; %bb.1: ; %fp-to-i-if-check.saturate
-; SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffff01, v5
-; SDAG-NEXT: v_mov_b32_e32 v1, -1
-; SDAG-NEXT: v_mov_b32_e32 v6, 0
-; SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
-; SDAG-NEXT: v_addc_co_u32_e32 v2, vcc, -1, v6, vcc
-; SDAG-NEXT: s_movk_i32 s6, 0xff7f
-; SDAG-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v6, vcc
-; SDAG-NEXT: s_mov_b32 s7, -1
-; SDAG-NEXT: v_cmp_eq_u64_e64 s[4:5], -1, v[2:3]
-; SDAG-NEXT: v_cmp_lt_u64_e64 s[6:7], s[6:7], v[0:1]
+; SDAG-NEXT: s_movk_i32 s4, 0xff01
+; SDAG-NEXT: v_add_u16_sdwa v0, v7, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; SDAG-NEXT: s_movk_i32 s4, 0xff7f
; SDAG-NEXT: v_cmp_lt_i16_e32 vcc, -1, v4
-; SDAG-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
+; SDAG-NEXT: v_cmp_lt_u16_e64 s[4:5], s4, v0
; SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
; SDAG-NEXT: s_xor_b64 s[10:11], exec, s[6:7]
; SDAG-NEXT: s_cbranch_execz .LBB7_7
; SDAG-NEXT: ; %bb.2: ; %fp-to-i-if-check.exp.size
-; SDAG-NEXT: s_movk_i32 s4, 0x7f
-; SDAG-NEXT: v_and_b32_sdwa v0, v4, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; SDAG-NEXT: s_mov_b64 s[4:5], 0x85
-; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[4:5], v[5:6]
-; SDAG-NEXT: v_cndmask_b32_e64 v10, -1, 0, vcc
-; SDAG-NEXT: v_cndmask_b32_e64 v9, -1, 1, vcc
-; SDAG-NEXT: v_or_b32_e32 v7, 0x80, v0
-; SDAG-NEXT: v_mov_b32_e32 v8, v6
+; SDAG-NEXT: v_and_b32_e32 v0, 0x7f, v4
+; SDAG-NEXT: s_movk_i32 s4, 0x85
+; SDAG-NEXT: s_mov_b32 s6, 0
+; SDAG-NEXT: v_cndmask_b32_e64 v6, -1, 0, vcc
+; SDAG-NEXT: v_cndmask_b32_e64 v5, -1, 1, vcc
+; SDAG-NEXT: v_or_b32_e32 v4, 0x80, v0
+; SDAG-NEXT: v_cmp_gt_u16_sdwa s[4:5], v7, s4 src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
-; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
-; SDAG-NEXT: s_xor_b64 s[12:13], exec, s[6:7]
+; SDAG-NEXT: s_and_saveexec_b64 s[12:13], s[4:5]
+; SDAG-NEXT: s_xor_b64 s[12:13], exec, s[12:13]
; SDAG-NEXT: s_cbranch_execz .LBB7_4
; SDAG-NEXT: ; %bb.3: ; %fp-to-i-if-exp.large
; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; SDAG-NEXT: v_add_co_u32_e64 v11, s[4:5], -1, v0
-; SDAG-NEXT: v_sub_u32_e32 v0, 0xc6, v5
-; SDAG-NEXT: v_add_u32_e32 v2, 0xffffff3a, v5
-; SDAG-NEXT: v_add_u32_e32 v4, 0xffffff7a, v5
-; SDAG-NEXT: v_lshrrev_b64 v[0:1], v0, v[7:8]
-; SDAG-NEXT: v_lshlrev_b64 v[2:3], v2, v[7:8]
-; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v4
-; SDAG-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[4:5]
-; SDAG-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v4
-; SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v1, s[6:7]
-; SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v0, s[4:5]
-; SDAG-NEXT: v_lshlrev_b64 v[0:1], v4, v[7:8]
-; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[6:7]
-; SDAG-NEXT: v_cndmask_b32_e64 v7, 0, v0, s[4:5]
-; SDAG-NEXT: v_mad_u64_u32 v[4:5], s[6:7], v7, v9, 0
-; SDAG-NEXT: v_cndmask_b32_e64 v13, 0, v1, s[4:5]
-; SDAG-NEXT: v_mul_lo_u32 v8, v10, v2
-; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v13, v9, v[5:6]
-; SDAG-NEXT: v_mul_lo_u32 v12, v9, v3
-; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[6:7], v9, v2, 0
-; SDAG-NEXT: v_mov_b32_e32 v5, v0
-; SDAG-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v7, v10, v[5:6]
-; SDAG-NEXT: v_add3_u32 v3, v3, v12, v8
-; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v11, v7, v[2:3]
-; SDAG-NEXT: v_add_co_u32_e64 v0, s[4:5], v1, v6
-; SDAG-NEXT: v_addc_co_u32_e64 v1, s[4:5], 0, 0, s[4:5]
-; SDAG-NEXT: v_mul_lo_u32 v8, v11, v13
-; SDAG-NEXT: v_mul_lo_u32 v7, v11, v7
-; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v13, v10, v[0:1]
-; SDAG-NEXT: ; implicit-def: $vgpr9
-; SDAG-NEXT: v_add3_u32 v3, v7, v3, v8
-; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v0, v2
-; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], v1, v3, s[4:5]
-; SDAG-NEXT: v_mov_b32_e32 v0, v4
-; SDAG-NEXT: v_mov_b32_e32 v1, v5
-; SDAG-NEXT: ; implicit-def: $vgpr5_vgpr6
-; SDAG-NEXT: ; implicit-def: $vgpr7_vgpr8
+; SDAG-NEXT: v_add_co_u32_e64 v9, s[4:5], -1, v0
+; SDAG-NEXT: s_movk_i32 s4, 0xff7a
+; SDAG-NEXT: v_add_u16_sdwa v10, v7, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v4
+; SDAG-NEXT: v_mov_b32_e32 v1, s6
+; SDAG-NEXT: v_sub_u32_e32 v2, 64, v10
+; SDAG-NEXT: v_lshrrev_b64 v[3:4], v2, v[0:1]
+; SDAG-NEXT: v_subrev_u32_e32 v2, 64, v10
+; SDAG-NEXT: v_lshlrev_b64 v[7:8], v2, v[0:1]
+; SDAG-NEXT: v_cmp_gt_u16_e64 s[4:5], 64, v10
+; SDAG-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[4:5]
+; SDAG-NEXT: v_cmp_ne_u16_e64 s[6:7], 0, v10
+; SDAG-NEXT: v_cndmask_b32_e64 v8, 0, v4, s[6:7]
+; SDAG-NEXT: v_cndmask_b32_e64 v7, v7, v3, s[4:5]
+; SDAG-NEXT: v_lshlrev_b64 v[3:4], v10, v[0:1]
+; SDAG-NEXT: v_cndmask_b32_e64 v7, 0, v7, s[6:7]
+; SDAG-NEXT: v_cndmask_b32_e64 v10, 0, v3, s[4:5]
+; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v10, v5, 0
+; SDAG-NEXT: v_mov_b32_e32 v2, 0
+; SDAG-NEXT: v_cndmask_b32_e64 v13, 0, v4, s[4:5]
+; SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v13, v5, v[1:2]
+; SDAG-NEXT: v_mul_lo_u32 v11, v6, v7
+; SDAG-NEXT: v_mul_lo_u32 v12, v5, v8
+; SDAG-NEXT: v_mad_u64_u32 v[7:8], s[6:7], v5, v7, 0
+; SDAG-NEXT: v_mov_b32_e32 v1, v3
+; SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v10, v6, v[1:2]
+; SDAG-NEXT: v_add3_u32 v8, v8, v12, v11
+; SDAG-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v9, v10, v[7:8]
+; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v4, v2
+; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, s[4:5]
+; SDAG-NEXT: v_mul_lo_u32 v5, v9, v13
+; SDAG-NEXT: v_mul_lo_u32 v9, v9, v10
+; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v13, v6, v[2:3]
+; SDAG-NEXT: v_add3_u32 v4, v9, v8, v5
+; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v2, v7
+; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], v3, v4, s[4:5]
+; SDAG-NEXT: ; implicit-def: $vgpr7
+; SDAG-NEXT: ; implicit-def: $vgpr4
+; SDAG-NEXT: ; implicit-def: $vgpr5
; SDAG-NEXT: .LBB7_4: ; %Flow
-; SDAG-NEXT: s_andn2_saveexec_b64 s[6:7], s[12:13]
-; SDAG-NEXT: s_cbranch_execz .LBB7_6
+; SDAG-NEXT: s_andn2_saveexec_b64 s[4:5], s[12:13]
; SDAG-NEXT: ; %bb.5: ; %fp-to-i-if-exp.small
-; SDAG-NEXT: v_sub_u32_e32 v2, 0x86, v5
-; SDAG-NEXT: v_lshrrev_b64 v[0:1], v2, v[7:8]
-; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v2
-; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[4:5]
-; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v2
-; SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v7, s[4:5]
-; SDAG-NEXT: v_mul_hi_i32_i24_e32 v1, v0, v9
+; SDAG-NEXT: s_movk_i32 s6, 0x86
+; SDAG-NEXT: v_sub_u16_sdwa v0, s6, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; SDAG-NEXT: v_lshrrev_b16_e32 v0, v0, v4
+; SDAG-NEXT: v_mul_hi_i32_i24_e32 v1, v0, v5
; SDAG-NEXT: v_ashrrev_i32_e32 v2, 31, v1
-; SDAG-NEXT: v_mul_i32_i24_e32 v0, v0, v9
+; SDAG-NEXT: v_mul_i32_i24_e32 v0, v0, v5
; SDAG-NEXT: v_mov_b32_e32 v3, v2
-; SDAG-NEXT: .LBB7_6: ; %Flow1
-; SDAG-NEXT: s_or_b64 exec, exec, s[6:7]
+; SDAG-NEXT: ; %bb.6: ; %Flow1
+; SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
; SDAG-NEXT: .LBB7_7: ; %Flow2
; SDAG-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11]
; SDAG-NEXT: ; %bb.8: ; %fp-to-i-if-saturate
@@ -1947,159 +1790,142 @@ define i128 @fptoui_bf16_to_i128(bfloat %x) {
; GISEL: ; %bb.0: ; %fp-to-i-entry
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_mov_b32_e32 v4, v0
-; GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v4
-; GISEL-NEXT: v_mov_b32_e32 v6, 0
-; GISEL-NEXT: v_lshrrev_b64 v[0:1], 7, v[5:6]
-; GISEL-NEXT: v_mov_b32_e32 v1, 0x7f
+; GISEL-NEXT: v_lshrrev_b16_e32 v6, 7, v4
+; GISEL-NEXT: v_mov_b32_e32 v0, 0x7f
; GISEL-NEXT: s_mov_b64 s[4:5], 0
-; GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GISEL-NEXT: v_bfe_u32 v5, v0, 0, 8
-; GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[5:6], v[1:2]
+; GISEL-NEXT: v_cmp_ge_u16_sdwa s[8:9], v6, v0 src0_sel:BYTE_0 src1_sel:DWORD
; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
; GISEL-NEXT: v_mov_b32_e32 v0, s4
; GISEL-NEXT: v_mov_b32_e32 v1, s5
; GISEL-NEXT: v_mov_b32_e32 v2, s6
; GISEL-NEXT: v_mov_b32_e32 v3, s7
-; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[8:9]
; GISEL-NEXT: s_cbranch_execz .LBB7_10
; GISEL-NEXT: ; %bb.1: ; %fp-to-i-if-check.saturate
-; GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffff01, v5
-; GISEL-NEXT: v_mov_b32_e32 v2, 0xffffff80
-; GISEL-NEXT: v_addc_co_u32_e64 v1, s[6:7], 0, -1, vcc
-; GISEL-NEXT: v_mov_b32_e32 v3, -1
-; GISEL-NEXT: v_addc_co_u32_e64 v7, s[6:7], 0, -1, s[6:7]
-; GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3]
-; GISEL-NEXT: v_addc_co_u32_e64 v8, s[6:7], 0, -1, s[6:7]
-; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT: v_cmp_le_u64_e32 vcc, -1, v[7:8]
-; GISEL-NEXT: v_cmp_lt_i16_e64 s[4:5], -1, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, -1, v[7:8]
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GISEL-NEXT: v_and_b32_e32 v0, 1, v0
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT: v_mov_b32_e32 v0, 0xffffff01
+; GISEL-NEXT: v_add_u16_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GISEL-NEXT: v_mov_b32_e32 v1, 0xffffff80
+; GISEL-NEXT: v_cmp_lt_i16_e32 vcc, -1, v4
+; GISEL-NEXT: v_cmp_ge_u16_e64 s[4:5], v0, v1
; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
-; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
; GISEL-NEXT: s_xor_b64 s[14:15], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB7_7
; GISEL-NEXT: ; %bb.2: ; %fp-to-i-if-check.exp.size
-; GISEL-NEXT: s_xor_b64 s[6:7], s[4:5], -1
-; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[6:7]
+; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1
+; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5]
; GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
; GISEL-NEXT: v_lshlrev_b16_e32 v2, 1, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[6:7]
-; GISEL-NEXT: v_lshlrev_b16_e32 v3, 2, v0
-; GISEL-NEXT: v_lshlrev_b16_e32 v7, 3, v0
-; GISEL-NEXT: v_lshlrev_b16_e32 v8, 4, v0
-; GISEL-NEXT: v_lshlrev_b16_e32 v9, 5, v0
-; GISEL-NEXT: v_lshlrev_b16_e32 v10, 6, v0
-; GISEL-NEXT: v_lshlrev_b16_e32 v11, 7, v0
-; GISEL-NEXT: v_lshlrev_b16_e32 v12, 8, v0
-; GISEL-NEXT: v_lshlrev_b16_e32 v13, 9, v0
-; GISEL-NEXT: v_lshlrev_b16_e32 v14, 10, v0
-; GISEL-NEXT: v_lshlrev_b16_e32 v15, 11, v0
-; GISEL-NEXT: v_lshlrev_b16_e32 v16, 12, v0
-; GISEL-NEXT: v_lshlrev_b16_e32 v17, 13, v0
-; GISEL-NEXT: v_lshlrev_b16_e32 v18, 14, v0
-; GISEL-NEXT: v_lshlrev_b16_e32 v19, 15, v0
-; GISEL-NEXT: v_or_b32_e32 v0, v0, v2
; GISEL-NEXT: v_or_b32_e32 v1, v1, v2
-; GISEL-NEXT: v_or_b32_e32 v0, v0, v3
+; GISEL-NEXT: v_lshlrev_b16_e32 v3, 2, v0
+; GISEL-NEXT: v_or_b32_e32 v2, v0, v2
; GISEL-NEXT: v_or_b32_e32 v1, v1, v3
-; GISEL-NEXT: v_or_b32_e32 v0, v0, v7
+; GISEL-NEXT: v_lshlrev_b16_e32 v7, 3, v0
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v3
; GISEL-NEXT: v_or_b32_e32 v1, v1, v7
-; GISEL-NEXT: v_or_b32_e32 v0, v0, v8
+; GISEL-NEXT: v_lshlrev_b16_e32 v8, 4, v0
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v7
; GISEL-NEXT: v_or_b32_e32 v1, v1, v8
-; GISEL-NEXT: v_or_b32_e32 v0, v0, v9
+; GISEL-NEXT: v_lshlrev_b16_e32 v9, 5, v0
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v8
; GISEL-NEXT: v_or_b32_e32 v1, v1, v9
-; GISEL-NEXT: v_or_b32_e32 v0, v0, v10
+; GISEL-NEXT: v_lshlrev_b16_e32 v10, 6, v0
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v9
; GISEL-NEXT: v_or_b32_e32 v1, v1, v10
-; GISEL-NEXT: v_or_b32_e32 v0, v0, v11
+; GISEL-NEXT: v_lshlrev_b16_e32 v11, 7, v0
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v10
; GISEL-NEXT: v_or_b32_e32 v1, v1, v11
-; GISEL-NEXT: v_or_b32_e32 v0, v0, v12
+; GISEL-NEXT: v_lshlrev_b16_e32 v12, 8, v0
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v11
; GISEL-NEXT: v_or_b32_e32 v1, v1, v12
-; GISEL-NEXT: v_or_b32_e32 v0, v0, v13
+; GISEL-NEXT: v_lshlrev_b16_e32 v13, 9, v0
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v12
; GISEL-NEXT: v_or_b32_e32 v1, v1, v13
-; GISEL-NEXT: v_or_b32_e32 v0, v0, v14
+; GISEL-NEXT: v_lshlrev_b16_e32 v14, 10, v0
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v13
; GISEL-NEXT: v_or_b32_e32 v1, v1, v14
-; GISEL-NEXT: v_or_b32_e32 v0, v0, v15
+; GISEL-NEXT: v_lshlrev_b16_e32 v15, 11, v0
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v14
; GISEL-NEXT: v_or_b32_e32 v1, v1, v15
-; GISEL-NEXT: v_or_b32_e32 v0, v0, v16
+; GISEL-NEXT: v_lshlrev_b16_e32 v16, 12, v0
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v15
; GISEL-NEXT: v_or_b32_e32 v1, v1, v16
-; GISEL-NEXT: v_or_b32_e32 v0, v0, v17
+; GISEL-NEXT: v_lshlrev_b16_e32 v17, 13, v0
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v16
; GISEL-NEXT: v_or_b32_e32 v1, v1, v17
-; GISEL-NEXT: v_or_b32_e32 v0, v0, v18
+; GISEL-NEXT: v_lshlrev_b16_e32 v18, 14, v0
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v17
; GISEL-NEXT: v_or_b32_e32 v1, v1, v18
-; GISEL-NEXT: v_or_b32_e32 v0, v0, v19
-; GISEL-NEXT: v_or_b32_e32 v1, v1, v19
+; GISEL-NEXT: v_lshlrev_b16_e32 v0, 15, v0
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v18
+; GISEL-NEXT: v_or_b32_e32 v1, v1, v0
+; GISEL-NEXT: v_or_b32_e32 v0, v2, v0
; GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v0
-; GISEL-NEXT: v_lshl_or_b32 v9, v0, 16, v0
-; GISEL-NEXT: v_or3_b32 v8, v1, v2, 1
+; GISEL-NEXT: v_lshl_or_b32 v8, v0, 16, v0
+; GISEL-NEXT: v_and_b32_e32 v0, 0x7f, v4
+; GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GISEL-NEXT: v_or_b32_e32 v4, 0x80, v0
; GISEL-NEXT: v_mov_b32_e32 v0, 0x86
-; GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GISEL-NEXT: v_and_b32_e32 v2, 0x7f, v4
-; GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[5:6], v[0:1]
-; GISEL-NEXT: v_mov_b32_e32 v7, 0
-; GISEL-NEXT: v_or_b32_e32 v6, 0x80, v2
+; GISEL-NEXT: v_mov_b32_e32 v5, 0
+; GISEL-NEXT: v_or3_b32 v7, v1, v2, 1
+; GISEL-NEXT: v_cmp_ge_u16_sdwa s[4:5], v6, v0 src0_sel:BYTE_0 src1_sel:DWORD
; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
-; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB7_4
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-exp.large
-; GISEL-NEXT: v_add_u32_e32 v2, 0xffffff7a, v5
-; GISEL-NEXT: v_lshlrev_b64 v[0:1], v2, v[6:7]
-; GISEL-NEXT: v_add_u32_e32 v5, 0xffffff3a, v5
-; GISEL-NEXT: v_sub_u32_e32 v3, 64, v2
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
-; GISEL-NEXT: v_lshrrev_b64 v[3:4], v3, v[6:7]
-; GISEL-NEXT: v_lshlrev_b64 v[5:6], v5, v[6:7]
-; GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v12, 0, v1, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v7, v9, 0
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
+; GISEL-NEXT: v_mov_b32_e32 v0, 0xffffff7a
+; GISEL-NEXT: v_add_u16_sdwa v2, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
+; GISEL-NEXT: v_lshlrev_b64 v[0:1], v2, v[4:5]
+; GISEL-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v2
+; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, v0, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, v1, s[4:5]
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v11, v8, 0
+; GISEL-NEXT: v_add_u32_e32 v3, 0xffffffc0, v2
+; GISEL-NEXT: v_sub_u32_e32 v6, 64, v2
+; GISEL-NEXT: v_lshrrev_b64 v[9:10], v6, v[4:5]
+; GISEL-NEXT: v_lshlrev_b64 v[3:4], v3, v[4:5]
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[8:9], v12, v8, v[0:1]
+; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[4:5]
; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v2
-; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[8:9], v12, v9, v[0:1]
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v3, 0, s[6:7]
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[8:9], v7, v8, 0
-; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[8:9], v5, v8, v[10:11]
-; GISEL-NEXT: v_mul_lo_u32 v13, v12, v9
-; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[8:9], v7, v9, v[1:2]
-; GISEL-NEXT: v_mul_lo_u32 v7, v7, v9
-; GISEL-NEXT: v_mad_u64_u32 v[1:2], s[10:11], v12, v8, v[10:11]
-; GISEL-NEXT: v_addc_co_u32_e64 v3, s[10:11], v3, v7, s[10:11]
+; GISEL-NEXT: v_cndmask_b32_e64 v9, v3, 0, s[6:7]
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[8:9], v11, v7, 0
+; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[8:9], v9, v7, v[5:6]
+; GISEL-NEXT: v_mul_lo_u32 v13, v12, v8
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[8:9], v11, v8, v[1:2]
+; GISEL-NEXT: v_mul_lo_u32 v11, v11, v8
+; GISEL-NEXT: v_mad_u64_u32 v[1:2], s[10:11], v12, v7, v[5:6]
+; GISEL-NEXT: v_addc_co_u32_e64 v3, s[10:11], v3, v11, s[10:11]
; GISEL-NEXT: v_addc_co_u32_e64 v3, s[8:9], v3, v13, s[8:9]
-; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[8:9], v5, v9, v[3:4]
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v6, v4, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v3, 0, s[6:7]
-; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[6:7], v5, v8, v[10:11]
-; GISEL-NEXT: ; implicit-def: $vgpr5
-; GISEL-NEXT: ; implicit-def: $vgpr6_vgpr7
-; GISEL-NEXT: ; implicit-def: $vgpr8
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[8:9], v9, v8, v[3:4]
+; GISEL-NEXT: v_cndmask_b32_e64 v3, v4, v10, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v8, v3, 0, s[6:7]
+; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v8, v7, v[5:6]
+; GISEL-NEXT: ; implicit-def: $vgpr6
+; GISEL-NEXT: ; implicit-def: $vgpr4
+; GISEL-NEXT: ; implicit-def: $vgpr7
; GISEL-NEXT: .LBB7_4: ; %Flow
-; GISEL-NEXT: s_andn2_saveexec_b64 s[6:7], s[16:17]
-; GISEL-NEXT: s_cbranch_execz .LBB7_6
+; GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[16:17]
; GISEL-NEXT: ; %bb.5: ; %fp-to-i-if-exp.small
-; GISEL-NEXT: v_sub_co_u32_e32 v2, vcc, 0x86, v5
-; GISEL-NEXT: v_lshrrev_b64 v[0:1], v2, v[6:7]
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
-; GISEL-NEXT: v_mul_hi_i32_i24_e32 v1, v0, v8
+; GISEL-NEXT: v_mov_b32_e32 v0, 0x86
+; GISEL-NEXT: v_sub_u16_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GISEL-NEXT: v_lshrrev_b16_e32 v0, v0, v4
+; GISEL-NEXT: v_mul_hi_i32_i24_e32 v1, v0, v7
; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v1
-; GISEL-NEXT: v_mul_i32_i24_e32 v0, v0, v8
+; GISEL-NEXT: v_mul_i32_i24_e32 v0, v0, v7
; GISEL-NEXT: v_mov_b32_e32 v3, v2
-; GISEL-NEXT: .LBB7_6: ; %Flow1
-; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
+; GISEL-NEXT: ; %bb.6: ; %Flow1
+; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
; GISEL-NEXT: .LBB7_7: ; %Flow2
-; GISEL-NEXT: s_andn2_saveexec_b64 s[6:7], s[14:15]
+; GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[14:15]
; GISEL-NEXT: s_cbranch_execz .LBB7_9
; GISEL-NEXT: ; %bb.8: ; %fp-to-i-if-saturate
-; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
; GISEL-NEXT: v_and_b32_e32 v1, 1, v1
-; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GISEL-NEXT: v_lshlrev_b32_e32 v2, 1, v1
; GISEL-NEXT: v_or_b32_e32 v0, v0, v2
; GISEL-NEXT: v_lshlrev_b32_e32 v3, 2, v1
@@ -2166,7 +1992,7 @@ define i128 @fptoui_bf16_to_i128(bfloat %x) {
; GISEL-NEXT: v_add_u32_e32 v3, 0x80000000, v1
; GISEL-NEXT: v_mov_b32_e32 v2, v1
; GISEL-NEXT: .LBB7_9: ; %Flow3
-; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
+; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
; GISEL-NEXT: .LBB7_10: ; %fp-to-i-cleanup
; GISEL-NEXT: s_or_b64 exec, exec, s[12:13]
; GISEL-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/Transforms/ExpandIRInsts/X86/expand-fp-convert-small.ll b/llvm/test/Transforms/ExpandIRInsts/X86/expand-fp-convert-small.ll
index bca37df905303..467bf76e7320b 100644
--- a/llvm/test/Transforms/ExpandIRInsts/X86/expand-fp-convert-small.ll
+++ b/llvm/test/Transforms/ExpandIRInsts/X86/expand-fp-convert-small.ll
@@ -49,32 +49,34 @@ define i24 @fptosi_f16_i24(half %x) {
; CHECK-SAME: half [[X:%.*]]) {
; CHECK-NEXT: [[FP_TO_I_ENTRY:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast half [[X]] to i16
-; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[TMP0]] to i24
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i16 [[TMP0]], -1
; CHECK-NEXT: [[SIGN:%.*]] = select i1 [[TMP2]], i24 1, i24 -1
-; CHECK-NEXT: [[TMP3:%.*]] = lshr i24 [[TMP1]], 10
-; CHECK-NEXT: [[BIASED_EXP:%.*]] = and i24 [[TMP3]], 31
-; CHECK-NEXT: [[TMP4:%.*]] = and i24 [[TMP1]], 1023
-; CHECK-NEXT: [[SIGNIFICAND:%.*]] = or i24 [[TMP4]], 1024
-; CHECK-NEXT: [[EXP_IS_NEGATIVE:%.*]] = icmp ult i24 [[BIASED_EXP]], 15
+; CHECK-NEXT: [[TMP5:%.*]] = lshr i16 [[TMP0]], 10
+; CHECK-NEXT: [[BIASED_EXP:%.*]] = and i16 [[TMP5]], 31
+; CHECK-NEXT: [[TMP3:%.*]] = and i16 [[TMP0]], 1023
+; CHECK-NEXT: [[SIGNIFICAND1:%.*]] = or i16 [[TMP3]], 1024
+; CHECK-NEXT: [[EXP_IS_NEGATIVE:%.*]] = icmp ult i16 [[BIASED_EXP]], 15
; CHECK-NEXT: br i1 [[EXP_IS_NEGATIVE]], label %[[FP_TO_I_CLEANUP:.*]], label %[[FP_TO_I_IF_CHECK_SATURATE:.*]]
; CHECK: [[FP_TO_I_IF_CHECK_SATURATE]]:
-; CHECK-NEXT: [[TMP5:%.*]] = add i24 [[BIASED_EXP]], -39
-; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i24 [[TMP5]], -24
+; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[BIASED_EXP]], -39
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i16 [[TMP4]], -24
; CHECK-NEXT: br i1 [[TMP6]], label %[[FP_TO_I_IF_SATURATE:.*]], label %[[FP_TO_I_IF_CHECK_EXP_SIZE:.*]]
; CHECK: [[FP_TO_I_IF_SATURATE]]:
; CHECK-NEXT: [[SATURATED:%.*]] = select i1 [[TMP2]], i24 8388607, i24 -8388608
; CHECK-NEXT: br label %[[FP_TO_I_CLEANUP]]
; CHECK: [[FP_TO_I_IF_CHECK_EXP_SIZE]]:
-; CHECK-NEXT: [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i24 [[BIASED_EXP]], 25
+; CHECK-NEXT: [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i16 [[BIASED_EXP]], 25
; CHECK-NEXT: br i1 [[EXP_SMALLER_MANTISSA_WIDTH]], label %[[FP_TO_I_IF_EXP_SMALL:.*]], label %[[FP_TO_I_IF_EXP_LARGE:.*]]
; CHECK: [[FP_TO_I_IF_EXP_SMALL]]:
-; CHECK-NEXT: [[TMP7:%.*]] = sub i24 25, [[BIASED_EXP]]
-; CHECK-NEXT: [[TMP8:%.*]] = lshr i24 [[SIGNIFICAND]], [[TMP7]]
+; CHECK-NEXT: [[TMP14:%.*]] = sub i16 25, [[BIASED_EXP]]
+; CHECK-NEXT: [[TMP7:%.*]] = lshr i16 [[SIGNIFICAND1]], [[TMP14]]
+; CHECK-NEXT: [[TMP8:%.*]] = zext i16 [[TMP7]] to i24
; CHECK-NEXT: [[TMP9:%.*]] = mul i24 [[TMP8]], [[SIGN]]
; CHECK-NEXT: br label %[[FP_TO_I_CLEANUP]]
; CHECK: [[FP_TO_I_IF_EXP_LARGE]]:
-; CHECK-NEXT: [[TMP10:%.*]] = add i24 [[BIASED_EXP]], -25
+; CHECK-NEXT: [[TMP15:%.*]] = add i16 [[BIASED_EXP]], -25
+; CHECK-NEXT: [[SIGNIFICAND:%.*]] = zext i16 [[SIGNIFICAND1]] to i24
+; CHECK-NEXT: [[TMP10:%.*]] = zext i16 [[TMP15]] to i24
; CHECK-NEXT: [[TMP11:%.*]] = shl i24 [[SIGNIFICAND]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = mul i24 [[TMP11]], [[SIGN]]
; CHECK-NEXT: br label %[[FP_TO_I_CLEANUP]]
@@ -86,6 +88,50 @@ define i24 @fptosi_f16_i24(half %x) {
ret i24 %res
}
+define i8 @fptosi_f16_i8(half %x) {
+; CHECK-LABEL: define i8 @fptosi_f16_i8(
+; CHECK-SAME: half [[X:%.*]]) {
+; CHECK-NEXT: [[FP_TO_I_ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast half [[X]] to i16
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i16 [[TMP0]], -1
+; CHECK-NEXT: [[SIGN:%.*]] = select i1 [[TMP1]], i8 1, i8 -1
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i16 [[TMP0]], 10
+; CHECK-NEXT: [[BIASED_EXP:%.*]] = and i16 [[TMP2]], 31
+; CHECK-NEXT: [[TMP3:%.*]] = and i16 [[TMP0]], 1023
+; CHECK-NEXT: [[SIGNIFICAND:%.*]] = or i16 [[TMP3]], 1024
+; CHECK-NEXT: [[EXP_IS_NEGATIVE:%.*]] = icmp ult i16 [[BIASED_EXP]], 15
+; CHECK-NEXT: br i1 [[EXP_IS_NEGATIVE]], label %[[FP_TO_I_CLEANUP:.*]], label %[[FP_TO_I_IF_CHECK_SATURATE:.*]]
+; CHECK: [[FP_TO_I_IF_CHECK_SATURATE]]:
+; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[BIASED_EXP]], -23
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i16 [[TMP4]], -8
+; CHECK-NEXT: br i1 [[TMP5]], label %[[FP_TO_I_IF_SATURATE:.*]], label %[[FP_TO_I_IF_CHECK_EXP_SIZE:.*]]
+; CHECK: [[FP_TO_I_IF_SATURATE]]:
+; CHECK-NEXT: [[SATURATED:%.*]] = select i1 [[TMP1]], i8 127, i8 -128
+; CHECK-NEXT: br label %[[FP_TO_I_CLEANUP]]
+; CHECK: [[FP_TO_I_IF_CHECK_EXP_SIZE]]:
+; CHECK-NEXT: [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i16 [[BIASED_EXP]], 25
+; CHECK-NEXT: br i1 [[EXP_SMALLER_MANTISSA_WIDTH]], label %[[FP_TO_I_IF_EXP_SMALL:.*]], label %[[FP_TO_I_IF_EXP_LARGE:.*]]
+; CHECK: [[FP_TO_I_IF_EXP_SMALL]]:
+; CHECK-NEXT: [[TMP6:%.*]] = sub i16 25, [[BIASED_EXP]]
+; CHECK-NEXT: [[TMP7:%.*]] = lshr i16 [[SIGNIFICAND]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = trunc i16 [[TMP7]] to i8
+; CHECK-NEXT: [[TMP9:%.*]] = mul i8 [[TMP8]], [[SIGN]]
+; CHECK-NEXT: br label %[[FP_TO_I_CLEANUP]]
+; CHECK: [[FP_TO_I_IF_EXP_LARGE]]:
+; CHECK-NEXT: [[TMP10:%.*]] = add i16 [[BIASED_EXP]], -25
+; CHECK-NEXT: [[TMP11:%.*]] = trunc i16 [[SIGNIFICAND]] to i8
+; CHECK-NEXT: [[TMP12:%.*]] = trunc i16 [[TMP10]] to i8
+; CHECK-NEXT: [[TMP13:%.*]] = shl i8 [[TMP11]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = mul i8 [[TMP13]], [[SIGN]]
+; CHECK-NEXT: br label %[[FP_TO_I_CLEANUP]]
+; CHECK: [[FP_TO_I_CLEANUP]]:
+; CHECK-NEXT: [[TMP15:%.*]] = phi i8 [ [[SATURATED]], %[[FP_TO_I_IF_SATURATE]] ], [ [[TMP9]], %[[FP_TO_I_IF_EXP_SMALL]] ], [ [[TMP14]], %[[FP_TO_I_IF_EXP_LARGE]] ], [ 0, %[[FP_TO_I_ENTRY]] ]
+; CHECK-NEXT: ret i8 [[TMP15]]
+;
+ %res = fptosi half %x to i8
+ ret i8 %res
+}
+
define i16 @fptoui_f16_i16(half %x) {
; CHECK-LABEL: define i16 @fptoui_f16_i16(
; CHECK-SAME: half [[X:%.*]]) {
@@ -132,32 +178,34 @@ define i24 @fptoui_f16_i24(half %x) {
; CHECK-SAME: half [[X:%.*]]) {
; CHECK-NEXT: [[FP_TO_I_ENTRY:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast half [[X]] to i16
-; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[TMP0]] to i24
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i16 [[TMP0]], -1
; CHECK-NEXT: [[SIGN:%.*]] = select i1 [[TMP2]], i24 1, i24 -1
-; CHECK-NEXT: [[TMP3:%.*]] = lshr i24 [[TMP1]], 10
-; CHECK-NEXT: [[BIASED_EXP:%.*]] = and i24 [[TMP3]], 31
-; CHECK-NEXT: [[TMP4:%.*]] = and i24 [[TMP1]], 1023
-; CHECK-NEXT: [[SIGNIFICAND:%.*]] = or i24 [[TMP4]], 1024
-; CHECK-NEXT: [[EXP_IS_NEGATIVE:%.*]] = icmp ult i24 [[BIASED_EXP]], 15
+; CHECK-NEXT: [[TMP5:%.*]] = lshr i16 [[TMP0]], 10
+; CHECK-NEXT: [[BIASED_EXP:%.*]] = and i16 [[TMP5]], 31
+; CHECK-NEXT: [[TMP3:%.*]] = and i16 [[TMP0]], 1023
+; CHECK-NEXT: [[SIGNIFICAND1:%.*]] = or i16 [[TMP3]], 1024
+; CHECK-NEXT: [[EXP_IS_NEGATIVE:%.*]] = icmp ult i16 [[BIASED_EXP]], 15
; CHECK-NEXT: br i1 [[EXP_IS_NEGATIVE]], label %[[FP_TO_I_CLEANUP:.*]], label %[[FP_TO_I_IF_CHECK_SATURATE:.*]]
; CHECK: [[FP_TO_I_IF_CHECK_SATURATE]]:
-; CHECK-NEXT: [[TMP5:%.*]] = add i24 [[BIASED_EXP]], -39
-; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i24 [[TMP5]], -24
+; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[BIASED_EXP]], -39
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i16 [[TMP4]], -24
; CHECK-NEXT: br i1 [[TMP6]], label %[[FP_TO_I_IF_SATURATE:.*]], label %[[FP_TO_I_IF_CHECK_EXP_SIZE:.*]]
; CHECK: [[FP_TO_I_IF_SATURATE]]:
; CHECK-NEXT: [[SATURATED:%.*]] = select i1 [[TMP2]], i24 8388607, i24 -8388608
; CHECK-NEXT: br label %[[FP_TO_I_CLEANUP]]
; CHECK: [[FP_TO_I_IF_CHECK_EXP_SIZE]]:
-; CHECK-NEXT: [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i24 [[BIASED_EXP]], 25
+; CHECK-NEXT: [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i16 [[BIASED_EXP]], 25
; CHECK-NEXT: br i1 [[EXP_SMALLER_MANTISSA_WIDTH]], label %[[FP_TO_I_IF_EXP_SMALL:.*]], label %[[FP_TO_I_IF_EXP_LARGE:.*]]
; CHECK: [[FP_TO_I_IF_EXP_SMALL]]:
-; CHECK-NEXT: [[TMP7:%.*]] = sub i24 25, [[BIASED_EXP]]
-; CHECK-NEXT: [[TMP8:%.*]] = lshr i24 [[SIGNIFICAND]], [[TMP7]]
+; CHECK-NEXT: [[TMP14:%.*]] = sub i16 25, [[BIASED_EXP]]
+; CHECK-NEXT: [[TMP7:%.*]] = lshr i16 [[SIGNIFICAND1]], [[TMP14]]
+; CHECK-NEXT: [[TMP8:%.*]] = zext i16 [[TMP7]] to i24
; CHECK-NEXT: [[TMP9:%.*]] = mul i24 [[TMP8]], [[SIGN]]
; CHECK-NEXT: br label %[[FP_TO_I_CLEANUP]]
; CHECK: [[FP_TO_I_IF_EXP_LARGE]]:
-; CHECK-NEXT: [[TMP10:%.*]] = add i24 [[BIASED_EXP]], -25
+; CHECK-NEXT: [[TMP15:%.*]] = add i16 [[BIASED_EXP]], -25
+; CHECK-NEXT: [[SIGNIFICAND:%.*]] = zext i16 [[SIGNIFICAND1]] to i24
+; CHECK-NEXT: [[TMP10:%.*]] = zext i16 [[TMP15]] to i24
; CHECK-NEXT: [[TMP11:%.*]] = shl i24 [[SIGNIFICAND]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = mul i24 [[TMP11]], [[SIGN]]
; CHECK-NEXT: br label %[[FP_TO_I_CLEANUP]]
@@ -168,3 +216,47 @@ define i24 @fptoui_f16_i24(half %x) {
%res = fptoui half %x to i24
ret i24 %res
}
+
+define i8 @fptoui_f16_i8(half %x) {
+; CHECK-LABEL: define i8 @fptoui_f16_i8(
+; CHECK-SAME: half [[X:%.*]]) {
+; CHECK-NEXT: [[FP_TO_I_ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast half [[X]] to i16
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i16 [[TMP0]], -1
+; CHECK-NEXT: [[SIGN:%.*]] = select i1 [[TMP1]], i8 1, i8 -1
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i16 [[TMP0]], 10
+; CHECK-NEXT: [[BIASED_EXP:%.*]] = and i16 [[TMP2]], 31
+; CHECK-NEXT: [[TMP3:%.*]] = and i16 [[TMP0]], 1023
+; CHECK-NEXT: [[SIGNIFICAND:%.*]] = or i16 [[TMP3]], 1024
+; CHECK-NEXT: [[EXP_IS_NEGATIVE:%.*]] = icmp ult i16 [[BIASED_EXP]], 15
+; CHECK-NEXT: br i1 [[EXP_IS_NEGATIVE]], label %[[FP_TO_I_CLEANUP:.*]], label %[[FP_TO_I_IF_CHECK_SATURATE:.*]]
+; CHECK: [[FP_TO_I_IF_CHECK_SATURATE]]:
+; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[BIASED_EXP]], -23
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i16 [[TMP4]], -8
+; CHECK-NEXT: br i1 [[TMP5]], label %[[FP_TO_I_IF_SATURATE:.*]], label %[[FP_TO_I_IF_CHECK_EXP_SIZE:.*]]
+; CHECK: [[FP_TO_I_IF_SATURATE]]:
+; CHECK-NEXT: [[SATURATED:%.*]] = select i1 [[TMP1]], i8 127, i8 -128
+; CHECK-NEXT: br label %[[FP_TO_I_CLEANUP]]
+; CHECK: [[FP_TO_I_IF_CHECK_EXP_SIZE]]:
+; CHECK-NEXT: [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i16 [[BIASED_EXP]], 25
+; CHECK-NEXT: br i1 [[EXP_SMALLER_MANTISSA_WIDTH]], label %[[FP_TO_I_IF_EXP_SMALL:.*]], label %[[FP_TO_I_IF_EXP_LARGE:.*]]
+; CHECK: [[FP_TO_I_IF_EXP_SMALL]]:
+; CHECK-NEXT: [[TMP6:%.*]] = sub i16 25, [[BIASED_EXP]]
+; CHECK-NEXT: [[TMP7:%.*]] = lshr i16 [[SIGNIFICAND]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = trunc i16 [[TMP7]] to i8
+; CHECK-NEXT: [[TMP9:%.*]] = mul i8 [[TMP8]], [[SIGN]]
+; CHECK-NEXT: br label %[[FP_TO_I_CLEANUP]]
+; CHECK: [[FP_TO_I_IF_EXP_LARGE]]:
+; CHECK-NEXT: [[TMP10:%.*]] = add i16 [[BIASED_EXP]], -25
+; CHECK-NEXT: [[TMP11:%.*]] = trunc i16 [[SIGNIFICAND]] to i8
+; CHECK-NEXT: [[TMP12:%.*]] = trunc i16 [[TMP10]] to i8
+; CHECK-NEXT: [[TMP13:%.*]] = shl i8 [[TMP11]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = mul i8 [[TMP13]], [[SIGN]]
+; CHECK-NEXT: br label %[[FP_TO_I_CLEANUP]]
+; CHECK: [[FP_TO_I_CLEANUP]]:
+; CHECK-NEXT: [[TMP15:%.*]] = phi i8 [ [[SATURATED]], %[[FP_TO_I_IF_SATURATE]] ], [ [[TMP9]], %[[FP_TO_I_IF_EXP_SMALL]] ], [ [[TMP14]], %[[FP_TO_I_IF_EXP_LARGE]] ], [ 0, %[[FP_TO_I_ENTRY]] ]
+; CHECK-NEXT: ret i8 [[TMP15]]
+;
+ %res = fptoui half %x to i8
+ ret i8 %res
+}
diff --git a/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptosi129.ll b/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptosi129.ll
index 3dd56820637b6..24c1476fdb512 100644
--- a/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptosi129.ll
+++ b/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptosi129.ll
@@ -16,32 +16,34 @@ define i129 @floattosi129(float %a) {
; CHECK-LABEL: @floattosi129(
; CHECK-NEXT: fp-to-i-entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A:%.*]] to i32
-; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i129
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP0]], -1
; CHECK-NEXT: [[SIGN:%.*]] = select i1 [[TMP2]], i129 1, i129 -1
-; CHECK-NEXT: [[TMP3:%.*]] = lshr i129 [[TMP1]], 23
-; CHECK-NEXT: [[BIASED_EXP:%.*]] = and i129 [[TMP3]], 255
-; CHECK-NEXT: [[TMP4:%.*]] = and i129 [[TMP1]], 8388607
-; CHECK-NEXT: [[SIGNIFICAND:%.*]] = or i129 [[TMP4]], 8388608
-; CHECK-NEXT: [[EXP_IS_NEGATIVE:%.*]] = icmp ult i129 [[BIASED_EXP]], 127
+; CHECK-NEXT: [[TMP5:%.*]] = lshr i32 [[TMP0]], 23
+; CHECK-NEXT: [[BIASED_EXP:%.*]] = and i32 [[TMP5]], 255
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 8388607
+; CHECK-NEXT: [[SIGNIFICAND1:%.*]] = or i32 [[TMP3]], 8388608
+; CHECK-NEXT: [[EXP_IS_NEGATIVE:%.*]] = icmp ult i32 [[BIASED_EXP]], 127
; CHECK-NEXT: br i1 [[EXP_IS_NEGATIVE]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_CHECK_SATURATE:%.*]]
; CHECK: fp-to-i-if-check.saturate:
-; CHECK-NEXT: [[TMP5:%.*]] = add i129 [[BIASED_EXP]], -256
-; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i129 [[TMP5]], -129
+; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[BIASED_EXP]], -256
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP4]], -129
; CHECK-NEXT: br i1 [[TMP6]], label [[FP_TO_I_IF_SATURATE:%.*]], label [[FP_TO_I_IF_CHECK_EXP_SIZE:%.*]]
; CHECK: fp-to-i-if-saturate:
; CHECK-NEXT: [[SATURATED:%.*]] = select i1 [[TMP2]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
; CHECK: fp-to-i-if-check.exp.size:
-; CHECK-NEXT: [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i129 [[BIASED_EXP]], 150
+; CHECK-NEXT: [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i32 [[BIASED_EXP]], 150
; CHECK-NEXT: br i1 [[EXP_SMALLER_MANTISSA_WIDTH]], label [[FP_TO_I_IF_EXP_SMALL:%.*]], label [[FP_TO_I_IF_EXP_LARGE:%.*]]
; CHECK: fp-to-i-if-exp.small:
-; CHECK-NEXT: [[TMP7:%.*]] = sub i129 150, [[BIASED_EXP]]
-; CHECK-NEXT: [[TMP8:%.*]] = lshr i129 [[SIGNIFICAND]], [[TMP7]]
+; CHECK-NEXT: [[TMP14:%.*]] = sub i32 150, [[BIASED_EXP]]
+; CHECK-NEXT: [[TMP7:%.*]] = lshr i32 [[SIGNIFICAND1]], [[TMP14]]
+; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i129
; CHECK-NEXT: [[TMP9:%.*]] = mul i129 [[TMP8]], [[SIGN]]
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
; CHECK: fp-to-i-if-exp.large:
-; CHECK-NEXT: [[TMP10:%.*]] = add i129 [[BIASED_EXP]], -150
+; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[BIASED_EXP]], -150
+; CHECK-NEXT: [[SIGNIFICAND:%.*]] = zext i32 [[SIGNIFICAND1]] to i129
+; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP15]] to i129
; CHECK-NEXT: [[TMP11:%.*]] = shl i129 [[SIGNIFICAND]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = mul i129 [[TMP11]], [[SIGN]]
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
@@ -57,32 +59,34 @@ define i129 @doubletosi129(double %a) {
; CHECK-LABEL: @doubletosi129(
; CHECK-NEXT: fp-to-i-entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast double [[A:%.*]] to i64
-; CHECK-NEXT: [[TMP1:%.*]] = zext i64 [[TMP0]] to i129
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i64 [[TMP0]], -1
; CHECK-NEXT: [[SIGN:%.*]] = select i1 [[TMP2]], i129 1, i129 -1
-; CHECK-NEXT: [[TMP3:%.*]] = lshr i129 [[TMP1]], 52
-; CHECK-NEXT: [[BIASED_EXP:%.*]] = and i129 [[TMP3]], 2047
-; CHECK-NEXT: [[TMP4:%.*]] = and i129 [[TMP1]], 4503599627370495
-; CHECK-NEXT: [[SIGNIFICAND:%.*]] = or i129 [[TMP4]], 4503599627370496
-; CHECK-NEXT: [[EXP_IS_NEGATIVE:%.*]] = icmp ult i129 [[BIASED_EXP]], 1023
+; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP0]], 52
+; CHECK-NEXT: [[BIASED_EXP:%.*]] = and i64 [[TMP5]], 2047
+; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP0]], 4503599627370495
+; CHECK-NEXT: [[SIGNIFICAND1:%.*]] = or i64 [[TMP3]], 4503599627370496
+; CHECK-NEXT: [[EXP_IS_NEGATIVE:%.*]] = icmp ult i64 [[BIASED_EXP]], 1023
; CHECK-NEXT: br i1 [[EXP_IS_NEGATIVE]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_CHECK_SATURATE:%.*]]
; CHECK: fp-to-i-if-check.saturate:
-; CHECK-NEXT: [[TMP5:%.*]] = add i129 [[BIASED_EXP]], -1152
-; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i129 [[TMP5]], -129
+; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[BIASED_EXP]], -1152
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP4]], -129
; CHECK-NEXT: br i1 [[TMP6]], label [[FP_TO_I_IF_SATURATE:%.*]], label [[FP_TO_I_IF_CHECK_EXP_SIZE:%.*]]
; CHECK: fp-to-i-if-saturate:
; CHECK-NEXT: [[SATURATED:%.*]] = select i1 [[TMP2]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
; CHECK: fp-to-i-if-check.exp.size:
-; CHECK-NEXT: [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i129 [[BIASED_EXP]], 1075
+; CHECK-NEXT: [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i64 [[BIASED_EXP]], 1075
; CHECK-NEXT: br i1 [[EXP_SMALLER_MANTISSA_WIDTH]], label [[FP_TO_I_IF_EXP_SMALL:%.*]], label [[FP_TO_I_IF_EXP_LARGE:%.*]]
; CHECK: fp-to-i-if-exp.small:
-; CHECK-NEXT: [[TMP7:%.*]] = sub i129 1075, [[BIASED_EXP]]
-; CHECK-NEXT: [[TMP8:%.*]] = lshr i129 [[SIGNIFICAND]], [[TMP7]]
+; CHECK-NEXT: [[TMP14:%.*]] = sub i64 1075, [[BIASED_EXP]]
+; CHECK-NEXT: [[TMP7:%.*]] = lshr i64 [[SIGNIFICAND1]], [[TMP14]]
+; CHECK-NEXT: [[TMP8:%.*]] = zext i64 [[TMP7]] to i129
; CHECK-NEXT: [[TMP9:%.*]] = mul i129 [[TMP8]], [[SIGN]]
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
; CHECK: fp-to-i-if-exp.large:
-; CHECK-NEXT: [[TMP10:%.*]] = add i129 [[BIASED_EXP]], -1075
+; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[BIASED_EXP]], -1075
+; CHECK-NEXT: [[SIGNIFICAND:%.*]] = zext i64 [[SIGNIFICAND1]] to i129
+; CHECK-NEXT: [[TMP10:%.*]] = zext i64 [[TMP15]] to i129
; CHECK-NEXT: [[TMP11:%.*]] = shl i129 [[SIGNIFICAND]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = mul i129 [[TMP11]], [[SIGN]]
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
@@ -99,32 +103,34 @@ define i129 @x86_fp80tosi129(x86_fp80 %a) {
; CHECK-NEXT: fp-to-i-entry:
; CHECK-NEXT: [[TMP0:%.*]] = fpext x86_fp80 [[A:%.*]] to fp128
; CHECK-NEXT: [[TMP1:%.*]] = bitcast fp128 [[TMP0]] to i128
-; CHECK-NEXT: [[TMP2:%.*]] = zext i128 [[TMP1]] to i129
; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i128 [[TMP1]], -1
; CHECK-NEXT: [[SIGN:%.*]] = select i1 [[TMP3]], i129 1, i129 -1
-; CHECK-NEXT: [[TMP4:%.*]] = lshr i129 [[TMP2]], 112
-; CHECK-NEXT: [[BIASED_EXP:%.*]] = and i129 [[TMP4]], 32767
-; CHECK-NEXT: [[TMP5:%.*]] = and i129 [[TMP2]], 5192296858534827628530496329220095
-; CHECK-NEXT: [[SIGNIFICAND:%.*]] = or i129 [[TMP5]], 5192296858534827628530496329220096
-; CHECK-NEXT: [[EXP_IS_NEGATIVE:%.*]] = icmp ult i129 [[BIASED_EXP]], 16383
+; CHECK-NEXT: [[TMP6:%.*]] = lshr i128 [[TMP1]], 112
+; CHECK-NEXT: [[BIASED_EXP:%.*]] = and i128 [[TMP6]], 32767
+; CHECK-NEXT: [[TMP4:%.*]] = and i128 [[TMP1]], 5192296858534827628530496329220095
+; CHECK-NEXT: [[SIGNIFICAND1:%.*]] = or i128 [[TMP4]], 5192296858534827628530496329220096
+; CHECK-NEXT: [[EXP_IS_NEGATIVE:%.*]] = icmp ult i128 [[BIASED_EXP]], 16383
; CHECK-NEXT: br i1 [[EXP_IS_NEGATIVE]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_CHECK_SATURATE:%.*]]
; CHECK: fp-to-i-if-check.saturate:
-; CHECK-NEXT: [[TMP6:%.*]] = add i129 [[BIASED_EXP]], -16512
-; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i129 [[TMP6]], -129
+; CHECK-NEXT: [[TMP5:%.*]] = add i128 [[BIASED_EXP]], -16512
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i128 [[TMP5]], -129
; CHECK-NEXT: br i1 [[TMP7]], label [[FP_TO_I_IF_SATURATE:%.*]], label [[FP_TO_I_IF_CHECK_EXP_SIZE:%.*]]
; CHECK: fp-to-i-if-saturate:
; CHECK-NEXT: [[SATURATED:%.*]] = select i1 [[TMP3]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
; CHECK: fp-to-i-if-check.exp.size:
-; CHECK-NEXT: [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i129 [[BIASED_EXP]], 16495
+; CHECK-NEXT: [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i128 [[BIASED_EXP]], 16495
; CHECK-NEXT: br i1 [[EXP_SMALLER_MANTISSA_WIDTH]], label [[FP_TO_I_IF_EXP_SMALL:%.*]], label [[FP_TO_I_IF_EXP_LARGE:%.*]]
; CHECK: fp-to-i-if-exp.small:
-; CHECK-NEXT: [[TMP8:%.*]] = sub i129 16495, [[BIASED_EXP]]
-; CHECK-NEXT: [[TMP9:%.*]] = lshr i129 [[SIGNIFICAND]], [[TMP8]]
+; CHECK-NEXT: [[TMP15:%.*]] = sub i128 16495, [[BIASED_EXP]]
+; CHECK-NEXT: [[TMP8:%.*]] = lshr i128 [[SIGNIFICAND1]], [[TMP15]]
+; CHECK-NEXT: [[TMP9:%.*]] = zext i128 [[TMP8]] to i129
; CHECK-NEXT: [[TMP10:%.*]] = mul i129 [[TMP9]], [[SIGN]]
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
; CHECK: fp-to-i-if-exp.large:
-; CHECK-NEXT: [[TMP11:%.*]] = add i129 [[BIASED_EXP]], -16495
+; CHECK-NEXT: [[TMP16:%.*]] = add i128 [[BIASED_EXP]], -16495
+; CHECK-NEXT: [[SIGNIFICAND:%.*]] = zext i128 [[SIGNIFICAND1]] to i129
+; CHECK-NEXT: [[TMP11:%.*]] = zext i128 [[TMP16]] to i129
; CHECK-NEXT: [[TMP12:%.*]] = shl i129 [[SIGNIFICAND]], [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = mul i129 [[TMP12]], [[SIGN]]
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
@@ -140,32 +146,34 @@ define i129 @fp128tosi129(fp128 %a) {
; CHECK-LABEL: @fp128tosi129(
; CHECK-NEXT: fp-to-i-entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast fp128 [[A:%.*]] to i128
-; CHECK-NEXT: [[TMP1:%.*]] = zext i128 [[TMP0]] to i129
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i128 [[TMP0]], -1
; CHECK-NEXT: [[SIGN:%.*]] = select i1 [[TMP2]], i129 1, i129 -1
-; CHECK-NEXT: [[TMP3:%.*]] = lshr i129 [[TMP1]], 112
-; CHECK-NEXT: [[BIASED_EXP:%.*]] = and i129 [[TMP3]], 32767
-; CHECK-NEXT: [[TMP4:%.*]] = and i129 [[TMP1]], 5192296858534827628530496329220095
-; CHECK-NEXT: [[SIGNIFICAND:%.*]] = or i129 [[TMP4]], 5192296858534827628530496329220096
-; CHECK-NEXT: [[EXP_IS_NEGATIVE:%.*]] = icmp ult i129 [[BIASED_EXP]], 16383
+; CHECK-NEXT: [[TMP5:%.*]] = lshr i128 [[TMP0]], 112
+; CHECK-NEXT: [[BIASED_EXP:%.*]] = and i128 [[TMP5]], 32767
+; CHECK-NEXT: [[TMP3:%.*]] = and i128 [[TMP0]], 5192296858534827628530496329220095
+; CHECK-NEXT: [[SIGNIFICAND1:%.*]] = or i128 [[TMP3]], 5192296858534827628530496329220096
+; CHECK-NEXT: [[EXP_IS_NEGATIVE:%.*]] = icmp ult i128 [[BIASED_EXP]], 16383
; CHECK-NEXT: br i1 [[EXP_IS_NEGATIVE]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_CHECK_SATURATE:%.*]]
; CHECK: fp-to-i-if-check.saturate:
-; CHECK-NEXT: [[TMP5:%.*]] = add i129 [[BIASED_EXP]], -16512
-; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i129 [[TMP5]], -129
+; CHECK-NEXT: [[TMP4:%.*]] = add i128 [[BIASED_EXP]], -16512
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i128 [[TMP4]], -129
; CHECK-NEXT: br i1 [[TMP6]], label [[FP_TO_I_IF_SATURATE:%.*]], label [[FP_TO_I_IF_CHECK_EXP_SIZE:%.*]]
; CHECK: fp-to-i-if-saturate:
; CHECK-NEXT: [[SATURATED:%.*]] = select i1 [[TMP2]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
; CHECK: fp-to-i-if-check.exp.size:
-; CHECK-NEXT: [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i129 [[BIASED_EXP]], 16495
+; CHECK-NEXT: [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i128 [[BIASED_EXP]], 16495
; CHECK-NEXT: br i1 [[EXP_SMALLER_MANTISSA_WIDTH]], label [[FP_TO_I_IF_EXP_SMALL:%.*]], label [[FP_TO_I_IF_EXP_LARGE:%.*]]
; CHECK: fp-to-i-if-exp.small:
-; CHECK-NEXT: [[TMP7:%.*]] = sub i129 16495, [[BIASED_EXP]]
-; CHECK-NEXT: [[TMP8:%.*]] = lshr i129 [[SIGNIFICAND]], [[TMP7]]
+; CHECK-NEXT: [[TMP14:%.*]] = sub i128 16495, [[BIASED_EXP]]
+; CHECK-NEXT: [[TMP7:%.*]] = lshr i128 [[SIGNIFICAND1]], [[TMP14]]
+; CHECK-NEXT: [[TMP8:%.*]] = zext i128 [[TMP7]] to i129
; CHECK-NEXT: [[TMP9:%.*]] = mul i129 [[TMP8]], [[SIGN]]
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
; CHECK: fp-to-i-if-exp.large:
-; CHECK-NEXT: [[TMP10:%.*]] = add i129 [[BIASED_EXP]], -16495
+; CHECK-NEXT: [[TMP15:%.*]] = add i128 [[BIASED_EXP]], -16495
+; CHECK-NEXT: [[SIGNIFICAND:%.*]] = zext i128 [[SIGNIFICAND1]] to i129
+; CHECK-NEXT: [[TMP10:%.*]] = zext i128 [[TMP15]] to i129
; CHECK-NEXT: [[TMP11:%.*]] = shl i129 [[SIGNIFICAND]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = mul i129 [[TMP11]], [[SIGN]]
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
@@ -182,32 +190,34 @@ define <2 x i129> @floattosi129v2(<2 x float> %a) {
; CHECK-NEXT: fp-to-i-entryfp-to-i-entry:
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x float> [[A:%.*]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[TMP0]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i129
; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP1]], -1
; CHECK-NEXT: [[SIGN7:%.*]] = select i1 [[TMP3]], i129 1, i129 -1
-; CHECK-NEXT: [[TMP4:%.*]] = lshr i129 [[TMP2]], 23
-; CHECK-NEXT: [[BIASED_EXP8:%.*]] = and i129 [[TMP4]], 255
-; CHECK-NEXT: [[TMP5:%.*]] = and i129 [[TMP2]], 8388607
-; CHECK-NEXT: [[SIGNIFICAND9:%.*]] = or i129 [[TMP5]], 8388608
-; CHECK-NEXT: [[EXP_IS_NEGATIVE10:%.*]] = icmp ult i129 [[BIASED_EXP8]], 127
+; CHECK-NEXT: [[TMP6:%.*]] = lshr i32 [[TMP1]], 23
+; CHECK-NEXT: [[BIASED_EXP8:%.*]] = and i32 [[TMP6]], 255
+; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP1]], 8388607
+; CHECK-NEXT: [[SIGNIFICAND10:%.*]] = or i32 [[TMP4]], 8388608
+; CHECK-NEXT: [[EXP_IS_NEGATIVE10:%.*]] = icmp ult i32 [[BIASED_EXP8]], 127
; CHECK-NEXT: br i1 [[EXP_IS_NEGATIVE10]], label [[FP_TO_I_CLEANUP1:%.*]], label [[FP_TO_I_IF_CHECK_SATURATE2:%.*]]
; CHECK: fp-to-i-if-check.saturate2:
-; CHECK-NEXT: [[TMP6:%.*]] = add i129 [[BIASED_EXP8]], -256
-; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i129 [[TMP6]], -129
+; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[BIASED_EXP8]], -256
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP5]], -129
; CHECK-NEXT: br i1 [[TMP7]], label [[FP_TO_I_IF_SATURATE3:%.*]], label [[FP_TO_I_IF_CHECK_EXP_SIZE4:%.*]]
; CHECK: fp-to-i-if-saturate3:
; CHECK-NEXT: [[SATURATED11:%.*]] = select i1 [[TMP3]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP1]]
; CHECK: fp-to-i-if-check.exp.size4:
-; CHECK-NEXT: [[EXP_SMALLER_MANTISSA_WIDTH12:%.*]] = icmp ult i129 [[BIASED_EXP8]], 150
+; CHECK-NEXT: [[EXP_SMALLER_MANTISSA_WIDTH12:%.*]] = icmp ult i32 [[BIASED_EXP8]], 150
; CHECK-NEXT: br i1 [[EXP_SMALLER_MANTISSA_WIDTH12]], label [[FP_TO_I_IF_EXP_SMALL5:%.*]], label [[FP_TO_I_IF_EXP_LARGE6:%.*]]
; CHECK: fp-to-i-if-exp.small5:
-; CHECK-NEXT: [[TMP8:%.*]] = sub i129 150, [[BIASED_EXP8]]
-; CHECK-NEXT: [[TMP9:%.*]] = lshr i129 [[SIGNIFICAND9]], [[TMP8]]
+; CHECK-NEXT: [[TMP18:%.*]] = sub i32 150, [[BIASED_EXP8]]
+; CHECK-NEXT: [[TMP8:%.*]] = lshr i32 [[SIGNIFICAND10]], [[TMP18]]
+; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i129
; CHECK-NEXT: [[TMP10:%.*]] = mul i129 [[TMP9]], [[SIGN7]]
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP1]]
; CHECK: fp-to-i-if-exp.large6:
-; CHECK-NEXT: [[TMP11:%.*]] = add i129 [[BIASED_EXP8]], -150
+; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[BIASED_EXP8]], -150
+; CHECK-NEXT: [[SIGNIFICAND9:%.*]] = zext i32 [[SIGNIFICAND10]] to i129
+; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP20]] to i129
; CHECK-NEXT: [[TMP12:%.*]] = shl i129 [[SIGNIFICAND9]], [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = mul i129 [[TMP12]], [[SIGN7]]
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP1]]
@@ -216,32 +226,34 @@ define <2 x i129> @floattosi129v2(<2 x float> %a) {
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i129> poison, i129 [[TMP14]], i64 0
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[A]], i64 1
; CHECK-NEXT: [[TMP17:%.*]] = bitcast float [[TMP16]] to i32
-; CHECK-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i129
; CHECK-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], -1
; CHECK-NEXT: [[SIGN:%.*]] = select i1 [[TMP19]], i129 1, i129 -1
-; CHECK-NEXT: [[TMP20:%.*]] = lshr i129 [[TMP18]], 23
-; CHECK-NEXT: [[BIASED_EXP:%.*]] = and i129 [[TMP20]], 255
-; CHECK-NEXT: [[TMP21:%.*]] = and i129 [[TMP18]], 8388607
-; CHECK-NEXT: [[SIGNIFICAND:%.*]] = or i129 [[TMP21]], 8388608
-; CHECK-NEXT: [[EXP_IS_NEGATIVE:%.*]] = icmp ult i129 [[BIASED_EXP]], 127
+; CHECK-NEXT: [[TMP21:%.*]] = lshr i32 [[TMP17]], 23
+; CHECK-NEXT: [[BIASED_EXP:%.*]] = and i32 [[TMP21]], 255
+; CHECK-NEXT: [[TMP22:%.*]] = and i32 [[TMP17]], 8388607
+; CHECK-NEXT: [[SIGNIFICAND1:%.*]] = or i32 [[TMP22]], 8388608
+; CHECK-NEXT: [[EXP_IS_NEGATIVE:%.*]] = icmp ult i32 [[BIASED_EXP]], 127
; CHECK-NEXT: br i1 [[EXP_IS_NEGATIVE]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_CHECK_SATURATE:%.*]]
; CHECK: fp-to-i-if-check.saturate:
-; CHECK-NEXT: [[TMP22:%.*]] = add i129 [[BIASED_EXP]], -256
-; CHECK-NEXT: [[TMP23:%.*]] = icmp ult i129 [[TMP22]], -129
+; CHECK-NEXT: [[TMP24:%.*]] = add i32 [[BIASED_EXP]], -256
+; CHECK-NEXT: [[TMP23:%.*]] = icmp ult i32 [[TMP24]], -129
; CHECK-NEXT: br i1 [[TMP23]], label [[FP_TO_I_IF_SATURATE:%.*]], label [[FP_TO_I_IF_CHECK_EXP_SIZE:%.*]]
; CHECK: fp-to-i-if-saturate:
; CHECK-NEXT: [[SATURATED:%.*]] = select i1 [[TMP19]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
; CHECK: fp-to-i-if-check.exp.size:
-; CHECK-NEXT: [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i129 [[BIASED_EXP]], 150
+; CHECK-NEXT: [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i32 [[BIASED_EXP]], 150
; CHECK-NEXT: br i1 [[EXP_SMALLER_MANTISSA_WIDTH]], label [[FP_TO_I_IF_EXP_SMALL:%.*]], label [[FP_TO_I_IF_EXP_LARGE:%.*]]
; CHECK: fp-to-i-if-exp.small:
-; CHECK-NEXT: [[TMP24:%.*]] = sub i129 150, [[BIASED_EXP]]
-; CHECK-NEXT: [[TMP25:%.*]] = lshr i129 [[SIGNIFICAND]], [[TMP24]]
+; CHECK-NEXT: [[TMP32:%.*]] = sub i32 150, [[BIASED_EXP]]
+; CHECK-NEXT: [[TMP33:%.*]] = lshr i32 [[SIGNIFICAND1]], [[TMP32]]
+; CHECK-NEXT: [[TMP25:%.*]] = zext i32 [[TMP33]] to i129
; CHECK-NEXT: [[TMP26:%.*]] = mul i129 [[TMP25]], [[SIGN]]
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
; CHECK: fp-to-i-if-exp.large:
-; CHECK-NEXT: [[TMP27:%.*]] = add i129 [[BIASED_EXP]], -150
+; CHECK-NEXT: [[TMP34:%.*]] = add i32 [[BIASED_EXP]], -150
+; CHECK-NEXT: [[SIGNIFICAND:%.*]] = zext i32 [[SIGNIFICAND1]] to i129
+; CHECK-NEXT: [[TMP27:%.*]] = zext i32 [[TMP34]] to i129
; CHECK-NEXT: [[TMP28:%.*]] = shl i129 [[SIGNIFICAND]], [[TMP27]]
; CHECK-NEXT: [[TMP29:%.*]] = mul i129 [[TMP28]], [[SIGN]]
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
diff --git a/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptoui129.ll b/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptoui129.ll
index 442ba82d7ffe6..fd29c01ef580d 100644
--- a/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptoui129.ll
+++ b/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptoui129.ll
@@ -16,32 +16,34 @@ define i129 @floattoui129(float %a) {
; CHECK-LABEL: @floattoui129(
; CHECK-NEXT: fp-to-i-entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A:%.*]] to i32
-; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i129
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP0]], -1
; CHECK-NEXT: [[SIGN:%.*]] = select i1 [[TMP2]], i129 1, i129 -1
-; CHECK-NEXT: [[TMP3:%.*]] = lshr i129 [[TMP1]], 23
-; CHECK-NEXT: [[BIASED_EXP:%.*]] = and i129 [[TMP3]], 255
-; CHECK-NEXT: [[TMP4:%.*]] = and i129 [[TMP1]], 8388607
-; CHECK-NEXT: [[SIGNIFICAND:%.*]] = or i129 [[TMP4]], 8388608
-; CHECK-NEXT: [[EXP_IS_NEGATIVE:%.*]] = icmp ult i129 [[BIASED_EXP]], 127
+; CHECK-NEXT: [[TMP5:%.*]] = lshr i32 [[TMP0]], 23
+; CHECK-NEXT: [[BIASED_EXP:%.*]] = and i32 [[TMP5]], 255
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 8388607
+; CHECK-NEXT: [[SIGNIFICAND1:%.*]] = or i32 [[TMP3]], 8388608
+; CHECK-NEXT: [[EXP_IS_NEGATIVE:%.*]] = icmp ult i32 [[BIASED_EXP]], 127
; CHECK-NEXT: br i1 [[EXP_IS_NEGATIVE]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_CHECK_SATURATE:%.*]]
; CHECK: fp-to-i-if-check.saturate:
-; CHECK-NEXT: [[TMP5:%.*]] = add i129 [[BIASED_EXP]], -256
-; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i129 [[TMP5]], -129
+; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[BIASED_EXP]], -256
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP4]], -129
; CHECK-NEXT: br i1 [[TMP6]], label [[FP_TO_I_IF_SATURATE:%.*]], label [[FP_TO_I_IF_CHECK_EXP_SIZE:%.*]]
; CHECK: fp-to-i-if-saturate:
; CHECK-NEXT: [[SATURATED:%.*]] = select i1 [[TMP2]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
; CHECK: fp-to-i-if-check.exp.size:
-; CHECK-NEXT: [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i129 [[BIASED_EXP]], 150
+; CHECK-NEXT: [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i32 [[BIASED_EXP]], 150
; CHECK-NEXT: br i1 [[EXP_SMALLER_MANTISSA_WIDTH]], label [[FP_TO_I_IF_EXP_SMALL:%.*]], label [[FP_TO_I_IF_EXP_LARGE:%.*]]
; CHECK: fp-to-i-if-exp.small:
-; CHECK-NEXT: [[TMP7:%.*]] = sub i129 150, [[BIASED_EXP]]
-; CHECK-NEXT: [[TMP8:%.*]] = lshr i129 [[SIGNIFICAND]], [[TMP7]]
+; CHECK-NEXT: [[TMP14:%.*]] = sub i32 150, [[BIASED_EXP]]
+; CHECK-NEXT: [[TMP7:%.*]] = lshr i32 [[SIGNIFICAND1]], [[TMP14]]
+; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i129
; CHECK-NEXT: [[TMP9:%.*]] = mul i129 [[TMP8]], [[SIGN]]
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
; CHECK: fp-to-i-if-exp.large:
-; CHECK-NEXT: [[TMP10:%.*]] = add i129 [[BIASED_EXP]], -150
+; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[BIASED_EXP]], -150
+; CHECK-NEXT: [[SIGNIFICAND:%.*]] = zext i32 [[SIGNIFICAND1]] to i129
+; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP15]] to i129
; CHECK-NEXT: [[TMP11:%.*]] = shl i129 [[SIGNIFICAND]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = mul i129 [[TMP11]], [[SIGN]]
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
@@ -57,32 +59,34 @@ define i129 @doubletoui129(double %a) {
; CHECK-LABEL: @doubletoui129(
; CHECK-NEXT: fp-to-i-entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast double [[A:%.*]] to i64
-; CHECK-NEXT: [[TMP1:%.*]] = zext i64 [[TMP0]] to i129
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i64 [[TMP0]], -1
; CHECK-NEXT: [[SIGN:%.*]] = select i1 [[TMP2]], i129 1, i129 -1
-; CHECK-NEXT: [[TMP3:%.*]] = lshr i129 [[TMP1]], 52
-; CHECK-NEXT: [[BIASED_EXP:%.*]] = and i129 [[TMP3]], 2047
-; CHECK-NEXT: [[TMP4:%.*]] = and i129 [[TMP1]], 4503599627370495
-; CHECK-NEXT: [[SIGNIFICAND:%.*]] = or i129 [[TMP4]], 4503599627370496
-; CHECK-NEXT: [[EXP_IS_NEGATIVE:%.*]] = icmp ult i129 [[BIASED_EXP]], 1023
+; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP0]], 52
+; CHECK-NEXT: [[BIASED_EXP:%.*]] = and i64 [[TMP5]], 2047
+; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP0]], 4503599627370495
+; CHECK-NEXT: [[SIGNIFICAND1:%.*]] = or i64 [[TMP3]], 4503599627370496
+; CHECK-NEXT: [[EXP_IS_NEGATIVE:%.*]] = icmp ult i64 [[BIASED_EXP]], 1023
; CHECK-NEXT: br i1 [[EXP_IS_NEGATIVE]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_CHECK_SATURATE:%.*]]
; CHECK: fp-to-i-if-check.saturate:
-; CHECK-NEXT: [[TMP5:%.*]] = add i129 [[BIASED_EXP]], -1152
-; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i129 [[TMP5]], -129
+; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[BIASED_EXP]], -1152
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP4]], -129
; CHECK-NEXT: br i1 [[TMP6]], label [[FP_TO_I_IF_SATURATE:%.*]], label [[FP_TO_I_IF_CHECK_EXP_SIZE:%.*]]
; CHECK: fp-to-i-if-saturate:
; CHECK-NEXT: [[SATURATED:%.*]] = select i1 [[TMP2]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
; CHECK: fp-to-i-if-check.exp.size:
-; CHECK-NEXT: [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i129 [[BIASED_EXP]], 1075
+; CHECK-NEXT: [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i64 [[BIASED_EXP]], 1075
; CHECK-NEXT: br i1 [[EXP_SMALLER_MANTISSA_WIDTH]], label [[FP_TO_I_IF_EXP_SMALL:%.*]], label [[FP_TO_I_IF_EXP_LARGE:%.*]]
; CHECK: fp-to-i-if-exp.small:
-; CHECK-NEXT: [[TMP7:%.*]] = sub i129 1075, [[BIASED_EXP]]
-; CHECK-NEXT: [[TMP8:%.*]] = lshr i129 [[SIGNIFICAND]], [[TMP7]]
+; CHECK-NEXT: [[TMP14:%.*]] = sub i64 1075, [[BIASED_EXP]]
+; CHECK-NEXT: [[TMP7:%.*]] = lshr i64 [[SIGNIFICAND1]], [[TMP14]]
+; CHECK-NEXT: [[TMP8:%.*]] = zext i64 [[TMP7]] to i129
; CHECK-NEXT: [[TMP9:%.*]] = mul i129 [[TMP8]], [[SIGN]]
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
; CHECK: fp-to-i-if-exp.large:
-; CHECK-NEXT: [[TMP10:%.*]] = add i129 [[BIASED_EXP]], -1075
+; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[BIASED_EXP]], -1075
+; CHECK-NEXT: [[SIGNIFICAND:%.*]] = zext i64 [[SIGNIFICAND1]] to i129
+; CHECK-NEXT: [[TMP10:%.*]] = zext i64 [[TMP15]] to i129
; CHECK-NEXT: [[TMP11:%.*]] = shl i129 [[SIGNIFICAND]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = mul i129 [[TMP11]], [[SIGN]]
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
@@ -99,32 +103,34 @@ define i129 @x86_fp80toui129(x86_fp80 %a) {
; CHECK-NEXT: fp-to-i-entry:
; CHECK-NEXT: [[TMP0:%.*]] = fpext x86_fp80 [[A:%.*]] to fp128
; CHECK-NEXT: [[TMP1:%.*]] = bitcast fp128 [[TMP0]] to i128
-; CHECK-NEXT: [[TMP2:%.*]] = zext i128 [[TMP1]] to i129
; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i128 [[TMP1]], -1
; CHECK-NEXT: [[SIGN:%.*]] = select i1 [[TMP3]], i129 1, i129 -1
-; CHECK-NEXT: [[TMP4:%.*]] = lshr i129 [[TMP2]], 112
-; CHECK-NEXT: [[BIASED_EXP:%.*]] = and i129 [[TMP4]], 32767
-; CHECK-NEXT: [[TMP5:%.*]] = and i129 [[TMP2]], 5192296858534827628530496329220095
-; CHECK-NEXT: [[SIGNIFICAND:%.*]] = or i129 [[TMP5]], 5192296858534827628530496329220096
-; CHECK-NEXT: [[EXP_IS_NEGATIVE:%.*]] = icmp ult i129 [[BIASED_EXP]], 16383
+; CHECK-NEXT: [[TMP6:%.*]] = lshr i128 [[TMP1]], 112
+; CHECK-NEXT: [[BIASED_EXP:%.*]] = and i128 [[TMP6]], 32767
+; CHECK-NEXT: [[TMP4:%.*]] = and i128 [[TMP1]], 5192296858534827628530496329220095
+; CHECK-NEXT: [[SIGNIFICAND1:%.*]] = or i128 [[TMP4]], 5192296858534827628530496329220096
+; CHECK-NEXT: [[EXP_IS_NEGATIVE:%.*]] = icmp ult i128 [[BIASED_EXP]], 16383
; CHECK-NEXT: br i1 [[EXP_IS_NEGATIVE]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_CHECK_SATURATE:%.*]]
; CHECK: fp-to-i-if-check.saturate:
-; CHECK-NEXT: [[TMP6:%.*]] = add i129 [[BIASED_EXP]], -16512
-; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i129 [[TMP6]], -129
+; CHECK-NEXT: [[TMP5:%.*]] = add i128 [[BIASED_EXP]], -16512
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i128 [[TMP5]], -129
; CHECK-NEXT: br i1 [[TMP7]], label [[FP_TO_I_IF_SATURATE:%.*]], label [[FP_TO_I_IF_CHECK_EXP_SIZE:%.*]]
; CHECK: fp-to-i-if-saturate:
; CHECK-NEXT: [[SATURATED:%.*]] = select i1 [[TMP3]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
; CHECK: fp-to-i-if-check.exp.size:
-; CHECK-NEXT: [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i129 [[BIASED_EXP]], 16495
+; CHECK-NEXT: [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i128 [[BIASED_EXP]], 16495
; CHECK-NEXT: br i1 [[EXP_SMALLER_MANTISSA_WIDTH]], label [[FP_TO_I_IF_EXP_SMALL:%.*]], label [[FP_TO_I_IF_EXP_LARGE:%.*]]
; CHECK: fp-to-i-if-exp.small:
-; CHECK-NEXT: [[TMP8:%.*]] = sub i129 16495, [[BIASED_EXP]]
-; CHECK-NEXT: [[TMP9:%.*]] = lshr i129 [[SIGNIFICAND]], [[TMP8]]
+; CHECK-NEXT: [[TMP15:%.*]] = sub i128 16495, [[BIASED_EXP]]
+; CHECK-NEXT: [[TMP8:%.*]] = lshr i128 [[SIGNIFICAND1]], [[TMP15]]
+; CHECK-NEXT: [[TMP9:%.*]] = zext i128 [[TMP8]] to i129
; CHECK-NEXT: [[TMP10:%.*]] = mul i129 [[TMP9]], [[SIGN]]
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
; CHECK: fp-to-i-if-exp.large:
-; CHECK-NEXT: [[TMP11:%.*]] = add i129 [[BIASED_EXP]], -16495
+; CHECK-NEXT: [[TMP16:%.*]] = add i128 [[BIASED_EXP]], -16495
+; CHECK-NEXT: [[SIGNIFICAND:%.*]] = zext i128 [[SIGNIFICAND1]] to i129
+; CHECK-NEXT: [[TMP11:%.*]] = zext i128 [[TMP16]] to i129
; CHECK-NEXT: [[TMP12:%.*]] = shl i129 [[SIGNIFICAND]], [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = mul i129 [[TMP12]], [[SIGN]]
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
@@ -140,32 +146,34 @@ define i129 @fp128toui129(fp128 %a) {
; CHECK-LABEL: @fp128toui129(
; CHECK-NEXT: fp-to-i-entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast fp128 [[A:%.*]] to i128
-; CHECK-NEXT: [[TMP1:%.*]] = zext i128 [[TMP0]] to i129
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i128 [[TMP0]], -1
; CHECK-NEXT: [[SIGN:%.*]] = select i1 [[TMP2]], i129 1, i129 -1
-; CHECK-NEXT: [[TMP3:%.*]] = lshr i129 [[TMP1]], 112
-; CHECK-NEXT: [[BIASED_EXP:%.*]] = and i129 [[TMP3]], 32767
-; CHECK-NEXT: [[TMP4:%.*]] = and i129 [[TMP1]], 5192296858534827628530496329220095
-; CHECK-NEXT: [[SIGNIFICAND:%.*]] = or i129 [[TMP4]], 5192296858534827628530496329220096
-; CHECK-NEXT: [[EXP_IS_NEGATIVE:%.*]] = icmp ult i129 [[BIASED_EXP]], 16383
+; CHECK-NEXT: [[TMP5:%.*]] = lshr i128 [[TMP0]], 112
+; CHECK-NEXT: [[BIASED_EXP:%.*]] = and i128 [[TMP5]], 32767
+; CHECK-NEXT: [[TMP3:%.*]] = and i128 [[TMP0]], 5192296858534827628530496329220095
+; CHECK-NEXT: [[SIGNIFICAND1:%.*]] = or i128 [[TMP3]], 5192296858534827628530496329220096
+; CHECK-NEXT: [[EXP_IS_NEGATIVE:%.*]] = icmp ult i128 [[BIASED_EXP]], 16383
; CHECK-NEXT: br i1 [[EXP_IS_NEGATIVE]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_CHECK_SATURATE:%.*]]
; CHECK: fp-to-i-if-check.saturate:
-; CHECK-NEXT: [[TMP5:%.*]] = add i129 [[BIASED_EXP]], -16512
-; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i129 [[TMP5]], -129
+; CHECK-NEXT: [[TMP4:%.*]] = add i128 [[BIASED_EXP]], -16512
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i128 [[TMP4]], -129
; CHECK-NEXT: br i1 [[TMP6]], label [[FP_TO_I_IF_SATURATE:%.*]], label [[FP_TO_I_IF_CHECK_EXP_SIZE:%.*]]
; CHECK: fp-to-i-if-saturate:
; CHECK-NEXT: [[SATURATED:%.*]] = select i1 [[TMP2]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
; CHECK: fp-to-i-if-check.exp.size:
-; CHECK-NEXT: [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i129 [[BIASED_EXP]], 16495
+; CHECK-NEXT: [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i128 [[BIASED_EXP]], 16495
; CHECK-NEXT: br i1 [[EXP_SMALLER_MANTISSA_WIDTH]], label [[FP_TO_I_IF_EXP_SMALL:%.*]], label [[FP_TO_I_IF_EXP_LARGE:%.*]]
; CHECK: fp-to-i-if-exp.small:
-; CHECK-NEXT: [[TMP7:%.*]] = sub i129 16495, [[BIASED_EXP]]
-; CHECK-NEXT: [[TMP8:%.*]] = lshr i129 [[SIGNIFICAND]], [[TMP7]]
+; CHECK-NEXT: [[TMP14:%.*]] = sub i128 16495, [[BIASED_EXP]]
+; CHECK-NEXT: [[TMP7:%.*]] = lshr i128 [[SIGNIFICAND1]], [[TMP14]]
+; CHECK-NEXT: [[TMP8:%.*]] = zext i128 [[TMP7]] to i129
; CHECK-NEXT: [[TMP9:%.*]] = mul i129 [[TMP8]], [[SIGN]]
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
; CHECK: fp-to-i-if-exp.large:
-; CHECK-NEXT: [[TMP10:%.*]] = add i129 [[BIASED_EXP]], -16495
+; CHECK-NEXT: [[TMP15:%.*]] = add i128 [[BIASED_EXP]], -16495
+; CHECK-NEXT: [[SIGNIFICAND:%.*]] = zext i128 [[SIGNIFICAND1]] to i129
+; CHECK-NEXT: [[TMP10:%.*]] = zext i128 [[TMP15]] to i129
; CHECK-NEXT: [[TMP11:%.*]] = shl i129 [[SIGNIFICAND]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = mul i129 [[TMP11]], [[SIGN]]
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
@@ -182,32 +190,34 @@ define <2 x i129> @floattoui129v2(<2 x float> %a) {
; CHECK-NEXT: fp-to-i-entryfp-to-i-entry:
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x float> [[A:%.*]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[TMP0]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i129
; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP1]], -1
; CHECK-NEXT: [[SIGN7:%.*]] = select i1 [[TMP3]], i129 1, i129 -1
-; CHECK-NEXT: [[TMP4:%.*]] = lshr i129 [[TMP2]], 23
-; CHECK-NEXT: [[BIASED_EXP8:%.*]] = and i129 [[TMP4]], 255
-; CHECK-NEXT: [[TMP5:%.*]] = and i129 [[TMP2]], 8388607
-; CHECK-NEXT: [[SIGNIFICAND9:%.*]] = or i129 [[TMP5]], 8388608
-; CHECK-NEXT: [[EXP_IS_NEGATIVE10:%.*]] = icmp ult i129 [[BIASED_EXP8]], 127
+; CHECK-NEXT: [[TMP6:%.*]] = lshr i32 [[TMP1]], 23
+; CHECK-NEXT: [[BIASED_EXP8:%.*]] = and i32 [[TMP6]], 255
+; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP1]], 8388607
+; CHECK-NEXT: [[SIGNIFICAND10:%.*]] = or i32 [[TMP4]], 8388608
+; CHECK-NEXT: [[EXP_IS_NEGATIVE10:%.*]] = icmp ult i32 [[BIASED_EXP8]], 127
; CHECK-NEXT: br i1 [[EXP_IS_NEGATIVE10]], label [[FP_TO_I_CLEANUP1:%.*]], label [[FP_TO_I_IF_CHECK_SATURATE2:%.*]]
; CHECK: fp-to-i-if-check.saturate2:
-; CHECK-NEXT: [[TMP6:%.*]] = add i129 [[BIASED_EXP8]], -256
-; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i129 [[TMP6]], -129
+; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[BIASED_EXP8]], -256
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP5]], -129
; CHECK-NEXT: br i1 [[TMP7]], label [[FP_TO_I_IF_SATURATE3:%.*]], label [[FP_TO_I_IF_CHECK_EXP_SIZE4:%.*]]
; CHECK: fp-to-i-if-saturate3:
; CHECK-NEXT: [[SATURATED11:%.*]] = select i1 [[TMP3]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP1]]
; CHECK: fp-to-i-if-check.exp.size4:
-; CHECK-NEXT: [[EXP_SMALLER_MANTISSA_WIDTH12:%.*]] = icmp ult i129 [[BIASED_EXP8]], 150
+; CHECK-NEXT: [[EXP_SMALLER_MANTISSA_WIDTH12:%.*]] = icmp ult i32 [[BIASED_EXP8]], 150
; CHECK-NEXT: br i1 [[EXP_SMALLER_MANTISSA_WIDTH12]], label [[FP_TO_I_IF_EXP_SMALL5:%.*]], label [[FP_TO_I_IF_EXP_LARGE6:%.*]]
; CHECK: fp-to-i-if-exp.small5:
-; CHECK-NEXT: [[TMP8:%.*]] = sub i129 150, [[BIASED_EXP8]]
-; CHECK-NEXT: [[TMP9:%.*]] = lshr i129 [[SIGNIFICAND9]], [[TMP8]]
+; CHECK-NEXT: [[TMP18:%.*]] = sub i32 150, [[BIASED_EXP8]]
+; CHECK-NEXT: [[TMP8:%.*]] = lshr i32 [[SIGNIFICAND10]], [[TMP18]]
+; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i129
; CHECK-NEXT: [[TMP10:%.*]] = mul i129 [[TMP9]], [[SIGN7]]
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP1]]
; CHECK: fp-to-i-if-exp.large6:
-; CHECK-NEXT: [[TMP11:%.*]] = add i129 [[BIASED_EXP8]], -150
+; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[BIASED_EXP8]], -150
+; CHECK-NEXT: [[SIGNIFICAND9:%.*]] = zext i32 [[SIGNIFICAND10]] to i129
+; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP20]] to i129
; CHECK-NEXT: [[TMP12:%.*]] = shl i129 [[SIGNIFICAND9]], [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = mul i129 [[TMP12]], [[SIGN7]]
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP1]]
@@ -216,32 +226,34 @@ define <2 x i129> @floattoui129v2(<2 x float> %a) {
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i129> poison, i129 [[TMP14]], i64 0
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[A]], i64 1
; CHECK-NEXT: [[TMP17:%.*]] = bitcast float [[TMP16]] to i32
-; CHECK-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i129
; CHECK-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], -1
; CHECK-NEXT: [[SIGN:%.*]] = select i1 [[TMP19]], i129 1, i129 -1
-; CHECK-NEXT: [[TMP20:%.*]] = lshr i129 [[TMP18]], 23
-; CHECK-NEXT: [[BIASED_EXP:%.*]] = and i129 [[TMP20]], 255
-; CHECK-NEXT: [[TMP21:%.*]] = and i129 [[TMP18]], 8388607
-; CHECK-NEXT: [[SIGNIFICAND:%.*]] = or i129 [[TMP21]], 8388608
-; CHECK-NEXT: [[EXP_IS_NEGATIVE:%.*]] = icmp ult i129 [[BIASED_EXP]], 127
+; CHECK-NEXT: [[TMP21:%.*]] = lshr i32 [[TMP17]], 23
+; CHECK-NEXT: [[BIASED_EXP:%.*]] = and i32 [[TMP21]], 255
+; CHECK-NEXT: [[TMP22:%.*]] = and i32 [[TMP17]], 8388607
+; CHECK-NEXT: [[SIGNIFICAND1:%.*]] = or i32 [[TMP22]], 8388608
+; CHECK-NEXT: [[EXP_IS_NEGATIVE:%.*]] = icmp ult i32 [[BIASED_EXP]], 127
; CHECK-NEXT: br i1 [[EXP_IS_NEGATIVE]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_CHECK_SATURATE:%.*]]
; CHECK: fp-to-i-if-check.saturate:
-; CHECK-NEXT: [[TMP22:%.*]] = add i129 [[BIASED_EXP]], -256
-; CHECK-NEXT: [[TMP23:%.*]] = icmp ult i129 [[TMP22]], -129
+; CHECK-NEXT: [[TMP24:%.*]] = add i32 [[BIASED_EXP]], -256
+; CHECK-NEXT: [[TMP23:%.*]] = icmp ult i32 [[TMP24]], -129
; CHECK-NEXT: br i1 [[TMP23]], label [[FP_TO_I_IF_SATURATE:%.*]], label [[FP_TO_I_IF_CHECK_EXP_SIZE:%.*]]
; CHECK: fp-to-i-if-saturate:
; CHECK-NEXT: [[SATURATED:%.*]] = select i1 [[TMP19]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
; CHECK: fp-to-i-if-check.exp.size:
-; CHECK-NEXT: [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i129 [[BIASED_EXP]], 150
+; CHECK-NEXT: [[EXP_SMALLER_MANTISSA_WIDTH:%.*]] = icmp ult i32 [[BIASED_EXP]], 150
; CHECK-NEXT: br i1 [[EXP_SMALLER_MANTISSA_WIDTH]], label [[FP_TO_I_IF_EXP_SMALL:%.*]], label [[FP_TO_I_IF_EXP_LARGE:%.*]]
; CHECK: fp-to-i-if-exp.small:
-; CHECK-NEXT: [[TMP24:%.*]] = sub i129 150, [[BIASED_EXP]]
-; CHECK-NEXT: [[TMP25:%.*]] = lshr i129 [[SIGNIFICAND]], [[TMP24]]
+; CHECK-NEXT: [[TMP32:%.*]] = sub i32 150, [[BIASED_EXP]]
+; CHECK-NEXT: [[TMP33:%.*]] = lshr i32 [[SIGNIFICAND1]], [[TMP32]]
+; CHECK-NEXT: [[TMP25:%.*]] = zext i32 [[TMP33]] to i129
; CHECK-NEXT: [[TMP26:%.*]] = mul i129 [[TMP25]], [[SIGN]]
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
; CHECK: fp-to-i-if-exp.large:
-; CHECK-NEXT: [[TMP27:%.*]] = add i129 [[BIASED_EXP]], -150
+; CHECK-NEXT: [[TMP34:%.*]] = add i32 [[BIASED_EXP]], -150
+; CHECK-NEXT: [[SIGNIFICAND:%.*]] = zext i32 [[SIGNIFICAND1]] to i129
+; CHECK-NEXT: [[TMP27:%.*]] = zext i32 [[TMP34]] to i129
; CHECK-NEXT: [[TMP28:%.*]] = shl i129 [[SIGNIFICAND]], [[TMP27]]
; CHECK-NEXT: [[TMP29:%.*]] = mul i129 [[TMP28]], [[SIGN]]
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
More information about the llvm-commits mailing list