[llvm] [ExpandIRInsts] Support saturating fptoi (PR #179710)

via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 4 09:37:57 PST 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-llvm-transforms

Author: Nikita Popov (nikic)

<details>
<summary>Changes</summary>

Add support for expanding fptosi.sat and fptoui.sat via IR expansions. Similar to fptosi/fptoui we would get legalization errors otherwise.

The previous expansion for fptosi/fptoui was already saturating -- but those instructions do not actually require saturation, and the implementation of the saturation was incorrect in lots of ways. What this PR does is:

 * For fptosi, remove the unnecessary saturation handling.
 * For fptoui, remove the unnecessary saturation handling and sign multiplication.
 * For fptosi.sat, use the previous saturation handling with fixes: NaNs need to map to 0, and the saturation condition on the exponent was incorrect. (I'm performing the NaN check via fcmp -- there's no requirement to do everything bitwise here.)
 * For fptoui.sat, use a variation of the signed saturation handling: Negative values need to go to zero, and we saturate to the unsigned max.

Proofs: https://alive2.llvm.org/ce/z/Xv9FNd

---

Patch is 161.40 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/179710.diff


5 Files Affected:

- (modified) llvm/lib/CodeGen/ExpandIRInsts.cpp (+86-42) 
- (modified) llvm/test/CodeGen/AMDGPU/fptoi.i128.ll (+469-1416) 
- (modified) llvm/test/Transforms/ExpandIRInsts/X86/expand-fp-convert-small.ll (+103-75) 
- (modified) llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptosi129.ll (+9-51) 
- (modified) llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptoui129.ll (+9-75) 


``````````diff
diff --git a/llvm/lib/CodeGen/ExpandIRInsts.cpp b/llvm/lib/CodeGen/ExpandIRInsts.cpp
index af8dc03ff8037..0e504beac78e2 100644
--- a/llvm/lib/CodeGen/ExpandIRInsts.cpp
+++ b/llvm/lib/CodeGen/ExpandIRInsts.cpp
@@ -40,6 +40,7 @@
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/InitializePasses.h"
@@ -495,7 +496,7 @@ static bool expandFRem(BinaryOperator &I, std::optional<SimplifyQuery> &SQ) {
 /// }
 ///
 /// Replace fp to integer with generated code.
-static void expandFPToI(Instruction *FPToI) {
+static void expandFPToI(Instruction *FPToI, bool IsSaturating, bool IsSigned) {
   // clang-format on
   IRBuilder<> Builder(FPToI);
   auto *FloatVal = FPToI->getOperand(0);
@@ -537,12 +538,15 @@ static void expandFPToI(Instruction *FPToI) {
   BasicBlock *Entry = Builder.GetInsertBlock();
   Function *F = Entry->getParent();
   Entry->setName(Twine(Entry->getName(), "fp-to-i-entry"));
+  BasicBlock *CheckSaturateBB, *SaturateBB;
   BasicBlock *End =
       Entry->splitBasicBlock(Builder.GetInsertPoint(), "fp-to-i-cleanup");
-  BasicBlock *CheckSaturateBB = BasicBlock::Create(
-      Builder.getContext(), "fp-to-i-if-check.saturate", F, End);
-  BasicBlock *SaturateBB =
-      BasicBlock::Create(Builder.getContext(), "fp-to-i-if-saturate", F, End);
+  if (IsSaturating) {
+    CheckSaturateBB = BasicBlock::Create(Builder.getContext(),
+                                         "fp-to-i-if-check.saturate", F, End);
+    SaturateBB =
+        BasicBlock::Create(Builder.getContext(), "fp-to-i-if-saturate", F, End);
+  }
   BasicBlock *CheckExpSizeBB = BasicBlock::Create(
       Builder.getContext(), "fp-to-i-if-check.exp.size", F, End);
   BasicBlock *ExpSmallBB =
@@ -563,40 +567,56 @@ static void expandFPToI(Instruction *FPToI) {
     FloatVal =
         Builder.CreateFPExt(FloatVal, Type::getFP128Ty(Builder.getContext()));
   Value *ARep = Builder.CreateBitCast(FloatVal, FloatIntTy);
-  Value *PosOrNeg =
-      Builder.CreateICmpSGT(ARep, ConstantInt::getSigned(FloatIntTy, -1));
-  Value *Sign = Builder.CreateSelect(PosOrNeg, ConstantInt::getSigned(IntTy, 1),
-                                     ConstantInt::getSigned(IntTy, -1), "sign");
+  Value *PosOrNeg, *Sign;
+  if (IsSigned) {
+    PosOrNeg =
+        Builder.CreateICmpSGT(ARep, ConstantInt::getSigned(FloatIntTy, -1));
+    Sign = Builder.CreateSelect(PosOrNeg, ConstantInt::getSigned(IntTy, 1),
+                                ConstantInt::getSigned(IntTy, -1), "sign");
+  }
   Value *And =
       Builder.CreateLShr(ARep, Builder.getIntN(FloatWidth, FPMantissaWidth));
   Value *BiasedExp = Builder.CreateAnd(
       And, Builder.getIntN(FloatWidth, (1 << ExponentWidth) - 1), "biased.exp");
   Value *Abs = Builder.CreateAnd(ARep, SignificandMask);
   Value *Significand = Builder.CreateOr(Abs, ImplicitBit, "significand");
-  Value *ExpIsNegative = Builder.CreateICmpULT(
+  Value *ZeroResultCond = Builder.CreateICmpULT(
       BiasedExp, Builder.getIntN(FloatWidth, ExponentBias), "exp.is.negative");
-  Builder.CreateCondBr(ExpIsNegative, End, CheckSaturateBB);
-
-  // check.saturate:
-  Builder.SetInsertPoint(CheckSaturateBB);
-  Value *Add1 = Builder.CreateAdd(
-      BiasedExp,
-      ConstantInt::getSigned(FloatIntTy,
-                             -static_cast<int64_t>(ExponentBias + BitWidth)));
-  Value *Cmp3 = Builder.CreateICmpULT(
-      Add1,
-      ConstantInt::getSigned(FloatIntTy, -static_cast<int64_t>(BitWidth)));
-  Builder.CreateCondBr(Cmp3, SaturateBB, CheckExpSizeBB);
-
-  // saturate:
-  Builder.SetInsertPoint(SaturateBB);
-  Value *SignedMax =
-      ConstantInt::get(IntTy, APInt::getSignedMaxValue(BitWidth));
-  Value *SignedMin =
-      ConstantInt::get(IntTy, APInt::getSignedMinValue(BitWidth));
-  Value *Saturated =
-      Builder.CreateSelect(PosOrNeg, SignedMax, SignedMin, "saturated");
-  Builder.CreateBr(End);
+  if (IsSaturating) {
+    Value *IsNaN = Builder.CreateFCmpUNO(FloatVal, FloatVal, "is.nan");
+    ZeroResultCond = Builder.CreateOr(ZeroResultCond, IsNaN);
+    if (!IsSigned) {
+      Value *IsNeg = Builder.CreateIsNeg(ARep);
+      ZeroResultCond = Builder.CreateOr(ZeroResultCond, IsNeg);
+    }
+  }
+  Builder.CreateCondBr(ZeroResultCond, End,
+                       IsSaturating ? CheckSaturateBB : CheckExpSizeBB);
+
+  Value *Saturated;
+  if (IsSaturating) {
+    // check.saturate:
+    Builder.SetInsertPoint(CheckSaturateBB);
+    Value *Cmp3 = Builder.CreateICmpUGE(
+        BiasedExp, ConstantInt::getSigned(
+                       FloatIntTy, static_cast<int64_t>(ExponentBias +
+                                                        BitWidth - IsSigned)));
+    Builder.CreateCondBr(Cmp3, SaturateBB, CheckExpSizeBB);
+
+    // saturate:
+    Builder.SetInsertPoint(SaturateBB);
+    if (IsSigned) {
+      Value *SignedMax =
+          ConstantInt::get(IntTy, APInt::getSignedMaxValue(BitWidth));
+      Value *SignedMin =
+          ConstantInt::get(IntTy, APInt::getSignedMinValue(BitWidth));
+      Saturated =
+          Builder.CreateSelect(PosOrNeg, SignedMax, SignedMin, "saturated");
+    } else {
+      Saturated = ConstantInt::getAllOnesValue(IntTy);
+    }
+    Builder.CreateBr(End);
+  }
 
   // if.end9:
   Builder.SetInsertPoint(CheckExpSizeBB);
@@ -609,9 +629,10 @@ static void expandFPToI(Instruction *FPToI) {
   Builder.SetInsertPoint(ExpSmallBB);
   Value *Sub13 = Builder.CreateSub(
       Builder.getIntN(FloatWidth, ExponentBias + FPMantissaWidth), BiasedExp);
-  Value *Shr14 =
+  Value *ExpSmallRes =
       Builder.CreateZExtOrTrunc(Builder.CreateLShr(Significand, Sub13), IntTy);
-  Value *Mul = Builder.CreateMul(Shr14, Sign);
+  if (IsSigned)
+    ExpSmallRes = Builder.CreateMul(ExpSmallRes, Sign);
   Builder.CreateBr(End);
 
   // exp.large:
@@ -621,18 +642,20 @@ static void expandFPToI(Instruction *FPToI) {
       ConstantInt::getSigned(
           FloatIntTy, -static_cast<int64_t>(ExponentBias + FPMantissaWidth)));
   Value *SignificandCast = Builder.CreateZExtOrTrunc(Significand, IntTy);
-  Value *Shl = Builder.CreateShl(SignificandCast,
-                                 Builder.CreateZExtOrTrunc(Sub15, IntTy));
-  Value *Mul16 = Builder.CreateMul(Shl, Sign);
+  Value *ExpLargeRes = Builder.CreateShl(
+      SignificandCast, Builder.CreateZExtOrTrunc(Sub15, IntTy));
+  if (IsSigned)
+    ExpLargeRes = Builder.CreateMul(ExpLargeRes, Sign);
   Builder.CreateBr(End);
 
   // cleanup:
   Builder.SetInsertPoint(End, End->begin());
-  PHINode *Retval0 = Builder.CreatePHI(FPToI->getType(), 4);
+  PHINode *Retval0 = Builder.CreatePHI(FPToI->getType(), 3 + IsSaturating);
 
-  Retval0->addIncoming(Saturated, SaturateBB);
-  Retval0->addIncoming(Mul, ExpSmallBB);
-  Retval0->addIncoming(Mul16, ExpLargeBB);
+  if (IsSaturating)
+    Retval0->addIncoming(Saturated, SaturateBB);
+  Retval0->addIncoming(ExpSmallRes, ExpSmallBB);
+  Retval0->addIncoming(ExpLargeRes, ExpLargeBB);
   Retval0->addIncoming(Builder.getIntN(BitWidth, 0), Entry);
 
   FPToI->replaceAllUsesWith(Retval0);
@@ -1080,6 +1103,16 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
              // The backend has peephole optimizations for powers of two.
              // TODO: We don't consider vectors here.
              && !isConstantPowerOfTwo(I.getOperand(1), isSigned(I.getOpcode()));
+    case Instruction::Call: {
+      auto *II = dyn_cast<IntrinsicInst>(&I);
+      if (II && (II->getIntrinsicID() == Intrinsic::fptoui_sat ||
+                 II->getIntrinsicID() == Intrinsic::fptosi_sat)) {
+        return !DisableExpandLargeFp &&
+               cast<IntegerType>(Ty->getScalarType())->getIntegerBitWidth() >
+                   MaxLegalFpConvertBitWidth;
+      }
+      return false;
+    }
     }
 
     return false;
@@ -1115,8 +1148,10 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
     }
 
     case Instruction::FPToUI:
+      expandFPToI(I, /*IsSaturating=*/false, /*IsSigned=*/false);
+      break;
     case Instruction::FPToSI:
-      expandFPToI(I);
+      expandFPToI(I, /*IsSaturating=*/false, /*IsSigned=*/true);
       break;
 
     case Instruction::UIToFP:
@@ -1132,6 +1167,15 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
     case Instruction::SRem:
       expandRemainder(cast<BinaryOperator>(I));
       break;
+
+    case Instruction::Call: {
+      auto *II = cast<IntrinsicInst>(I);
+      assert(II->getIntrinsicID() == Intrinsic::fptoui_sat ||
+             II->getIntrinsicID() == Intrinsic::fptosi_sat);
+      expandFPToI(I, /*IsSaturating=*/true,
+                  /*IsSigned=*/II->getIntrinsicID() == Intrinsic::fptosi_sat);
+      break;
+    }
     }
   }
 
diff --git a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
index 200fbf5d220b4..472e79f58969f 100644
--- a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
@@ -16,105 +16,79 @@ define i128 @fptosi_f64_to_i128(double %x) {
 ; SDAG-NEXT:    v_mov_b32_e32 v2, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v1, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v3, 0
-; SDAG-NEXT:    s_and_saveexec_b64 s[8:9], vcc
-; SDAG-NEXT:    s_cbranch_execz .LBB0_10
-; SDAG-NEXT:  ; %bb.1: ; %fp-to-i-if-check.saturate
-; SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffffb81, v6
-; SDAG-NEXT:    v_addc_co_u32_e64 v1, s[6:7], 0, -1, vcc
-; SDAG-NEXT:    s_movk_i32 s6, 0xff7f
-; SDAG-NEXT:    s_mov_b32 s7, -1
-; SDAG-NEXT:    v_cmp_lt_i64_e64 s[4:5], -1, v[4:5]
-; SDAG-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1]
-; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
 ; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; SDAG-NEXT:    s_xor_b64 s[10:11], exec, s[6:7]
-; SDAG-NEXT:    s_cbranch_execz .LBB0_7
-; SDAG-NEXT:  ; %bb.2: ; %fp-to-i-if-check.exp.size
-; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
-; SDAG-NEXT:    v_add_co_u32_e32 v9, vcc, -1, v0
-; SDAG-NEXT:    s_mov_b64 s[6:7], 0x432
+; SDAG-NEXT:    s_cbranch_execz .LBB0_6
+; SDAG-NEXT:  ; %bb.1: ; %fp-to-i-if-check.exp.size
+; SDAG-NEXT:    s_mov_b64 s[4:5], 0x432
+; SDAG-NEXT:    v_ashrrev_i32_e32 v8, 31, v5
 ; SDAG-NEXT:    v_and_b32_e32 v0, 0xfffff, v5
-; SDAG-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[6:7]
-; SDAG-NEXT:    v_cndmask_b32_e64 v8, -1, 0, s[4:5]
-; SDAG-NEXT:    v_cndmask_b32_e64 v10, -1, 1, s[4:5]
+; SDAG-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[6:7]
+; SDAG-NEXT:    v_or_b32_e32 v9, 1, v8
 ; SDAG-NEXT:    v_or_b32_e32 v5, 0x100000, v0
 ; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; SDAG-NEXT:    s_xor_b64 s[12:13], exec, s[6:7]
-; SDAG-NEXT:    s_cbranch_execz .LBB0_4
-; SDAG-NEXT:  ; %bb.3: ; %fp-to-i-if-exp.large
+; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; SDAG-NEXT:    s_xor_b64 s[8:9], exec, s[4:5]
+; SDAG-NEXT:    s_cbranch_execz .LBB0_3
+; SDAG-NEXT:  ; %bb.2: ; %fp-to-i-if-exp.large
 ; SDAG-NEXT:    v_sub_u32_e32 v0, 0x473, v6
 ; SDAG-NEXT:    v_add_u32_e32 v2, 0xfffffb8d, v6
 ; SDAG-NEXT:    v_add_u32_e32 v3, 0xfffffbcd, v6
 ; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v0, v[4:5]
 ; SDAG-NEXT:    v_lshlrev_b64 v[6:7], v2, v[4:5]
 ; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v3
-; SDAG-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v3
+; SDAG-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v3
 ; SDAG-NEXT:    v_lshlrev_b64 v[3:4], v3, v[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
-; SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, v1, s[6:7]
+; SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, v1, s[4:5]
 ; SDAG-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
 ; SDAG-NEXT:    v_cndmask_b32_e32 v7, 0, v3, vcc
-; SDAG-NEXT:    v_cndmask_b32_e64 v5, 0, v0, s[6:7]
-; SDAG-NEXT:    v_mul_lo_u32 v12, v10, v1
-; SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v7, v10, 0
+; SDAG-NEXT:    v_cndmask_b32_e64 v5, 0, v0, s[4:5]
+; SDAG-NEXT:    v_mul_lo_u32 v11, v9, v1
+; SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v7, v9, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v2, 0
-; SDAG-NEXT:    v_cndmask_b32_e32 v13, 0, v4, vcc
-; SDAG-NEXT:    v_mad_u64_u32 v[3:4], s[6:7], v13, v10, v[1:2]
-; SDAG-NEXT:    v_mul_lo_u32 v11, v8, v5
-; SDAG-NEXT:    v_mad_u64_u32 v[5:6], s[6:7], v10, v5, 0
+; SDAG-NEXT:    v_cndmask_b32_e32 v12, 0, v4, vcc
+; SDAG-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v12, v9, v[1:2]
+; SDAG-NEXT:    v_mul_lo_u32 v10, v8, v5
+; SDAG-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v9, v5, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v1, v3
-; SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[6:7], v7, v8, v[1:2]
-; SDAG-NEXT:    v_add3_u32 v6, v6, v12, v11
-; SDAG-NEXT:    v_mad_u64_u32 v[5:6], s[6:7], v9, v7, v[5:6]
+; SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v7, v8, v[1:2]
+; SDAG-NEXT:    v_add3_u32 v6, v6, v11, v10
+; SDAG-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v8, v7, v[5:6]
 ; SDAG-NEXT:    v_add_co_u32_e32 v2, vcc, v4, v2
-; SDAG-NEXT:    v_addc_co_u32_e64 v3, s[6:7], 0, 0, vcc
-; SDAG-NEXT:    v_mul_lo_u32 v10, v9, v13
-; SDAG-NEXT:    v_mul_lo_u32 v7, v9, v7
-; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[6:7], v13, v8, v[2:3]
+; SDAG-NEXT:    v_addc_co_u32_e64 v3, s[4:5], 0, 0, vcc
+; SDAG-NEXT:    v_mul_lo_u32 v9, v8, v12
+; SDAG-NEXT:    v_mul_lo_u32 v7, v8, v7
+; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v12, v8, v[2:3]
 ; SDAG-NEXT:    ; implicit-def: $vgpr8
-; SDAG-NEXT:    ; implicit-def: $vgpr9
-; SDAG-NEXT:    v_add3_u32 v4, v7, v6, v10
+; SDAG-NEXT:    v_add3_u32 v4, v7, v6, v9
 ; SDAG-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v5
 ; SDAG-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v4, vcc
 ; SDAG-NEXT:    ; implicit-def: $vgpr6_vgpr7
 ; SDAG-NEXT:    ; implicit-def: $vgpr4_vgpr5
-; SDAG-NEXT:    ; implicit-def: $vgpr10
-; SDAG-NEXT:  .LBB0_4: ; %Flow
-; SDAG-NEXT:    s_andn2_saveexec_b64 s[6:7], s[12:13]
-; SDAG-NEXT:    s_cbranch_execz .LBB0_6
-; SDAG-NEXT:  ; %bb.5: ; %fp-to-i-if-exp.small
+; SDAG-NEXT:    ; implicit-def: $vgpr9
+; SDAG-NEXT:  .LBB0_3: ; %Flow
+; SDAG-NEXT:    s_andn2_saveexec_b64 s[4:5], s[8:9]
+; SDAG-NEXT:    s_cbranch_execz .LBB0_5
+; SDAG-NEXT:  ; %bb.4: ; %fp-to-i-if-exp.small
 ; SDAG-NEXT:    v_sub_u32_e32 v0, 0x433, v6
 ; SDAG-NEXT:    v_lshrrev_b64 v[4:5], v0, v[4:5]
 ; SDAG-NEXT:    v_mov_b32_e32 v2, 0
-; SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[12:13], v4, v10, 0
-; SDAG-NEXT:    v_mad_u64_u32 v[6:7], s[12:13], v5, v10, v[1:2]
+; SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[8:9], v4, v9, 0
+; SDAG-NEXT:    v_mad_u64_u32 v[6:7], s[8:9], v5, v9, v[1:2]
 ; SDAG-NEXT:    v_mov_b32_e32 v1, v6
-; SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[12:13], v4, v8, v[1:2]
+; SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[8:9], v4, v8, v[1:2]
 ; SDAG-NEXT:    v_add_co_u32_e32 v2, vcc, v7, v2
-; SDAG-NEXT:    v_addc_co_u32_e64 v3, s[12:13], 0, 0, vcc
-; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[12:13], v5, v8, v[2:3]
-; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[12:13], v9, v4, v[2:3]
-; SDAG-NEXT:    v_mad_u64_u32 v[3:4], s[12:13], v9, v4, v[3:4]
-; SDAG-NEXT:    v_mad_i32_i24 v3, v9, v5, v3
-; SDAG-NEXT:  .LBB0_6: ; %Flow1
-; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
-; SDAG-NEXT:  .LBB0_7: ; %Flow2
-; SDAG-NEXT:    s_andn2_saveexec_b64 s[6:7], s[10:11]
-; SDAG-NEXT:  ; %bb.8: ; %fp-to-i-if-saturate
-; SDAG-NEXT:    v_bfrev_b32_e32 v0, 1
-; SDAG-NEXT:    v_bfrev_b32_e32 v1, -2
-; SDAG-NEXT:    v_cndmask_b32_e64 v2, v0, v1, s[4:5]
-; SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[4:5]
-; SDAG-NEXT:    v_mov_b32_e32 v3, v2
-; SDAG-NEXT:    v_mov_b32_e32 v0, v1
-; SDAG-NEXT:    v_mov_b32_e32 v2, v1
-; SDAG-NEXT:  ; %bb.9: ; %Flow3
+; SDAG-NEXT:    v_addc_co_u32_e64 v3, s[8:9], 0, 0, vcc
+; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[8:9], v5, v8, v[2:3]
+; SDAG-NEXT:    v_mul_lo_u32 v5, v8, v5
+; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[8:9], v8, v4, v[2:3]
+; SDAG-NEXT:    v_mul_lo_u32 v4, v8, v4
+; SDAG-NEXT:    v_add3_u32 v3, v4, v3, v5
+; SDAG-NEXT:  .LBB0_5: ; %Flow1
+; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
+; SDAG-NEXT:  .LBB0_6: ; %fp-to-i-cleanup
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
-; SDAG-NEXT:  .LBB0_10: ; %fp-to-i-cleanup
-; SDAG-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-LABEL: fptosi_f64_to_i128:
@@ -134,24 +108,14 @@ define i128 @fptosi_f64_to_i128(double %x) {
 ; GISEL-NEXT:    v_mov_b32_e32 v1, s5
 ; GISEL-NEXT:    v_mov_b32_e32 v2, s6
 ; GISEL-NEXT:    v_mov_b32_e32 v3, s7
-; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], vcc
-; GISEL-NEXT:    s_cbranch_execz .LBB0_10
-; GISEL-NEXT:  ; %bb.1: ; %fp-to-i-if-check.saturate
-; GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffffb81, v6
-; GISEL-NEXT:    v_mov_b32_e32 v2, 0xffffff80
-; GISEL-NEXT:    v_mov_b32_e32 v3, -1
-; GISEL-NEXT:    v_addc_co_u32_e64 v1, s[6:7], 0, -1, vcc
-; GISEL-NEXT:    v_cmp_lt_i64_e64 s[4:5], -1, v[4:5]
-; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3]
-; GISEL-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
-; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; GISEL-NEXT:    s_xor_b64 s[14:15], exec, s[6:7]
-; GISEL-NEXT:    s_cbranch_execz .LBB0_7
-; GISEL-NEXT:  ; %bb.2: ; %fp-to-i-if-check.exp.size
-; GISEL-NEXT:    s_xor_b64 s[6:7], s[4:5], -1
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[6:7]
+; GISEL-NEXT:    s_and_saveexec_b64 s[10:11], vcc
+; GISEL-NEXT:    s_cbranch_execz .LBB0_6
+; GISEL-NEXT:  ; %bb.1: ; %fp-to-i-if-check.exp.size
+; GISEL-NEXT:    v_cmp_ge_i64_e32 vcc, -1, v[4:5]
+; GISEL-NEXT:    s_mov_b32 s4, 0x100000
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
 ; GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GISEL-NEXT:    v_lshlrev_b16_e32 v2, 1, v0
 ; GISEL-NEXT:    v_or_b32_e32 v1, v1, v2
 ; GISEL-NEXT:    v_lshlrev_b16_e32 v3, 2, v0
@@ -205,137 +169,62 @@ define i128 @fptosi_f64_to_i128(double %x) {
 ; GISEL-NEXT:    v_mov_b32_e32 v0, 0x433
 ; GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-NEXT:    v_mov_b32_e32 v2, 0xfffff
-; GISEL-NEXT:    s_mov_b32 s6, 0x100000
 ; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[6:7], v[0:1]
-; GISEL-NEXT:    v_and_or_b32 v5, v5, v2, s6
+; GISEL-NEXT:    v_and_or_b32 v5, v5, v2, s4
 ; GISEL-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
-; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; GISEL-NEXT:    s_xor_b64 s[16:17], exec, s[6:7]
-; GISEL-NEXT:    s_cbranch_execz .LBB0_4
-; GISEL-NEXT:  ; %bb.3: ; %fp-to-i-if-exp.large
+; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GISEL-NEXT:    s_xor_b64 s[12:13], exec, s[4:5]
+; GISEL-NEXT:    s_cbranch_execz .LBB0_3
+; GISEL-NEXT:  ; %bb.2: ; %fp-to-i-if-exp.large
 ; GISEL-NEXT:    v_add_u32_e32 v2, 0xfffffbcd, v6
 ; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v2, v[4:5]
 ; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
 ; GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v11, 0, v1, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v10, v9, 0
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v10, v9, 0
 ; GISEL-NEXT:    v_add_u32_e32 v3, 0xfffffb8d, v6
 ; GISEL-NEXT:    v_sub_u32_e32 v6, 64, v2
 ; GISEL-NEXT:    v_lshrrev_b64 v[6:7], v6, v[4:5]
 ; GISEL-NEXT:    v_lshlrev_b64 v[3:4], v3, v[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v2
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v2
 ; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[8:9], v11, v9, v[0:1]
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, v3, 0, s[6:7]
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[8:9], v10, v8, 0
-; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[8:9], v12, v8, v[5:6]
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[6:7], v11, v9, v[0:1]
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, v3, 0, s[4:5]
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[6:7], v10, v8, 0
+; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[6:7], v12, v8, v[5:6]
 ; GISEL-NEXT:    v_mul_lo_u32 v13, v11, v9
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[8:9], v10, v9, v[1:2]
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[6:7], v10, v9, v[1:2]
 ; GISEL-NEXT:    v_mul_lo_u32 v10, v10, v9
-; GISEL-NEXT:    v_mad_u64_u32 v[1:2], s[10:11], v11, v8, v[5:6]
-; GISEL-NEXT:    v_addc_co_u32_e64 v3, s[10:11], v3, v10, s[10:11]
-; GISEL-NEXT:    v_addc_co_u32_e64 v3, s[8:9], v3, v13, s[8:9]
-; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[8:9], v12, v9, v[3:4]
+; GISEL-NEXT:    v_mad_u64_u32 v[1:2], s[8:9], v11, v8, v[5:6]
+; GISEL-NEXT:    v_addc_co_u32_e64 v3, s[8:9], v3, v10, s[8:9]
+; GISEL-NEXT:    v_addc_c...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/179710


More information about the llvm-commits mailing list