[llvm] [InstCombine] Restore narrowing of double to float for integer casts (PR #190550)

via llvm-commits llvm-commits at lists.llvm.org
Sun Apr 5 15:03:24 PDT 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Kavin Gnanapandithan (KavinTheG)

<details>
<summary>Changes</summary>

Resolves #<!-- -->190503

This patch modifies `visitFPTrunc` to simplify the following expression:

```llvm
fptrunc(OpI (sitofp/uitofp x), (sitofp/uitofp y))
```
to
```llvm
OpI (sitofp/uitofp x), (sitofp/uitofp y)
```

`visitFPTrunc` now calls `isKnownExactCastIntToFP` on the `sitofp`/`uitofp` operands of the binary operator (the casts of `x` and `y`).

This allows a double to be narrowed to a float if the source operands originate from sitofp/uitofp and can be represented exactly in the target float type.

This fixes a regression pointed out in the issue, where `visitFPExt` began folding `fpext(sitofp)` into `uitofp nneg i64 %x to double`, causing `visitFPTrunc` to lose the `fpext` it relied on to recognize the narrowing opportunity. On certain targets, this would cause more expensive operations (e.g., division in f64 instead of f32).

---
Full diff: https://github.com/llvm/llvm-project/pull/190550.diff


3 Files Affected:

- (modified) llvm/include/llvm/Transforms/InstCombine/InstCombiner.h (+1) 
- (modified) llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp (+32-11) 
- (added) llvm/test/Transforms/InstCombine/fptrunc-narrow-fp.ll (+20) 


``````````diff
diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
index bc52bf1168d4a..84f8495888382 100644
--- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
+++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
@@ -481,6 +481,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner {
   /// Return true if the cast from integer to FP can be proven to be exact
   /// for all possible inputs (the conversion does not lose any precision).
   bool isKnownExactCastIntToFP(CastInst &I) const;
+  bool canBeCastedExactlyIntToFP(Value &V, Type *FPTy, bool IsSigned) const;
 
   OverflowResult computeOverflowForUnsignedMul(const Value *LHS,
                                                const Value *RHS,
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index a1d67a0c60ac5..24f3cddbcaf00 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -2030,30 +2030,25 @@ static Type *getMinimumFPType(Value *V, bool PreferBFloat) {
   return V->getType();
 }
 
-bool InstCombiner::isKnownExactCastIntToFP(CastInst &I) const {
-  CastInst::CastOps Opcode = I.getOpcode();
-  assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
-         "Unexpected cast");
-  Value *Src = I.getOperand(0);
-  Type *SrcTy = Src->getType();
-  Type *FPTy = I.getType();
-  bool IsSigned = Opcode == Instruction::SIToFP;
+bool InstCombiner::canBeCastedExactlyIntToFP(Value &V, Type *FPTy,
+                                             bool IsSigned) const {
+  Type *SrcTy = V.getType();
   int SrcSize = (int)SrcTy->getScalarSizeInBits() - IsSigned;
+  int DestNumSigBits = FPTy->getFPMantissaWidth();
 
   // Easy case - if the source integer type has less bits than the FP mantissa,
   // then the cast must be exact.
-  int DestNumSigBits = FPTy->getFPMantissaWidth();
   if (SrcSize <= DestNumSigBits)
     return true;
 
   // Cast from FP to integer and back to FP is independent of the intermediate
   // integer width because of poison on overflow.
   Value *F;
-  if (match(Src, m_FPToI(m_Value(F)))) {
+  if (match(&V, m_FPToI(m_Value(F)))) {
     // If this is uitofp (fptosi F), the source needs an extra bit to avoid
     // potential rounding of negative FP input values.
     int SrcNumSigBits = F->getType()->getFPMantissaWidth();
-    if (!IsSigned && match(Src, m_FPToSI(m_Value())))
+    if (!IsSigned && match(&V, m_FPToSI(m_Value())))
       SrcNumSigBits++;
 
     // [su]itofp (fpto[su]i F) --> exact if the source type has less or equal
@@ -2064,6 +2059,21 @@ bool InstCombiner::isKnownExactCastIntToFP(CastInst &I) const {
       return true;
   }
 
+  return false;
+}
+
+bool InstCombiner::isKnownExactCastIntToFP(CastInst &I) const {
+  CastInst::CastOps Opcode = I.getOpcode();
+  assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
+         "Unexpected cast");
+  Value *Src = I.getOperand(0);
+  Type *SrcTy = Src->getType();
+  Type *FPTy = I.getType();
+  bool IsSigned = Opcode == Instruction::SIToFP;
+  int DestNumSigBits = FPTy->getFPMantissaWidth();
+  if (canBeCastedExactlyIntToFP(*Src, FPTy, IsSigned))
+    return true;
+
   // Try harder to find if the source integer type has less significant bits.
   // Compute number of sign bits or determine trailing zeros.
   KnownBits SrcKnown = computeKnownBits(Src, &I);
@@ -2102,6 +2112,17 @@ Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) {
     Type *LHSMinType = getMinimumFPType(BO->getOperand(0), PreferBFloat);
     Type *RHSMinType = getMinimumFPType(BO->getOperand(1), PreferBFloat);
     unsigned OpWidth = BO->getType()->getFPMantissaWidth();
+
+    if (auto *FPCast = dyn_cast<CastInst>(BO->getOperand(0)))
+      if (isa<UIToFPInst>(FPCast) || isa<SIToFPInst>(FPCast))
+        if (isKnownExactCastIntToFP(*FPCast))
+          LHSMinType = Ty;
+
+    if (auto *FPCast = dyn_cast<CastInst>(BO->getOperand(1)))
+      if (isa<UIToFPInst>(FPCast) || isa<SIToFPInst>(FPCast))
+        if (isKnownExactCastIntToFP(*FPCast))
+          RHSMinType = Ty;
+
     unsigned LHSWidth = LHSMinType->getFPMantissaWidth();
     unsigned RHSWidth = RHSMinType->getFPMantissaWidth();
     unsigned SrcWidth = std::max(LHSWidth, RHSWidth);
diff --git a/llvm/test/Transforms/InstCombine/fptrunc-narrow-fp.ll b/llvm/test/Transforms/InstCombine/fptrunc-narrow-fp.ll
new file mode 100644
index 0000000000000..896dc0869f4f8
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fptrunc-narrow-fp.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define float @narrow_fdiv_sitofp(i64 %x) {
+; CHECK-LABEL: define float @narrow_fdiv_sitofp(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT:    [[MUL:%.*]] = mul i64 [[X]], 27611
+; CHECK-NEXT:    [[TMP1:%.*]] = urem i64 [[MUL]], 74383
+; CHECK-NEXT:    [[TMP2:%.*]] = uitofp nneg i64 [[TMP1]] to float
+; CHECK-NEXT:    [[CONV4:%.*]] = fdiv float [[TMP2]], 7.438300e+04
+; CHECK-NEXT:    ret float [[CONV4]]
+;
+  %mul = mul i64 %x, 27611
+  %1 = urem i64 %mul, 74383
+  %conv  = sitofp i64 %1 to float
+  %conv2 = fpext float %conv to double
+  %div3  = fdiv double %conv2, 7.438300e+04
+  %conv4 = fptrunc double %div3 to float
+  ret float %conv4
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/190550


More information about the llvm-commits mailing list