[llvm] [InstCombine] Restore narrowing of double to float for integer casts (PR #190550)
Kavin Gnanapandithan via llvm-commits
llvm-commits at lists.llvm.org
Sun Apr 5 15:02:49 PDT 2026
https://github.com/KavinTheG created https://github.com/llvm/llvm-project/pull/190550
Resolves #190503
This patch modifies `visitFPTrunc` to simplify the following expression:
```llvm
fptrunc(OpI (sitofp/uitofp x), (sitofp/uitofp y))
```
to
```llvm
OpI (sitofp/uitofp x), (sitofp/uitofp y)
```
`visitFPTrunc` now calls `isKnownExactCastIntToFP` on `x` and `y`.
This allows a double to be narrowed to a float if the source operands originate from sitofp/uitofp and can be represented exactly in the target float type.
This fixes a regression pointed out in the issue, where `visitFPExt` began folding `fpext(sitofp)` into `uitofp nneg i64 %x to double`, causing `visitFPTrunc` to lose the `fpext` it relied on to recognize the narrowing opportunity. On certain targets, this would cause more expensive operations (e.g., division with f64 instead of f32).
>From 6b3a81ab66e06df20796e3951fb4b20806125f2b Mon Sep 17 00:00:00 2001
From: Kavin Gnanapandithan <kavin.balag at gmail.com>
Date: Sun, 5 Apr 2026 12:18:51 -0400
Subject: [PATCH 1/2] Precommit test.
---
.../InstCombine/fptrunc-narrow-fp.ll | 21 +++++++++++++++++++
1 file changed, 21 insertions(+)
create mode 100644 llvm/test/Transforms/InstCombine/fptrunc-narrow-fp.ll
diff --git a/llvm/test/Transforms/InstCombine/fptrunc-narrow-fp.ll b/llvm/test/Transforms/InstCombine/fptrunc-narrow-fp.ll
new file mode 100644
index 0000000000000..1eba158fe3990
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fptrunc-narrow-fp.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define float @narrow_fdiv_sitofp(i64 %x) {
+; CHECK-LABEL: define float @narrow_fdiv_sitofp(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[X]], 27611
+; CHECK-NEXT: [[TMP1:%.*]] = urem i64 [[MUL]], 74383
+; CHECK-NEXT: [[CONV2:%.*]] = uitofp nneg i64 [[TMP1]] to double
+; CHECK-NEXT: [[DIV3:%.*]] = fdiv double [[CONV2]], 7.438300e+04
+; CHECK-NEXT: [[CONV4:%.*]] = fptrunc double [[DIV3]] to float
+; CHECK-NEXT: ret float [[CONV4]]
+;
+ %mul = mul i64 %x, 27611
+ %1 = urem i64 %mul, 74383
+ %conv = sitofp i64 %1 to float
+ %conv2 = fpext float %conv to double
+ %div3 = fdiv double %conv2, 7.438300e+04
+ %conv4 = fptrunc double %div3 to float
+ ret float %conv4
+}
>From fbc4d6b2f77c08e351deb4bdd9c1d48f06591355 Mon Sep 17 00:00:00 2001
From: Kavin Gnanapandithan <kavin.balag at gmail.com>
Date: Sun, 5 Apr 2026 13:45:54 -0400
Subject: [PATCH 2/2] [InstCombine] Restore narrowing of double to float for
integer casts
This patch modifies `visitFPTrunc` to simplify the following expression.
```llvm
fptrunc(OpI (sitofp/uitofp x), (sitofp/uitofp y))
```
->
```llvm
OpI (sitofp/uitofp x), (sitofp/uitofp y)
```
This allows a double to be narrowed to a float if the source operands
originate from sitofp/uitofp and can be represented exactly in the
target float type.
---
.../Transforms/InstCombine/InstCombiner.h | 1 +
.../InstCombine/InstCombineCasts.cpp | 43 ++++++++++++++-----
.../InstCombine/fptrunc-narrow-fp.ll | 5 +--
3 files changed, 35 insertions(+), 14 deletions(-)
diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
index bc52bf1168d4a..84f8495888382 100644
--- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
+++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
@@ -481,6 +481,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner {
/// Return true if the cast from integer to FP can be proven to be exact
/// for all possible inputs (the conversion does not lose any precision).
bool isKnownExactCastIntToFP(CastInst &I) const;
+ bool canBeCastedExactlyIntToFP(Value &V, Type *FPTy, bool IsSigned) const;
OverflowResult computeOverflowForUnsignedMul(const Value *LHS,
const Value *RHS,
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index a1d67a0c60ac5..24f3cddbcaf00 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -2030,30 +2030,25 @@ static Type *getMinimumFPType(Value *V, bool PreferBFloat) {
return V->getType();
}
-bool InstCombiner::isKnownExactCastIntToFP(CastInst &I) const {
- CastInst::CastOps Opcode = I.getOpcode();
- assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
- "Unexpected cast");
- Value *Src = I.getOperand(0);
- Type *SrcTy = Src->getType();
- Type *FPTy = I.getType();
- bool IsSigned = Opcode == Instruction::SIToFP;
+bool InstCombiner::canBeCastedExactlyIntToFP(Value &V, Type *FPTy,
+ bool IsSigned) const {
+ Type *SrcTy = V.getType();
int SrcSize = (int)SrcTy->getScalarSizeInBits() - IsSigned;
+ int DestNumSigBits = FPTy->getFPMantissaWidth();
// Easy case - if the source integer type has less bits than the FP mantissa,
// then the cast must be exact.
- int DestNumSigBits = FPTy->getFPMantissaWidth();
if (SrcSize <= DestNumSigBits)
return true;
// Cast from FP to integer and back to FP is independent of the intermediate
// integer width because of poison on overflow.
Value *F;
- if (match(Src, m_FPToI(m_Value(F)))) {
+ if (match(&V, m_FPToI(m_Value(F)))) {
// If this is uitofp (fptosi F), the source needs an extra bit to avoid
// potential rounding of negative FP input values.
int SrcNumSigBits = F->getType()->getFPMantissaWidth();
- if (!IsSigned && match(Src, m_FPToSI(m_Value())))
+ if (!IsSigned && match(&V, m_FPToSI(m_Value())))
SrcNumSigBits++;
// [su]itofp (fpto[su]i F) --> exact if the source type has less or equal
@@ -2064,6 +2059,21 @@ bool InstCombiner::isKnownExactCastIntToFP(CastInst &I) const {
return true;
}
+ return false;
+}
+
+bool InstCombiner::isKnownExactCastIntToFP(CastInst &I) const {
+ CastInst::CastOps Opcode = I.getOpcode();
+ assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
+ "Unexpected cast");
+ Value *Src = I.getOperand(0);
+ Type *SrcTy = Src->getType();
+ Type *FPTy = I.getType();
+ bool IsSigned = Opcode == Instruction::SIToFP;
+ int DestNumSigBits = FPTy->getFPMantissaWidth();
+ if (canBeCastedExactlyIntToFP(*Src, FPTy, IsSigned))
+ return true;
+
// Try harder to find if the source integer type has less significant bits.
// Compute number of sign bits or determine trailing zeros.
KnownBits SrcKnown = computeKnownBits(Src, &I);
@@ -2102,6 +2112,17 @@ Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) {
Type *LHSMinType = getMinimumFPType(BO->getOperand(0), PreferBFloat);
Type *RHSMinType = getMinimumFPType(BO->getOperand(1), PreferBFloat);
unsigned OpWidth = BO->getType()->getFPMantissaWidth();
+
+ if (auto *FPCast = dyn_cast<CastInst>(BO->getOperand(0)))
+ if (isa<UIToFPInst>(FPCast) || isa<SIToFPInst>(FPCast))
+ if (isKnownExactCastIntToFP(*FPCast))
+ LHSMinType = Ty;
+
+ if (auto *FPCast = dyn_cast<CastInst>(BO->getOperand(1)))
+ if (isa<UIToFPInst>(FPCast) || isa<SIToFPInst>(FPCast))
+ if (isKnownExactCastIntToFP(*FPCast))
+ RHSMinType = Ty;
+
unsigned LHSWidth = LHSMinType->getFPMantissaWidth();
unsigned RHSWidth = RHSMinType->getFPMantissaWidth();
unsigned SrcWidth = std::max(LHSWidth, RHSWidth);
diff --git a/llvm/test/Transforms/InstCombine/fptrunc-narrow-fp.ll b/llvm/test/Transforms/InstCombine/fptrunc-narrow-fp.ll
index 1eba158fe3990..896dc0869f4f8 100644
--- a/llvm/test/Transforms/InstCombine/fptrunc-narrow-fp.ll
+++ b/llvm/test/Transforms/InstCombine/fptrunc-narrow-fp.ll
@@ -6,9 +6,8 @@ define float @narrow_fdiv_sitofp(i64 %x) {
; CHECK-SAME: i64 [[X:%.*]]) {
; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[X]], 27611
; CHECK-NEXT: [[TMP1:%.*]] = urem i64 [[MUL]], 74383
-; CHECK-NEXT: [[CONV2:%.*]] = uitofp nneg i64 [[TMP1]] to double
-; CHECK-NEXT: [[DIV3:%.*]] = fdiv double [[CONV2]], 7.438300e+04
-; CHECK-NEXT: [[CONV4:%.*]] = fptrunc double [[DIV3]] to float
+; CHECK-NEXT: [[TMP2:%.*]] = uitofp nneg i64 [[TMP1]] to float
+; CHECK-NEXT: [[CONV4:%.*]] = fdiv float [[TMP2]], 7.438300e+04
; CHECK-NEXT: ret float [[CONV4]]
;
%mul = mul i64 %x, 27611
More information about the llvm-commits
mailing list