[llvm] [AArch64] Fix #94909: Optimize vector fmul(sitofp(x), 0.5) -> scvtf(x, 2) (PR #141480)
JP Hafer via llvm-commits
llvm-commits at lists.llvm.org
Fri May 30 11:54:12 PDT 2025
https://github.com/jph-13 updated https://github.com/llvm/llvm-project/pull/141480
>From 67a4484f25cbd425a05d5cd4b80db17ec601d381 Mon Sep 17 00:00:00 2001
From: JP Hafer <jhafer at mathworks.com>
Date: Fri, 30 May 2025 14:19:15 -0400
Subject: [PATCH] [AArch64] Fix #94909: Optimize vector fmul(sitofp(x), 0.5) ->
scvtf(x, 2)
This commit reintroduces an optimization that was previously removed from InstCombine due to limited applicability; it is now implemented in AArch64 instruction selection (AArch64ISelDAGToDAG.cpp).
See: #91924
This update targets `fmul(sitofp(x), C)` where `C` is a constant reciprocal of a power of two. For both scalar and vector inputs, `sitofp(X) * C` with `C = 1/2^N` can be selected as the fixed-point conversion `scvtf(X, #N)`, i.e. a signed convert with `N` fractional bits (for example, multiplying by 0.0625 = 2^-4 becomes `scvtf ..., #4`). This eliminates the floating-point multiply by directly converting the integer to a scaled floating-point value.
[AArch64] Fix #94909: Optimize vector fmul(sitofp(x), 0.5) -> scvtf(x, 2)
This commit reintroduces an optimization that was previously removed from InstCombine due to limited applicability; it is now implemented in AArch64 instruction selection (AArch64ISelDAGToDAG.cpp).
See: #91924
This update targets `fmul(sitofp(x), C)` where `C` is a constant reciprocal of a power of two. For both scalar and vector inputs, `sitofp(X) * C` with `C = 1/2^N` can be selected as the fixed-point conversion `scvtf(X, #N)`, i.e. a signed convert with `N` fractional bits (for example, multiplying by 0.0625 = 2^-4 becomes `scvtf ..., #4`). This eliminates the floating-point multiply by directly converting the integer to a scaled floating-point value.
revert orig try
---
.../Target/AArch64/AArch64ISelDAGToDAG.cpp | 58 ++++++++++++
.../AArch64/scvtf-div-mul-combine.ll | 93 +++++++++++++++++++
2 files changed, 151 insertions(+)
create mode 100644 llvm/test/Transforms/InstCombine/AArch64/scvtf-div-mul-combine.ll
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 96fa85179d023..27f35f3120b49 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -3907,6 +3907,64 @@ static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
unsigned RegWidth,
bool isReciprocal) {
APFloat FVal(0.0);
+
+ if (N.getOpcode() == ISD::BUILD_VECTOR) {
+ EVT VT = N.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+
+ unsigned NumElts = N.getNumOperands();
+ SDValue FirstOp = N.getOperand(0);
+
+ ConstantFPSDNode *FirstCN = dyn_cast<ConstantFPSDNode>(FirstOp);
+ if (!FirstCN)
+ return false;
+
+ APFloat FirstVal = FirstCN->getValueAPF();
+ if (EltVT == MVT::f16) {
+ bool ignored;
+ FirstVal.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
+ &ignored);
+ }
+
+ // Handle reciprocal case if needed
+ if (isReciprocal) {
+ if (!FirstVal.getExactInverse(&FirstVal))
+ return false;
+ }
+
+ bool IsExact;
+ APSInt IntVal(65, true);
+ FirstVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
+
+ if (!IsExact || !IntVal.isPowerOf2())
+ return false;
+
+ unsigned FBits = IntVal.logBase2();
+ if (FBits == 0 || FBits > RegWidth)
+ return false;
+
+ APInt FirstBits = FirstVal.bitcastToAPInt();
+
+ for (unsigned i = 1; i < NumElts; ++i) {
+ ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N.getOperand(i));
+ if (!CN)
+ return false;
+
+ APFloat ElemVal = CN->getValueAPF();
+ if (EltVT == MVT::f16) {
+ bool ignored;
+ ElemVal.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
+ &ignored);
+ }
+
+ if (ElemVal.bitcastToAPInt() != FirstBits)
+ return false;
+ }
+
+ FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
+ return true;
+ }
+
if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
FVal = CN->getValueAPF();
else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
diff --git a/llvm/test/Transforms/InstCombine/AArch64/scvtf-div-mul-combine.ll b/llvm/test/Transforms/InstCombine/AArch64/scvtf-div-mul-combine.ll
new file mode 100644
index 0000000000000..27f1158f3a0b3
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/scvtf-div-mul-combine.ll
@@ -0,0 +1,93 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+fullfp16 -aarch64-neon-syntax=apple -verify-machineinstrs -o - %s | FileCheck %s
+
+; Scalar fdiv by 16.0 (f32)
+define float @tests_f32_div(i32 %in) {
+; CHECK-LABEL: tests_f32_div:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf s0, w0, #4
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp i32 %in to float
+ %div.i = fdiv float %vcvt.i, 16.0
+ ret float %div.i
+}
+
+; Scalar fmul by (2^-4) (f32)
+define float @testsmul_f32_mul(i32 %in) local_unnamed_addr #0 {
+; CHECK-LABEL: testsmul_f32_mul:
+; CHECK: // %bb.0:
+; CHECK-NEXT: scvtf s0, w0, #4
+; CHECK-NEXT: ret
+ %vcvt.i = sitofp i32 %in to float
+ %div.i = fmul float %vcvt.i, 6.250000e-02 ; 0.0625 is 2^-4
+ ret float %div.i
+}
+
+; Vector fdiv by 16.0 (v2f32)
+define <2 x float> @testv_v2f32_div(<2 x i32> %in) {
+; CHECK-LABEL: testv_v2f32_div:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf.2s v0, v0, #4
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+ %div.i = fdiv <2 x float> %vcvt.i, <float 16.0, float 16.0>
+ ret <2 x float> %div.i
+}
+
+; Vector fmul by 2^-4 (v2f32)
+define <2 x float> @testvmul_v2f32_mul(<2 x i32> %in) local_unnamed_addr #0 {
+; CHECK-LABEL: testvmul_v2f32_mul:
+; CHECK: // %bb.0:
+; CHECK-NEXT: scvtf.2s v0, v0, #4
+; CHECK-NEXT: ret
+ %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+ %div.i = fmul <2 x float> %vcvt.i, splat (float 6.250000e-02) ; 0.0625 is 2^-4
+ ret <2 x float> %div.i
+}
+
+; Scalar fdiv by 16.0 (f64)
+define double @tests_f64_div(i64 %in) {
+; CHECK-LABEL: tests_f64_div:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf d0, x0, #4
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp i64 %in to double
+ %div.i = fdiv double %vcvt.i, 1.600000e+01 ; 16.0 in double-precision
+ ret double %div.i
+}
+
+; Scalar fmul by (2^-4) (f64)
+define double @testsmul_f64_mul(i64 %in) local_unnamed_addr #0 {
+; CHECK-LABEL: testsmul_f64_mul:
+; CHECK: // %bb.0:
+; CHECK-NEXT: scvtf d0, x0, #4
+; CHECK-NEXT: ret
+ %vcvt.i = sitofp i64 %in to double
+ %div.i = fmul double %vcvt.i, 6.250000e-02 ; 0.0625 is 2^-4 in double-precision
+ ret double %div.i
+}
+
+; Vector fdiv by 16.0 (v2f64)
+define <2 x double> @testv_v2f64_div(<2 x i64> %in) {
+; CHECK-LABEL: testv_v2f64_div:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf.2d v0, v0, #4
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <2 x i64> %in to <2 x double>
+ %div.i = fdiv <2 x double> %vcvt.i, <double 1.600000e+01, double 1.600000e+01>
+ ret <2 x double> %div.i
+}
+
+; Vector fmul by 2^-4 (v2f64)
+define <2 x double> @testvmul_v2f64_mul(<2 x i64> %in) local_unnamed_addr #0 {
+; CHECK-LABEL: testvmul_v2f64_mul:
+; CHECK: // %bb.0:
+; CHECK-NEXT: scvtf.2d v0, v0, #4
+; CHECK-NEXT: ret
+ %vcvt.i = sitofp <2 x i64> %in to <2 x double>
+ %div.i = fmul <2 x double> %vcvt.i, splat (double 6.250000e-02) ; 0.0625 is 2^-4 in double-precision
+ ret <2 x double> %div.i
+}
\ No newline at end of file
More information about the llvm-commits
mailing list