[llvm] [AArch64] Fix #94909: Optimize vector fmul(sitofp(x), 0.5) -> scvtf(x, 2) (PR #141480)
JP Hafer via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 26 10:45:07 PDT 2025
https://github.com/jph-13 updated https://github.com/llvm/llvm-project/pull/141480
>From 6ebc4b19f37c3ad6dce7e70c39e5f750497a93c1 Mon Sep 17 00:00:00 2001
From: JP Hafer <jhafer at mathworks.com>
Date: Wed, 18 Jun 2025 11:10:46 -0400
Subject: [PATCH] [AArch64] Fix #94909: Optimize vector fmul(sitofp(x), 0.5) ->
scvtf(x, 2)
This commit reintroduces the optimization in InstCombine that was previously removed due to limited applicability.
See: #91924
This update targets `fmul(sitofp(x), C)` where `C` is a constant reciprocal of a power of two. For both scalar and vector inputs, if we have `sitofp(X) * C` (where `C` is `1/2^N`), this can be optimized to `scvtf(X, #N)`, a fixed-point conversion with `N` fractional bits (i.e. a scale of `1/2^N`). This eliminates the floating-point multiply by directly converting the integer to a scaled floating-point value.
---
.../Target/AArch64/AArch64ISelDAGToDAG.cpp | 140 +++++
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 52 ++
.../CodeGen/AArch64/scvtf-div-mul-combine.ll | 487 ++++++++++++++++++
3 files changed, 679 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/scvtf-div-mul-combine.ll
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 009d69b2b9433..264b6f2c2c944 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -487,6 +487,14 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
unsigned Width);
+  /// Vector form of the fixed-point reciprocal operand matcher. \p Width is
+  /// forwarded unchanged to the out-of-line definition; on success \p FixedPos
+  /// receives the matched operand.
+  bool SelectCVTFixedPosRecipOperandVec(SDValue N, SDValue &FixedPos,
+                                        unsigned Width);
+
+  /// Overload that takes the width as a template argument so that it can be
+  /// named from a TableGen ComplexPattern; it forwards to the three-argument
+  /// overload.
+  template <unsigned FloatWidth>
+  bool SelectCVTFixedPosRecipOperandVec(SDValue N, SDValue &FixedPos) {
+    return SelectCVTFixedPosRecipOperandVec(N, FixedPos, FloatWidth);
+  }
+
bool SelectCMP_SWAP(SDNode *N);
bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
@@ -3952,6 +3960,132 @@ static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
return true;
}
+/// Match a multiplicand that is a floating-point vector splat of a power of two
+/// (or, when \p IsReciprocal is set, of the reciprocal of a power of two) and
+/// return the matching number of fixed-point fractional bits.
+///
+/// \param CurDAG       DAG used to build the resulting target constant.
+/// \param N            Candidate operand; must be a floating-point vector
+///                     produced by a BITCAST, NVCAST or FMOV node.
+/// \param FixedPos     Set on success to an i32 target constant holding log2
+///                     of the matched power of two.
+/// \param FloatWidth   Inclusive upper bound for that log2 value.
+/// \param IsReciprocal If true, the constant's exact inverse (rather than the
+///                     constant itself) must be the power of two.
+/// \returns true if \p N matched; otherwise false with \p FixedPos untouched.
+static bool checkCVTFixedPointOperandWithFBitsForVectors(SelectionDAG *CurDAG,
+                                                         SDValue N,
+                                                         SDValue &FixedPos,
+                                                         unsigned FloatWidth,
+                                                         bool IsReciprocal) {
+  // N must be a bitcast, nvcast, or fmov.
+  const unsigned NOpc = N.getOpcode();
+  if (NOpc != ISD::BITCAST && NOpc != AArch64ISD::NVCAST &&
+      NOpc != AArch64ISD::FMOV)
+    return false;
+
+  // The operand of N is not always a vector (an ISD::Constant operand is a
+  // scalar), so check N's own value type for vector and floating point.
+  EVT NodeVT = N.getValueType();
+  if (!NodeVT.isVector() || !NodeVT.isFloatingPoint())
+    return false;
+
+  // Only IEEE half, single and double elements are supported.
+  const EVT FloatEltVT = NodeVT.getVectorElementType();
+  const fltSemantics *Semantics = nullptr;
+  if (FloatEltVT == MVT::f16)
+    Semantics = &APFloat::IEEEhalf();
+  else if (FloatEltVT == MVT::f32)
+    Semantics = &APFloat::IEEEsingle();
+  else if (FloatEltVT == MVT::f64)
+    Semantics = &APFloat::IEEEdouble();
+  else
+    return false; // Unsupported floating-point element type.
+  const unsigned EltBits = NodeVT.getScalarSizeInBits();
+
+  const SDValue ImmediateNode = N.getOperand(0);
+  const unsigned ImmOpc = ImmediateNode.getOpcode();
+
+  // Bit pattern of a single element of N.
+  APInt Imm;
+  if (NOpc == AArch64ISD::FMOV) {
+    // The FMOV operand is an 8-bit encoded AdvSIMD modified immediate. Only
+    // decode a constant as such when N really is an FMOV, and pick the
+    // decoder that matches the element size: type 11 expands to a 32-bit
+    // pattern, type 12 to a 64-bit pattern.
+    auto *C = dyn_cast<ConstantSDNode>(ImmediateNode);
+    if (!C)
+      return false;
+    const uint8_t EncodedU8 = static_cast<uint8_t>(C->getZExtValue());
+    if (EltBits == 32)
+      Imm = APInt(32, AArch64_AM::decodeAdvSIMDModImmType11(EncodedU8) &
+                          0xffffffffULL);
+    else if (EltBits == 64)
+      Imm = APInt(64, AArch64_AM::decodeAdvSIMDModImmType12(EncodedU8));
+    else
+      return false;
+  } else {
+    if (ImmOpc != AArch64ISD::DUP && ImmOpc != AArch64ISD::MOVIshift &&
+        ImmOpc != ISD::SPLAT_VECTOR && ImmOpc != ISD::BUILD_VECTOR)
+      return false; // Not a possible splat
+
+    // The cast must not change the element size. Otherwise a splat of the
+    // operand's elements is not a splat of N's elements (e.g. a v2i32 MOVI
+    // viewed as v4f16 alternates between the low half and zero).
+    if (ImmediateNode.getValueType().getScalarSizeInBits() != EltBits)
+      return false;
+
+    if (ImmOpc == AArch64ISD::MOVIshift) {
+      const uint64_t Shift = ImmediateNode.getConstantOperandVal(1);
+      if (Shift >= EltBits)
+        return false;
+      Imm = APInt(64, ImmediateNode.getConstantOperandVal(0))
+                .shl(static_cast<unsigned>(Shift))
+                .zextOrTrunc(EltBits);
+    } else if (ImmOpc == ISD::BUILD_VECTOR) {
+      // For BUILD_VECTOR, we must explicitly check that it is a constant
+      // splat whose period is exactly one element; a longer period such as
+      // <A, B, A, B> is also reported as a constant splat.
+      auto *BVN = cast<BuildVectorSDNode>(ImmediateNode.getNode());
+      APInt SplatValue;
+      APInt SplatUndef;
+      unsigned SplatBitSize;
+      bool HasAnyUndefs;
+      if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
+                                HasAnyUndefs, EltBits) ||
+          SplatBitSize != EltBits)
+        return false;
+      Imm = SplatValue;
+    } else {
+      // DUP / SPLAT_VECTOR of a scalar constant.
+      const SDValue Scalar = ImmediateNode.getOperand(0);
+      if (auto *CI = dyn_cast<ConstantSDNode>(Scalar)) {
+        // The scalar may be wider than the element (e.g. an i32 constant
+        // feeding a v4i16 DUP); only the low element-sized bits are used.
+        Imm = CI->getAPIntValue().zextOrTrunc(EltBits);
+      } else if (auto *CFP = dyn_cast<ConstantFPSDNode>(Scalar)) {
+        // Reinterpret the raw bits in N's element format.
+        Imm = CFP->getValueAPF().bitcastToAPInt();
+        if (Imm.getBitWidth() != EltBits)
+          return false;
+      } else {
+        // Not a recognized constant.
+        return false;
+      }
+    }
+  }
+
+  // Interpret the element bits as a floating-point value of N's element type.
+  APFloat FVal(*Semantics, Imm);
+
+  // Handle reciprocal case.
+  if (IsReciprocal && !FVal.getExactInverse(&FVal))
+    return false; // No exact reciprocal.
+
+  // One extra bit so that 2^EltBits itself is representable; larger, negative
+  // or fractional values are reported as inexact and rejected below.
+  bool IsExact;
+  APSInt IntVal(EltBits + 1, /*isUnsigned=*/true);
+  FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
+
+  if (!IsExact || !IntVal.isPowerOf2())
+    return false;
+
+  // FBits is the number of fractional bits of the fixed-point conversion. It
+  // must be non-zero (zero means no scaling at all) and no larger than
+  // FloatWidth.
+  const unsigned FBits = IntVal.logBase2();
+  if (FBits == 0 || FBits > FloatWidth)
+    return false;
+
+  // Set FixedPos to the extracted FBits as an i32 constant SDValue.
+  FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
+  return true;
+}
+
bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
unsigned RegWidth) {
return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
@@ -3965,6 +4099,12 @@ bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
true);
}
+/// Out-of-line body of the vector reciprocal matcher: defers to
+/// checkCVTFixedPointOperandWithFBitsForVectors with the reciprocal flag set.
+bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperandVec(
+    SDValue N, SDValue &FixedPos, unsigned FloatWidth) {
+  constexpr bool MatchReciprocal = true;
+  return checkCVTFixedPointOperandWithFBitsForVectors(
+      CurDAG, N, FixedPos, FloatWidth, MatchReciprocal);
+}
+
// Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields
// of the string and obtains the integer values from them and combines these
// into a single value to be used in the MRS/MSR instruction.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index f90f12b5ac3c7..5613128d0e9fd 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -8473,6 +8473,58 @@ def : Pat<(v8f16 (sint_to_fp (v8i16 (AArch64vashr_exact v8i16:$Vn, i32:$shift)))
(SCVTFv8i16_shift $Vn, vecshiftR16:$shift)>;
}
+// Select vector fmul(sitofp(x), C) where C is a splat of 1/2^N as a single
+// fixed-point convert, scvtf(x, #N). The ComplexPatterns below call
+// SelectCVTFixedPosRecipOperandVec<W>, where W (16, 32 or 64) caps N.
+class fixedpoint_recip_vec_i16<ValueType FloatVT>
+ : ComplexPattern<FloatVT, 1, "SelectCVTFixedPosRecipOperandVec<16>", []>;
+class fixedpoint_recip_vec_i32<ValueType FloatVT>
+ : ComplexPattern<FloatVT, 1, "SelectCVTFixedPosRecipOperandVec<32>", []>;
+class fixedpoint_recip_vec_i64<ValueType FloatVT>
+ : ComplexPattern<FloatVT, 1, "SelectCVTFixedPosRecipOperandVec<64>", []>;
+def fixedpoint_recip_vec_xform : SDNodeXForm<timm, [{
+ // Suppress the unused variable warning by explicitly using N.
+ // The actual value needed for the pattern is already in V.
+ (void)N;
+ return V;
+}]>;
+
+def fixedpoint_recip_v2f32_v2i32 : fixedpoint_recip_vec_i32<v2f32>;
+def fixedpoint_recip_v4f32_v4i32 : fixedpoint_recip_vec_i32<v4f32>;
+def fixedpoint_recip_v2f64_v2i64 : fixedpoint_recip_vec_i64<v2f64>;
+
+def fixedpoint_recip_v4f16_v4i16 : fixedpoint_recip_vec_i16<v4f16>;
+def fixedpoint_recip_v8f16_v8i16 : fixedpoint_recip_vec_i16<v8f16>;
+
+let Predicates = [HasNEON] in { // f32 and f64 element forms
+ def : Pat<(v2f32(fmul(sint_to_fp(v2i32 V64:$Rn)), // v2i32 -> v2f32
+ fixedpoint_recip_v2f32_v2i32:$scale)),
+ (v2f32(SCVTFv2i32_shift(v2i32 V64:$Rn),
+ (fixedpoint_recip_vec_xform fixedpoint_recip_v2f32_v2i32:$scale)))>;
+
+ def : Pat<(v4f32(fmul(sint_to_fp(v4i32 FPR128:$Rn)), // v4i32 -> v4f32
+ fixedpoint_recip_v4f32_v4i32:$scale)),
+ (v4f32(SCVTFv4i32_shift(v4i32 FPR128:$Rn),
+ (fixedpoint_recip_vec_xform fixedpoint_recip_v4f32_v4i32:$scale)))>;
+
+ def : Pat<(v2f64(fmul(sint_to_fp(v2i64 FPR128:$Rn)), // v2i64 -> v2f64
+ fixedpoint_recip_v2f64_v2i64:$scale)),
+ (v2f64(SCVTFv2i64_shift(v2i64 FPR128:$Rn),
+ (fixedpoint_recip_vec_xform fixedpoint_recip_v2f64_v2i64:$scale)))>;
+}
+
+let Predicates = [HasNEON, HasFullFP16] in { // f16 element forms, additionally gated on HasFullFP16
+ def : Pat<(v4f16(fmul(sint_to_fp(v4i16 V64:$Rn)), // v4i16 -> v4f16
+ fixedpoint_recip_v4f16_v4i16:$scale)),
+ (v4f16(SCVTFv4i16_shift(v4i16 V64:$Rn),
+ (fixedpoint_recip_vec_xform fixedpoint_recip_v4f16_v4i16:$scale)))>;
+
+ def : Pat<(v8f16(fmul(sint_to_fp(v8i16 FPR128:$Rn)), // v8i16 -> v8f16
+ fixedpoint_recip_v8f16_v8i16:$scale)),
+ (v8f16(SCVTFv8i16_shift(v8i16 FPR128:$Rn),
+ (fixedpoint_recip_vec_xform fixedpoint_recip_v8f16_v8i16:$scale)))>;
+}
+
// X << 1 ==> X + X
class SHLToADDPat<ValueType ty, RegisterClass regtype>
: Pat<(ty (AArch64vshl (ty regtype:$Rn), (i32 1))),
diff --git a/llvm/test/CodeGen/AArch64/scvtf-div-mul-combine.ll b/llvm/test/CodeGen/AArch64/scvtf-div-mul-combine.ll
new file mode 100644
index 0000000000000..ed967cf1f13f5
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/scvtf-div-mul-combine.ll
@@ -0,0 +1,487 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-neon-syntax=apple -mattr=+fullfp16 -o - %s | FileCheck %s
+
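+; This test file verifies that fdiv(sitofp(x), C),
+; where C is a constant power of two (2^N),
+; is optimized to scvtf(x, #N).
+; This typically involves an implicit fdiv -> fmul_reciprocal transformation.
+; It also checks that powers of two too large for the fixed-point immediate
+; and non-power-of-two multipliers are left as scvtf followed by fmul.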
+
+; Scalar f32 (from i32)
+define float @test_f32_div_const(i32 %in) {
+; CHECK-LABEL: test_f32_div_const:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf s0, w0, #4
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp i32 %in to float
+ %div.i = fdiv float %vcvt.i, 16.0
+ ret float %div.i
+}
+
+; Scalar f64 (from i64)
+define double @test_f64_div_const(i64 %in) {
+; CHECK-LABEL: test_f64_div_const:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf d0, x0, #4
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp i64 %in to double
+ %div.i = fdiv double %vcvt.i, 16.0
+ ret double %div.i
+}
+
+; Vector v2f32 (from v2i32)
+define <2 x float> @test_v2f32_div_const_2(<2 x i32> %in) {
+; CHECK-LABEL: test_v2f32_div_const_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf.2s v0, v0, #1
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+ %div.i = fdiv <2 x float> %vcvt.i, <float 2.0, float 2.0>
+ ret <2 x float> %div.i
+}
+
+define <2 x float> @test_v2f32_div_const_4(<2 x i32> %in) {
+; CHECK-LABEL: test_v2f32_div_const_4:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf.2s v0, v0, #2
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+ %div.i = fdiv <2 x float> %vcvt.i, <float 4.0, float 4.0>
+ ret <2 x float> %div.i
+}
+
+define <2 x float> @test_v2f32_div_const_8(<2 x i32> %in) {
+; CHECK-LABEL: test_v2f32_div_const_8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf.2s v0, v0, #3
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+ %div.i = fdiv <2 x float> %vcvt.i, <float 8.0, float 8.0>
+ ret <2 x float> %div.i
+}
+
+define <2 x float> @test_v2f32_div_const_16(<2 x i32> %in) {
+; CHECK-LABEL: test_v2f32_div_const_16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf.2s v0, v0, #4
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+ %div.i = fdiv <2 x float> %vcvt.i, <float 16.0, float 16.0>
+ ret <2 x float> %div.i
+}
+
+define <2 x float> @test_v2f32_div_const_32(<2 x i32> %in) {
+; CHECK-LABEL: test_v2f32_div_const_32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf.2s v0, v0, #5
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+ %div.i = fdiv <2 x float> %vcvt.i, <float 32.0, float 32.0>
+ ret <2 x float> %div.i
+}
+
+define <2 x float> @test_v2f32_div_const_64(<2 x i32> %in) {
+; CHECK-LABEL: test_v2f32_div_const_64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf.2s v0, v0, #6
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+ %div.i = fdiv <2 x float> %vcvt.i, <float 64.0, float 64.0>
+ ret <2 x float> %div.i
+}
+
+define <2 x float> @test_v2f32_div_const_128(<2 x i32> %in) {
+; CHECK-LABEL: test_v2f32_div_const_128:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf.2s v0, v0, #7
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+ %div.i = fdiv <2 x float> %vcvt.i, <float 128.0, float 128.0>
+ ret <2 x float> %div.i
+}
+
+define <2 x float> @test_v2f32_div_const_256(<2 x i32> %in) {
+; CHECK-LABEL: test_v2f32_div_const_256:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf.2s v0, v0, #8
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+ %div.i = fdiv <2 x float> %vcvt.i, <float 256.0, float 256.0>
+ ret <2 x float> %div.i
+}
+
+define <2 x float> @test_v2f32_div_const_512(<2 x i32> %in) {
+; CHECK-LABEL: test_v2f32_div_const_512:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf.2s v0, v0, #9
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+ %div.i = fdiv <2 x float> %vcvt.i, <float 512.0, float 512.0>
+ ret <2 x float> %div.i
+}
+
+define <2 x float> @test_v2f32_div_const_1024(<2 x i32> %in) {
+; CHECK-LABEL: test_v2f32_div_const_1024:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf.2s v0, v0, #10
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+ %div.i = fdiv <2 x float> %vcvt.i, <float 1024.0, float 1024.0>
+ ret <2 x float> %div.i
+}
+
+; Vector v4f32 (from v4i32)
+define <4 x float> @test_v4f32_div_const_2(<4 x i32> %in) {
+; CHECK-LABEL: test_v4f32_div_const_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf.4s v0, v0, #1
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <4 x i32> %in to <4 x float>
+ %div.i = fdiv <4 x float> %vcvt.i, <float 2.0, float 2.0, float 2.0, float 2.0>
+ ret <4 x float> %div.i
+}
+
+define <4 x float> @test_v4f32_div_const_4(<4 x i32> %in) {
+; CHECK-LABEL: test_v4f32_div_const_4:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf.4s v0, v0, #2
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <4 x i32> %in to <4 x float>
+ %div.i = fdiv <4 x float> %vcvt.i, <float 4.0, float 4.0, float 4.0, float 4.0>
+ ret <4 x float> %div.i
+}
+
+define <4 x float> @test_v4f32_div_const_8(<4 x i32> %in) {
+; CHECK-LABEL: test_v4f32_div_const_8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf.4s v0, v0, #3
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <4 x i32> %in to <4 x float>
+ %div.i = fdiv <4 x float> %vcvt.i, <float 8.0, float 8.0, float 8.0, float 8.0>
+ ret <4 x float> %div.i
+}
+
+define <4 x float> @test_v4f32_div_const_16(<4 x i32> %in) {
+; CHECK-LABEL: test_v4f32_div_const_16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf.4s v0, v0, #4
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <4 x i32> %in to <4 x float>
+ %div.i = fdiv <4 x float> %vcvt.i, <float 16.0, float 16.0, float 16.0, float 16.0>
+ ret <4 x float> %div.i
+}
+
+define <4 x float> @test_v4f32_div_const_32(<4 x i32> %in) {
+; CHECK-LABEL: test_v4f32_div_const_32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf.4s v0, v0, #5
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <4 x i32> %in to <4 x float>
+ %div.i = fdiv <4 x float> %vcvt.i, <float 32.0, float 32.0, float 32.0, float 32.0>
+ ret <4 x float> %div.i
+}
+
+define <4 x float> @test_v4f32_div_const_64(<4 x i32> %in) {
+; CHECK-LABEL: test_v4f32_div_const_64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf.4s v0, v0, #6
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <4 x i32> %in to <4 x float>
+ %div.i = fdiv <4 x float> %vcvt.i, <float 64.0, float 64.0, float 64.0, float 64.0>
+ ret <4 x float> %div.i
+}
+
+define <4 x float> @test_v4f32_div_const_128(<4 x i32> %in) {
+; CHECK-LABEL: test_v4f32_div_const_128:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf.4s v0, v0, #7
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <4 x i32> %in to <4 x float>
+ %div.i = fdiv <4 x float> %vcvt.i, <float 128.0, float 128.0, float 128.0, float 128.0>
+ ret <4 x float> %div.i
+}
+
+define <4 x float> @test_v4f32_div_const_256(<4 x i32> %in) {
+; CHECK-LABEL: test_v4f32_div_const_256:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf.4s v0, v0, #8
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <4 x i32> %in to <4 x float>
+ %div.i = fdiv <4 x float> %vcvt.i, <float 256.0, float 256.0, float 256.0, float 256.0>
+ ret <4 x float> %div.i
+}
+
+define <4 x float> @test_v4f32_div_const_512(<4 x i32> %in) {
+; CHECK-LABEL: test_v4f32_div_const_512:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf.4s v0, v0, #9
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <4 x i32> %in to <4 x float>
+ %div.i = fdiv <4 x float> %vcvt.i, <float 512.0, float 512.0, float 512.0, float 512.0>
+ ret <4 x float> %div.i
+}
+
+define <4 x float> @test_v4f32_div_const_1024(<4 x i32> %in) {
+; CHECK-LABEL: test_v4f32_div_const_1024:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf.4s v0, v0, #10
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <4 x i32> %in to <4 x float>
+ %div.i = fdiv <4 x float> %vcvt.i, <float 1024.0, float 1024.0, float 1024.0, float 1024.0>
+ ret <4 x float> %div.i
+}
+
+; Vector v2f64 (from v2i64)
+define <2 x double> @test_v2f64_div_const(<2 x i64> %in) {
+; CHECK-LABEL: test_v2f64_div_const:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf.2d v0, v0, #4
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <2 x i64> %in to <2 x double>
+ %div.i = fdiv <2 x double> %vcvt.i, <double 16.0, double 16.0>
+ ret <2 x double> %div.i
+}
+
+; Vector v4f16 (from v4i16)
+define <4 x half> @test_v4f16_div_const_2(<4 x i16> %in) {
+; CHECK-LABEL: test_v4f16_div_const_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf.4h v0, v0, #1
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <4 x i16> %in to <4 x half>
+ %div.i = fdiv <4 x half> %vcvt.i, <half 2.0, half 2.0, half 2.0, half 2.0>
+ ret <4 x half> %div.i
+}
+
+define <4 x half> @test_v4f16_div_const_4(<4 x i16> %in) {
+; CHECK-LABEL: test_v4f16_div_const_4:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf.4h v0, v0, #2
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <4 x i16> %in to <4 x half>
+ %div.i = fdiv <4 x half> %vcvt.i, <half 4.0, half 4.0, half 4.0, half 4.0>
+ ret <4 x half> %div.i
+}
+
+define <4 x half> @test_v4f16_div_const_8(<4 x i16> %in) {
+; CHECK-LABEL: test_v4f16_div_const_8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf.4h v0, v0, #3
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <4 x i16> %in to <4 x half>
+ %div.i = fdiv <4 x half> %vcvt.i, <half 8.0, half 8.0, half 8.0, half 8.0>
+ ret <4 x half> %div.i
+}
+
+define <4 x half> @test_v4f16_div_const_16(<4 x i16> %in) {
+; CHECK-LABEL: test_v4f16_div_const_16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf.4h v0, v0, #4
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <4 x i16> %in to <4 x half>
+ %div.i = fdiv <4 x half> %vcvt.i, <half 16.0, half 16.0, half 16.0, half 16.0>
+ ret <4 x half> %div.i
+}
+
+define <4 x half> @test_v4f16_div_const_32(<4 x i16> %in) {
+; CHECK-LABEL: test_v4f16_div_const_32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf.4h v0, v0, #5
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <4 x i16> %in to <4 x half>
+ %div.i = fdiv <4 x half> %vcvt.i, <half 32.0, half 32.0, half 32.0, half 32.0>
+ ret <4 x half> %div.i
+}
+
+; Vector v8f16 (from v8i16)
+define <8 x half> @test_v8f16_div_const_2(<8 x i16> %in) {
+; CHECK-LABEL: test_v8f16_div_const_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf.8h v0, v0, #1
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <8 x i16> %in to <8 x half>
+ %div.i = fdiv <8 x half> %vcvt.i, <half 2.0, half 2.0, half 2.0, half 2.0, half 2.0, half 2.0, half 2.0, half 2.0>
+ ret <8 x half> %div.i
+}
+
+define <8 x half> @test_v8f16_div_const_4(<8 x i16> %in) {
+; CHECK-LABEL: test_v8f16_div_const_4:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf.8h v0, v0, #2
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <8 x i16> %in to <8 x half>
+ %div.i = fdiv <8 x half> %vcvt.i, <half 4.0, half 4.0, half 4.0, half 4.0, half 4.0, half 4.0, half 4.0, half 4.0>
+ ret <8 x half> %div.i
+}
+
+define <8 x half> @test_v8f16_div_const_8(<8 x i16> %in) {
+; CHECK-LABEL: test_v8f16_div_const_8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf.8h v0, v0, #3
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <8 x i16> %in to <8 x half>
+ %div.i = fdiv <8 x half> %vcvt.i, <half 8.0, half 8.0, half 8.0, half 8.0, half 8.0, half 8.0, half 8.0, half 8.0>
+ ret <8 x half> %div.i
+}
+
+define <8 x half> @test_v8f16_div_const_16(<8 x i16> %in) {
+; CHECK-LABEL: test_v8f16_div_const_16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf.8h v0, v0, #4
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <8 x i16> %in to <8 x half>
+ %div.i = fdiv <8 x half> %vcvt.i, <half 16.0, half 16.0, half 16.0, half 16.0, half 16.0, half 16.0, half 16.0, half 16.0>
+ ret <8 x half> %div.i
+}
+
+define <8 x half> @test_v8f16_div_const_32(<8 x i16> %in) {
+; CHECK-LABEL: test_v8f16_div_const_32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf.8h v0, v0, #5
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <8 x i16> %in to <8 x half>
+ %div.i = fdiv <8 x half> %vcvt.i, <half 32.0, half 32.0, half 32.0, half 32.0, half 32.0, half 32.0, half 32.0, half 32.0>
+ ret <8 x half> %div.i
+}
+
+; Vector v2f32 (from v2i32)
+define <2 x float> @test_v2f32_div_big_pow2_const(<2 x i32> %in) {
+; CHECK-LABEL: test_v2f32_div_big_pow2_const:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi.2s v1, #47, lsl #24
+; CHECK-NEXT: scvtf.2s v0, v0
+; CHECK-NEXT: fmul.2s v0, v0, v1
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+ %div.i = fdiv <2 x float> %vcvt.i, <float 8589934592.0, float 8589934592.0>
+ ret <2 x float> %div.i
+}
+
+; Vector v4f32 (from v4i32)
+define <4 x float> @test_v4f32_div_big_pow2_const(<4 x i32> %in) {
+; CHECK-LABEL: test_v4f32_div_big_pow2_const:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi.4s v1, #47, lsl #24
+; CHECK-NEXT: scvtf.4s v0, v0
+; CHECK-NEXT: fmul.4s v0, v0, v1
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <4 x i32> %in to <4 x float>
+ %div.i = fdiv <4 x float> %vcvt.i, <float 8589934592.0, float 8589934592.0, float 8589934592.0, float 8589934592.0>
+ ret <4 x float> %div.i
+}
+
+; Vector v2f64 (from v2i64)
+define <2 x double> @test_v2f64_div_big_pow2_const(<2 x i64> %in) {
+; CHECK-LABEL: test_v2f64_div_big_pow2_const:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov x8, #4314448443020935168 // =0x3be0000000000000
+; CHECK-NEXT: scvtf.2d v0, v0
+; CHECK-NEXT: dup.2d v1, x8
+; CHECK-NEXT: fmul.2d v0, v0, v1
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <2 x i64> %in to <2 x double>
+ %div.i = fdiv <2 x double> %vcvt.i, <double 36893488147419103232.0, double 36893488147419103232.0>
+ ret <2 x double> %div.i
+}
+
+; Scalar f32 with non-power-of-2 constant
+define float @negative_test_f32_mul_non_pow2_const(i32 %in) {
+; CHECK-LABEL: negative_test_f32_mul_non_pow2_const:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #36704 // =0x8f60
+; CHECK-NEXT: scvtf s0, w0
+; CHECK-NEXT: movk w8, #9666, lsl #16
+; CHECK-NEXT: fmov s1, w8
+; CHECK-NEXT: fmul s0, s0, s1
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp i32 %in to float
+ %mul.i = fmul float %vcvt.i, 0x3CB851EC00000000
+ ret float %mul.i
+}
+
+
+; Vector v2f32 with non-power-of-2 constant
+define <2 x float> @negative_test_v2f32_mul_non_pow2_const(<2 x i32> %in) {
+; CHECK-LABEL: negative_test_v2f32_mul_non_pow2_const:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #36704 // =0x8f60
+; CHECK-NEXT: scvtf.2s v0, v0
+; CHECK-NEXT: movk w8, #9666, lsl #16
+; CHECK-NEXT: dup.2s v1, w8
+; CHECK-NEXT: fmul.2s v0, v0, v1
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+ %mul.i = fmul <2 x float> %vcvt.i, <float 0x3CB851EC00000000, float 0x3CB851EC00000000>
+ ret <2 x float> %mul.i
+}
+
+; Vector v4f32 with non-power-of-2 constant
+define <4 x float> @negative_test_v4f32_mul_non_pow2_const(<4 x i32> %in) {
+; CHECK-LABEL: negative_test_v4f32_mul_non_pow2_const:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #36704 // =0x8f60
+; CHECK-NEXT: scvtf.4s v0, v0
+; CHECK-NEXT: movk w8, #9666, lsl #16
+; CHECK-NEXT: dup.4s v1, w8
+; CHECK-NEXT: fmul.4s v0, v0, v1
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <4 x i32> %in to <4 x float>
+ %mul.i = fmul <4 x float> %vcvt.i, <float 0x3CB851EC00000000, float 0x3CB851EC00000000, float 0x3CB851EC00000000, float 0x3CB851EC00000000>
+ ret <4 x float> %mul.i
+}
+
+; Vector v2f64 with non-power-of-2 constant
+define <2 x double> @negative_test_v2f64_mul_non_pow2_const(<2 x i64> %in) {
+; CHECK-LABEL: negative_test_v2f64_mul_non_pow2_const:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov x8, #5243 // =0x147b
+; CHECK-NEXT: scvtf.2d v0, v0
+; CHECK-NEXT: movk x8, #18350, lsl #16
+; CHECK-NEXT: movk x8, #31457, lsl #32
+; CHECK-NEXT: movk x8, #16276, lsl #48
+; CHECK-NEXT: dup.2d v1, x8
+; CHECK-NEXT: fmul.2d v0, v0, v1
+; CHECK-NEXT: ret
+entry:
+ %vcvt.i = sitofp <2 x i64> %in to <2 x double>
+ %mul.i = fmul <2 x double> %vcvt.i, <double 0.02, double 0.02>
+ ret <2 x double> %mul.i
+}
More information about the llvm-commits
mailing list