[llvm] [AArch64] Fix #94909: Optimize vector fmul(sitofp(x), 0.5) -> scvtf(x, 2) (PR #141480)
Stephen Canon via llvm-commits
llvm-commits at lists.llvm.org
Mon May 26 08:35:55 PDT 2025
================
@@ -19250,6 +19253,153 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
return FixConv;
}
+/// Return N >= 0 such that every element of the constant FP splat BV is 2^-N.
+/// Returns -1 if BV is not such a splat (IEEE single/double only) or if N > MaxExponent.
+static int getUniformFPSplatLog2(const BuildVectorSDNode *BV,
+ unsigned MaxExponent) {
+ SDValue FirstElt = BV->getOperand(0);
+ if (!isa<ConstantFPSDNode>(FirstElt))
+ return -1;
+
+ const ConstantFPSDNode *FirstConst = cast<ConstantFPSDNode>(FirstElt);
+ const APFloat &FirstVal = FirstConst->getValueAPF();
+ const fltSemantics &Sem = FirstVal.getSemantics();
+
+ // Every remaining element must be an FP constant bit-identical to the first.
+ for (unsigned i = 1, e = BV->getNumOperands(); i != e; ++i) {
+ SDValue Elt = BV->getOperand(i);
+ if (!isa<ConstantFPSDNode>(Elt))
+ return -1;
+ const APFloat &Val = cast<ConstantFPSDNode>(Elt)->getValueAPF();
+ if (!Val.bitwiseIsEqual(FirstVal))
+ return -1;
+ }
+
+ // Reject zero, NaN, and negative values; none of them can be written as 2^-N.
+ if (FirstVal.isZero() || FirstVal.isNaN() || FirstVal.isNegative())
+ return -1;
+
+ // Reinterpret the value as its raw bit pattern so the fields can be masked.
+ APInt Bits = FirstVal.bitcastToAPInt();
+
+ int ExponentBias = 0;
+ unsigned ExponentBits = 0;
+ unsigned MantissaBits = 0;
+
+ if (&Sem == &APFloat::IEEEsingle()) {
+ ExponentBias = 127;
+ ExponentBits = 8;
+ MantissaBits = 23;
+ } else if (&Sem == &APFloat::IEEEdouble()) {
+ ExponentBias = 1023;
+ ExponentBits = 11;
+ MantissaBits = 52;
+ } else {
+ // Only the IEEE single and double layouts are decoded here.
+ return -1;
+ }
+
+ // A power of two has an all-zero mantissa field; this also rejects denormals.
+ APInt MantissaMask = APInt::getLowBitsSet(Bits.getBitWidth(), MantissaBits);
+ if ((Bits & MantissaMask) != 0)
+ return -1;
+
+ // Extract the biased exponent field (the sign bit is known to be clear).
+ unsigned ExponentShift = MantissaBits;
+ APInt ExponentMask = APInt::getBitsSet(Bits.getBitWidth(), ExponentShift,
+ ExponentShift + ExponentBits);
+ int Exponent = (Bits & ExponentMask).lshr(ExponentShift).getZExtValue();
+ int Log2 = ExponentBias - Exponent; // Negated log2: the value equals 2^-Log2.
+
+ if (static_cast<unsigned>(Log2) > MaxExponent) // Negative Log2 (value > 1 or +inf) wraps to a huge unsigned.
+ return -1;
+
+ return Log2;
+}
+
+/// Fold a floating-point multiply by power of two into fixed-point to
+/// floating-point conversion.
+static SDValue performFMulCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const AArch64Subtarget *Subtarget) {
+
+ if (!Subtarget->hasNEON())
+ return SDValue();
+
+ // N is the FMUL node.
+ if (N->getOpcode() != ISD::FMUL)
+ return SDValue();
+
+ // SINT_TO_FP or UINT_TO_FP
+ SDValue Op = N->getOperand(0);
+ unsigned Opc = Op->getOpcode();
+ if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
+ !Op.getOperand(0).getValueType().isSimple() ||
+ (Opc != ISD::SINT_TO_FP && Opc != ISD::UINT_TO_FP))
+ return SDValue();
+
+ SDValue ConstVec = N->getOperand(1);
+ if (!isa<BuildVectorSDNode>(ConstVec))
+ return SDValue();
+
+ MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
+ int32_t IntBits = IntTy.getSizeInBits();
+ if (IntBits != 16 && IntBits != 32 && IntBits != 64)
+ return SDValue();
+
+ MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
+ int32_t FloatBits = FloatTy.getSizeInBits();
+ if (FloatBits != 32 && FloatBits != 64)
+ return SDValue();
+
+ if (IntBits > FloatBits)
+ return SDValue();
----------------
stephentyrone wrote:
Maybe worth a comment here noting that we cannot just convert to a wider float type and then narrow, because that would introduce a potential double rounding.
https://github.com/llvm/llvm-project/pull/141480
More information about the llvm-commits
mailing list