[llvm] [AArch64] Fix #94909: Optimize vector fmul(sitofp(x), 0.5) -> scvtf(x, 2) (PR #141480)

Mon May 26 08:35:55 PDT 2025

================
@@ -19250,6 +19253,153 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
   return FixConv;
 }
 
+/// Determine the power-of-two scale encoded by a uniform constant FP splat.
+///
+/// Every operand of \p BV has to be a ConstantFPSDNode carrying the same bit
+/// pattern, and the value has to use IEEE single or IEEE double semantics.
+/// For a splat of 2^-K the result is K, i.e. the format's exponent bias minus
+/// the stored (biased) exponent, so 1.0 yields 0 and 0.5 yields 1.
+///
+/// Returns -1 when any operand is not an FP constant, the operands differ,
+/// the value is zero, NaN or negative, the FP type is unsupported, the
+/// mantissa field is non-zero, or K (compared as unsigned) exceeds
+/// \p MaxExponent.
+static int getUniformFPSplatLog2(const BuildVectorSDNode *BV,
+                                 unsigned MaxExponent) {
+  const auto *Lane0 = dyn_cast<ConstantFPSDNode>(BV->getOperand(0));
+  if (!Lane0)
+    return -1;
+  const APFloat &SplatVal = Lane0->getValueAPF();
+
+  // Every remaining lane must be an FP constant that is bit-for-bit equal to
+  // the first one.
+  for (unsigned I = 1, E = BV->getNumOperands(); I != E; ++I) {
+    const auto *Lane = dyn_cast<ConstantFPSDNode>(BV->getOperand(I));
+    if (!Lane || !Lane->getValueAPF().bitwiseIsEqual(SplatVal))
+      return -1;
+  }
+
+  // Zero, NaN and negative constants are not handled.
+  if (SplatVal.isZero() || SplatVal.isNaN() || SplatVal.isNegative())
+    return -1;
+
+  // Pick the field layout for the two supported formats; bail out on
+  // anything else.
+  const fltSemantics &Sem = SplatVal.getSemantics();
+  int Bias = 0;
+  unsigned ExpWidth = 0;
+  unsigned FracWidth = 0;
+  if (&Sem == &APFloat::IEEEsingle()) {
+    Bias = 127;
+    ExpWidth = 8;
+    FracWidth = 23;
+  } else if (&Sem == &APFloat::IEEEdouble()) {
+    Bias = 1023;
+    ExpWidth = 11;
+    FracWidth = 52;
+  } else {
+    return -1;
+  }
+
+  APInt Raw = SplatVal.bitcastToAPInt();
+  const unsigned Width = Raw.getBitWidth();
+
+  // Any set bit in the mantissa field means the value is rejected.
+  APInt FracField = APInt::getLowBitsSet(Width, FracWidth);
+  if ((Raw & FracField) != 0)
+    return -1;
+
+  // Isolate the biased exponent, which sits directly above the mantissa.
+  APInt ExpField = APInt::getBitsSet(Width, FracWidth, FracWidth + ExpWidth);
+  int BiasedExp =
+      static_cast<int>((Raw & ExpField).lshr(FracWidth).getZExtValue());
+  int Log2 = Bias - BiasedExp;
+
+  // The comparison is done on the unsigned reinterpretation, so a negative
+  // Log2 wraps to a large value before being checked against the limit.
+  if (static_cast<unsigned>(Log2) > MaxExponent)
+    return -1;
+
+  return Log2;
+}
+
+/// Fold a floating-point multiply by power of two into fixed-point to
+/// floating-point conversion.
+static SDValue performFMulCombine(SDNode *N, SelectionDAG &DAG,
+                                  TargetLowering::DAGCombinerInfo &DCI,
+                                  const AArch64Subtarget *Subtarget) {
+
+  if (!Subtarget->hasNEON())
+    return SDValue();
+
+  // N is the FMUL node.
+  if (N->getOpcode() != ISD::FMUL)
+    return SDValue();
+
+  // SINT_TO_FP or UINT_TO_FP
+  SDValue Op = N->getOperand(0);
+  unsigned Opc = Op->getOpcode();
+  if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
+      !Op.getOperand(0).getValueType().isSimple() ||
+      (Opc != ISD::SINT_TO_FP && Opc != ISD::UINT_TO_FP))
+    return SDValue();
+
+  SDValue ConstVec = N->getOperand(1);
+  if (!isa<BuildVectorSDNode>(ConstVec))
+    return SDValue();
+
+  MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
+  int32_t IntBits = IntTy.getSizeInBits();
+  if (IntBits != 16 && IntBits != 32 && IntBits != 64)
+    return SDValue();
+
+  MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
+  int32_t FloatBits = FloatTy.getSizeInBits();
+  if (FloatBits != 32 && FloatBits != 64)
+    return SDValue();
+
+  if (IntBits > FloatBits)
+    return SDValue();
----------------
stephentyrone wrote:

Maybe worth a comment here noting that we cannot just convert to a wider float type and then narrow because it would introduce a potential double-rounding.

https://github.com/llvm/llvm-project/pull/141480