[llvm] [AArch64] Keep floating-point conversion in SIMD (PR #147707)

Tue Jul 29 08:27:14 PDT 2025

================
@@ -24026,6 +24026,66 @@ static SDValue combineBoolVectorAndTruncateStore(SelectionDAG &DAG,
                       Store->getMemOperand());
 }
 
+// Combine store (fp_to_int X) to use vector semantics around the
+// conversion when NEON is available.
+static SDValue combineStoreValueFPToInt(StoreSDNode *ST,
+                                        TargetLowering::DAGCombinerInfo &DCI,
+                                        SelectionDAG &DAG,
+                                        const AArch64Subtarget *Subtarget) {
+  // Limit to post-legalization in order to avoid peeling truncating stores.
+  if (DCI.isBeforeLegalize())
+    return {};
+  if (!Subtarget->isNeonAvailable())
+    return {};
+  // Source operand is already a vector.
+  SDValue Value = ST->getValue();
+  if (Value.getValueType().isVector())
+    return {};
+
+  // Look through potential assertions.
+  while (Value->isAssert())
+    Value = Value.getOperand(0);
+
+  if (Value.getOpcode() != ISD::FP_TO_SINT &&
+      Value.getOpcode() != ISD::FP_TO_UINT)
+    return {};
+  if (!Value->hasOneUse())
+    return {};
+
+  SDValue FPSrc = Value.getOperand(0);
+  EVT SrcVT = FPSrc.getValueType();
+  assert(!SrcVT.isVector());
+
+  // No support for assignments such as i64 = fp_to_sint f32
+  EVT VT = Value.getSimpleValueType();
+  if (VT != SrcVT.changeTypeToInteger())
+    return {};
+
+  // Create a 128-bit element vector to avoid widening. The floating point
+  // conversion is transformed into a single element conversion via a pattern.
+  unsigned NumElements = 128 / SrcVT.getFixedSizeInBits();
+  EVT VecSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumElements);
+  EVT VecDstVT = VecSrcVT.changeTypeToInteger();
+  SDLoc DL(ST);
+  SDValue VecFP = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecSrcVT, FPSrc);
+
+  SDValue VecConv = DAG.getNode(Value.getOpcode(), DL, VecDstVT, VecFP);
+
+  if (ST->getMemoryVT().getFixedSizeInBits() < SrcVT.getSizeInBits()) {
+    EVT NewVecDstVT = EVT::getVectorVT(
+        *DAG.getContext(), ST->getMemoryVT(),
+        VecDstVT.getFixedSizeInBits() / ST->getMemoryVT().getFixedSizeInBits());
+    VecConv = DAG.getNode(ISD::BITCAST, DL, NewVecDstVT, VecConv);
+  }
----------------
guy-david wrote:

Good point. It seems to be required to support truncating stores which also have post/pre-increments. Maybe there's a missing tablegen pattern for that. Looking into it now.

https://github.com/llvm/llvm-project/pull/147707