[llvm] [AArch64] Generalize integer FPR lane stores for all types (PR #134117)

Thu Apr 10 05:51:18 PDT 2025

================
@@ -24066,39 +24069,63 @@ static SDValue performSTORECombine(SDNode *N,
     SDValue ExtIdx = Value.getOperand(1);
     EVT VectorVT = Vector.getValueType();
     EVT ElemVT = VectorVT.getVectorElementType();
-    if (!ValueVT.isInteger() || ElemVT == MVT::i8 || MemVT == MVT::i8)
+
+    if (!ValueVT.isInteger())
       return SDValue();
     if (ValueVT != MemVT && !ST->isTruncatingStore())
       return SDValue();
 
-    // Heuristic: If there are other users of integer scalars extracted from
-    // this vector that won't fold into the store -- abandon folding. Applying
-    // this fold may extend the vector lifetime and disrupt paired stores.
-    for (const auto &Use : Vector->uses()) {
-      if (Use.getResNo() != Vector.getResNo())
-        continue;
-      const SDNode *User = Use.getUser();
-      if (User->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
-          (!User->hasOneUse() ||
-           (*User->user_begin())->getOpcode() != ISD::STORE))
-        return SDValue();
-    }
+    // This could generate an additional extract if the index is non-zero and
+    // the extracted value has multiple uses.
+    auto *ExtCst = dyn_cast<ConstantSDNode>(ExtIdx);
+    if ((!ExtCst || !ExtCst->isZero()) && !Value.hasOneUse())
+      return SDValue();
 
-    EVT FPElemVT = EVT::getFloatingPointVT(ElemVT.getSizeInBits());
-    EVT FPVectorVT = VectorVT.changeVectorElementType(FPElemVT);
-    SDValue Cast = DAG.getNode(ISD::BITCAST, DL, FPVectorVT, Vector);
-    SDValue Ext =
-        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, FPElemVT, Cast, ExtIdx);
+    if (Subtarget->isNeonAvailable() && ElemVT == MemVT &&
+        (VectorVT.is64BitVector() || VectorVT.is128BitVector()) && ExtCst &&
+        !ExtCst->isZero() && ST->getBasePtr().getOpcode() != ISD::ADD) {
+      // These can lower to st1, which is preferable if we're unlikely to fold
+      // the addressing into the store.
+      return SDValue();
+    }
 
-    EVT FPMemVT = EVT::getFloatingPointVT(MemVT.getSizeInBits());
-    if (ST->isTruncatingStore() && FPMemVT != FPElemVT) {
-      SDValue Trunc = DAG.getTargetExtractSubreg(getFPSubregForVT(FPMemVT), DL,
-                                                 FPMemVT, Ext);
-      return DAG.getStore(ST->getChain(), DL, Trunc, ST->getBasePtr(),
-                          ST->getMemOperand());
+    if (MemVT == MVT::i64 || MemVT == MVT::i32) {
+      // Heuristic: If there are other users of w/x integer scalars extracted
+      // from this vector that won't fold into the store -- abandon folding.
+      // Applying this fold may disrupt paired stores.
+      for (const auto &Use : Vector->uses()) {
+        if (Use.getResNo() != Vector.getResNo())
+          continue;
+        const SDNode *User = Use.getUser();
+        if (User->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+            (!User->hasOneUse() ||
+             (*User->user_begin())->getOpcode() != ISD::STORE))
+          return SDValue();
+      }
     }
 
-    return DAG.getStore(ST->getChain(), DL, Ext, ST->getBasePtr(),
+    SDValue ExtVector = Vector;
+    if (!ExtCst || !ExtCst->isZero()) {
+      // Handle extracting from lanes != 0.
+      SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+                                Value.getValueType(), Vector, ExtIdx);
+      // FIXME: Using a fixed-size vector for the insertion should not be
+      // necessary, but SVE ISEL is missing some folds to avoid fmovs.
+      SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
----------------
paulwalker-arm wrote:

```suggestion
      SDValue Zero = getVectorIdxConstant(0, DL);
```

https://github.com/llvm/llvm-project/pull/134117