[llvm] [AArch64] Generalize integer FPR lane stores for all types (PR #134117)
Paul Walker via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 10 05:51:18 PDT 2025
================
@@ -24066,39 +24069,63 @@ static SDValue performSTORECombine(SDNode *N,
SDValue ExtIdx = Value.getOperand(1);
EVT VectorVT = Vector.getValueType();
EVT ElemVT = VectorVT.getVectorElementType();
- if (!ValueVT.isInteger() || ElemVT == MVT::i8 || MemVT == MVT::i8)
+
+ if (!ValueVT.isInteger())
return SDValue();
if (ValueVT != MemVT && !ST->isTruncatingStore())
return SDValue();
- // Heuristic: If there are other users of integer scalars extracted from
- // this vector that won't fold into the store -- abandon folding. Applying
- // this fold may extend the vector lifetime and disrupt paired stores.
- for (const auto &Use : Vector->uses()) {
- if (Use.getResNo() != Vector.getResNo())
- continue;
- const SDNode *User = Use.getUser();
- if (User->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- (!User->hasOneUse() ||
- (*User->user_begin())->getOpcode() != ISD::STORE))
- return SDValue();
- }
+ // This could generate an additional extract if the index is non-zero and
+ // the extracted value has multiple uses.
+ auto *ExtCst = dyn_cast<ConstantSDNode>(ExtIdx);
+ if ((!ExtCst || !ExtCst->isZero()) && !Value.hasOneUse())
+ return SDValue();
- EVT FPElemVT = EVT::getFloatingPointVT(ElemVT.getSizeInBits());
- EVT FPVectorVT = VectorVT.changeVectorElementType(FPElemVT);
- SDValue Cast = DAG.getNode(ISD::BITCAST, DL, FPVectorVT, Vector);
- SDValue Ext =
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, FPElemVT, Cast, ExtIdx);
+ if (Subtarget->isNeonAvailable() && ElemVT == MemVT &&
+ (VectorVT.is64BitVector() || VectorVT.is128BitVector()) && ExtCst &&
+ !ExtCst->isZero() && ST->getBasePtr().getOpcode() != ISD::ADD) {
+ // These can lower to st1, which is preferable if we're unlikely to fold
+ // the addressing into the store.
+ return SDValue();
+ }
- EVT FPMemVT = EVT::getFloatingPointVT(MemVT.getSizeInBits());
- if (ST->isTruncatingStore() && FPMemVT != FPElemVT) {
- SDValue Trunc = DAG.getTargetExtractSubreg(getFPSubregForVT(FPMemVT), DL,
- FPMemVT, Ext);
- return DAG.getStore(ST->getChain(), DL, Trunc, ST->getBasePtr(),
- ST->getMemOperand());
+ if (MemVT == MVT::i64 || MemVT == MVT::i32) {
+ // Heuristic: If there are other users of w/x integer scalars extracted
+ // from this vector that won't fold into the store -- abandon folding.
+ // Applying this fold may disrupt paired stores.
+ for (const auto &Use : Vector->uses()) {
+ if (Use.getResNo() != Vector.getResNo())
+ continue;
+ const SDNode *User = Use.getUser();
+ if (User->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ (!User->hasOneUse() ||
+ (*User->user_begin())->getOpcode() != ISD::STORE))
+ return SDValue();
+ }
}
- return DAG.getStore(ST->getChain(), DL, Ext, ST->getBasePtr(),
+ SDValue ExtVector = Vector;
+ if (!ExtCst || !ExtCst->isZero()) {
+ // Handle extracting from lanes != 0.
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ Value.getValueType(), Vector, ExtIdx);
+ // FIXME: Using a fixed-size vector for the insertion should not be
+ // necessary, but SVE ISEL is missing some folds to avoid fmovs.
+ SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
----------------
paulwalker-arm wrote:
```suggestion
SDValue Zero = getVectorIdxConstant(0, DL);
```
https://github.com/llvm/llvm-project/pull/134117
More information about the llvm-commits mailing list