[llvm] [AArch64] Make use of byte FPR stores for bytes extracted from vectors (PR #134117)
Paul Walker via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 4 03:11:04 PDT 2025
================
@@ -24066,11 +24086,44 @@ static SDValue performSTORECombine(SDNode *N,
SDValue ExtIdx = Value.getOperand(1);
EVT VectorVT = Vector.getValueType();
EVT ElemVT = VectorVT.getVectorElementType();
- if (!ValueVT.isInteger() || ElemVT == MVT::i8 || MemVT == MVT::i8)
+ if (!ValueVT.isInteger())
return SDValue();
if (ValueVT != MemVT && !ST->isTruncatingStore())
return SDValue();
+ if (MemVT == MVT::i8) {
+ auto *ExtCst = dyn_cast<ConstantSDNode>(ExtIdx);
+ if (Subtarget->isNeonAvailable() &&
+ (VectorVT == MVT::v8i8 || VectorVT == MVT::v16i8) && ExtCst &&
+ !ExtCst->isZero() && ST->getBasePtr().getOpcode() != ISD::ADD) {
+ // These can lower to st1.b, which is preferable if we're unlikely to
+ // fold the addressing into the store.
+ return SDValue();
+ }
+
+ // Lower as truncstore of v1i64 -> v1i8 (which can lower to a bsub store).
+ SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
+ SDValue ExtVector;
+ EVT VecVT64 = get64BitVector(ElemVT);
+ if (ExtCst && ExtCst->isZero()) {
+ ExtVector =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT64, Vector, Zero);
+ } else {
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ Value.getValueType(), Vector, ExtIdx);
+ ExtVector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecVT64,
+ DAG.getUNDEF(VecVT64), Ext, Zero);
+ }
+
----------------
paulwalker-arm wrote:
Rather than the extracts and inserts, would it be possible to first `nvcast` the input to an `i64`-based vector of the same bit length and then optionally extract a `v1i64` subvector to truncstore?
Perhaps it's also worth revisiting the previous patch that added the i16-i64 support to see if it's possible to send all the types down the same new path?
https://github.com/llvm/llvm-project/pull/134117
More information about the llvm-commits mailing list