[llvm] [AArch64] Make use of byte FPR stores for bytes extracted from vectors (PR #134117)
Benjamin Maxwell via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 4 06:10:25 PDT 2025
================
@@ -24066,11 +24086,44 @@ static SDValue performSTORECombine(SDNode *N,
SDValue ExtIdx = Value.getOperand(1);
EVT VectorVT = Vector.getValueType();
EVT ElemVT = VectorVT.getVectorElementType();
- if (!ValueVT.isInteger() || ElemVT == MVT::i8 || MemVT == MVT::i8)
+ if (!ValueVT.isInteger())
return SDValue();
if (ValueVT != MemVT && !ST->isTruncatingStore())
return SDValue();
+ if (MemVT == MVT::i8) {
+ auto *ExtCst = dyn_cast<ConstantSDNode>(ExtIdx);
+ if (Subtarget->isNeonAvailable() &&
+ (VectorVT == MVT::v8i8 || VectorVT == MVT::v16i8) && ExtCst &&
+ !ExtCst->isZero() && ST->getBasePtr().getOpcode() != ISD::ADD) {
+ // These can lower to st1.b, which is preferable if we're unlikely to
+ // fold the addressing into the store.
+ return SDValue();
+ }
+
+ // Lower as truncstore of v1i64 -> v1i8 (which can lower to a bsub store).
+ SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
+ SDValue ExtVector;
+ EVT VecVT64 = get64BitVector(ElemVT);
+ if (ExtCst && ExtCst->isZero()) {
+ ExtVector =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT64, Vector, Zero);
+ } else {
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ Value.getValueType(), Vector, ExtIdx);
+ ExtVector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecVT64,
+ DAG.getUNDEF(VecVT64), Ext, Zero);
+ }
+
----------------
MacDue wrote:
> Rather than the extracts and inserts would it be possible to first nvcast the input to an i64 based vector of the same bit length and then optionally extract a v1i64 subvector to truncstore?
If you mean:
```
..vi64 %cast = nvcast vector
v1i64 = extract_subvector <idx>, %cast
truncstore %cast v1i64 -> v1i8
```
I don't think that would work for anything other than vectors of `i64` (and a restricted range of indices). For example, if you're storing lane 1 of an `nxv16i8` vector, you'd extract the first `v1i64` subvector, but truncstoring that to `v1i8` would store lane 0, not lane 1.
https://github.com/llvm/llvm-project/pull/134117
More information about the llvm-commits mailing list