[llvm] [AArch64] Optimize extending loads of small vectors (PR #163064)
Guy David via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 17 04:02:54 PST 2025
================
@@ -7188,12 +7230,89 @@ SDValue AArch64TargetLowering::LowerStore128(SDValue Op,
return Result;
}
+/// Helper function to optimize loads of extended small vectors.
+/// These patterns would otherwise get scalarized into inefficient sequences.
+static SDValue performSmallVectorLoadExtCombine(LoadSDNode *Load,
+ SelectionDAG &DAG) {
+ const AArch64Subtarget &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
+ if (!isEligibleForSmallVectorLoadOpt(Load, Subtarget))
+ return SDValue();
+
+ EVT MemVT = Load->getMemoryVT();
+ EVT ResVT = Load->getValueType(0);
+ unsigned NumElts = ResVT.getVectorNumElements();
+ unsigned DstEltBits = ResVT.getScalarSizeInBits();
+ unsigned SrcEltBits = MemVT.getScalarSizeInBits();
+
+ unsigned ExtOpcode;
+ switch (Load->getExtensionType()) {
+ case ISD::EXTLOAD:
+ case ISD::ZEXTLOAD:
+ ExtOpcode = ISD::ZERO_EXTEND;
+ break;
+ case ISD::SEXTLOAD:
+ ExtOpcode = ISD::SIGN_EXTEND;
+ break;
+ case ISD::NON_EXTLOAD:
+ return SDValue();
+ }
+
+ SDLoc DL(Load);
+ SDValue Chain = Load->getChain();
+ SDValue BasePtr = Load->getBasePtr();
+ const MachinePointerInfo &PtrInfo = Load->getPointerInfo();
+ Align Alignment = Load->getAlign();
+
+ // Load the data as an FP scalar to avoid issues with integer loads.
+ unsigned LoadBits = MemVT.getStoreSizeInBits();
+ MVT ScalarLoadType = MVT::getFloatingPointVT(LoadBits);
+ SDValue ScalarLoad =
+ DAG.getLoad(ScalarLoadType, DL, Chain, BasePtr, PtrInfo, Alignment);
+
+ MVT ScalarToVecTy = MVT::getVectorVT(ScalarLoadType, 128 / LoadBits);
+ SDValue ScalarToVec =
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ScalarToVecTy, ScalarLoad);
+ MVT BitcastTy =
+ MVT::getVectorVT(MVT::getIntegerVT(SrcEltBits), 128 / SrcEltBits);
+ SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, BitcastTy, ScalarToVec);
----------------
guy-david wrote:
Endianness is handled at the instruction-selection level, and the `SCALAR_TO_VECTOR` + `BITCAST` sequence itself is endianness-agnostic.
https://github.com/llvm/llvm-project/pull/163064
More information about the llvm-commits
mailing list