[llvm] [AArch64] Add custom lowering for load <3 x i8>. (PR #78632)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 19 08:49:04 PST 2024
================
@@ -21095,6 +21095,50 @@ static SDValue foldTruncStoreOfExt(SelectionDAG &DAG, SDNode *N) {
return SDValue();
}
+// A custom combine to lower load <3 x i8> as the more efficient sequence
+// below:
+// ldrb wX, [x0, #2]
+// ldrh wY, [x0]
+// orr wX, wY, wX, lsl #16
+// fmov s0, wX
+//
+static SDValue combineV3I8LoadExt(LoadSDNode *LD, SelectionDAG &DAG) {
+ EVT MemVT = LD->getMemoryVT();
+ if (MemVT != EVT::getVectorVT(*DAG.getContext(), MVT::i8, 3) ||
+ LD->getOriginalAlign() >= 4)
+ return SDValue();
+
+ SDLoc DL(LD);
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+
+ // Load 2 x i8, then 1 x i8.
+ SDValue L16 = DAG.getLoad(MVT::i16, DL, Chain, BasePtr, LD->getPointerInfo(),
+ LD->getOriginalAlign());
+ SDValue L8 =
+ DAG.getLoad(MVT::i8, DL, Chain,
+ DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(2), DL),
+ LD->getPointerInfo(), LD->getOriginalAlign());
----------------
fhahn wrote:
Updated to use commonAlignment and to adjust the pointer info by the offset as well.
https://github.com/llvm/llvm-project/pull/78632
More information about the llvm-commits mailing list