[llvm] [InterleavedAccess] Construct interleaved access store with shuffles (PR #164000)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 22 07:47:04 PDT 2025
================
@@ -18173,6 +18180,135 @@ bool AArch64TargetLowering::lowerInterleavedStore(Instruction *Store,
return true;
}
+/// If the interleaved vector elements are greater than the supported
+/// MaxFactor, then interleaving the data with additional shuffles can be
+/// used to achieve the same result.
+/// The diagram below shows how 8 interleaved data vectors are shuffled so
+/// that they can be stored with stN instructions. The data needs to be
+/// stored in this order: v0,v1,v2,v3,v4,v5,v6,v7
+/// v0 v4 v2 v6 v1 v5 v3 v7
+/// | | | | | | | |
+/// \ / \ / \ / \ /
+/// [zip v0,v4] [zip v2,v6] [zip v1,v5] [zip v3,v7]==> stN = 4
+/// | | | |
+/// \ / \ /
+/// \ / \ /
+/// \ / \ /
+/// [zip [v0,v2,v4,v6]] [zip [v1,v3,v5,v7]] ==> stN = 2
+///
+/// At the stN = 4 level, the upper half of the interleaved data
+/// (V0,V1,V2,V3) is stored with one st4 instruction. The lower half
+/// (V4,V5,V6,V7) is stored with another st4.
+///
+/// At the stN = 2 level, the first upper half of the interleaved data
+/// (V0,V1) is stored with one st2 instruction. The second set (V2,V3) is
+/// stored with another st2. A total of 4 st2 instructions are required.
+bool AArch64TargetLowering::lowerInterleavedStoreWithShuffle(
+ StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const {
+ unsigned MaxSupportedFactor = getMaxSupportedInterleaveFactor();
+
+ auto *VecTy = cast<FixedVectorType>(SVI->getType());
+ assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
+
+ unsigned LaneLen = VecTy->getNumElements() / Factor;
+ Type *EltTy = VecTy->getElementType();
+ auto *SubVecTy = FixedVectorType::get(EltTy, Factor);
+
+ const DataLayout &DL = SI->getModule()->getDataLayout();
+ bool UseScalable;
+
+ // Skip if we do not have NEON and skip illegal vector types. We can
+ // "legalize" wide vector types into multiple interleaved accesses as long as
+ // the vector types are divisible by 128.
+ if (!Subtarget->hasNEON() ||
+ !isLegalInterleavedAccessType(SubVecTy, DL, UseScalable))
+ return false;
+
+ if (UseScalable)
+ return false;
+
+ std::deque<Value *> Shuffles;
+ Shuffles.push_back(SVI);
+ unsigned ConcatLevel = Factor;
+ while (ConcatLevel > 1) {
+ std::deque<Value *> ShufflesIntermediate;
+ ShufflesIntermediate = Shuffles;
+ Shuffles.clear();
+ while (!ShufflesIntermediate.empty()) {
+ ShuffleVectorInst *SFL =
+ dyn_cast<ShuffleVectorInst>(ShufflesIntermediate.front());
+ if (!SFL)
+ break;
+ ShufflesIntermediate.pop_front();
+
+ Value *Op0 = SFL->getOperand(0);
+ Value *Op1 = SFL->getOperand(1);
+
+ Shuffles.push_back(dyn_cast<Value>(Op0));
+ Shuffles.push_back(dyn_cast<Value>(Op1));
+ }
+ if (!ShufflesIntermediate.empty()) {
+ Shuffles = ShufflesIntermediate;
+ break;
+ }
+ ConcatLevel = ConcatLevel >> 1;
+ }
+
+ if (Shuffles.size() != Factor)
+ return false;
+
+ IRBuilder<> Builder(SI);
+ auto Mask = createInterleaveMask(LaneLen, 2);
+ SmallVector<int, 16> UpperHalfMask, LowerHalfMask;
+ for (unsigned i = 0; i < (2 * LaneLen); i++) {
+ if (i < LaneLen)
+ LowerHalfMask.push_back(Mask[i]);
+ else
+ UpperHalfMask.push_back(Mask[i]);
+ }
+
+ unsigned InterleaveFactor = Factor >> 1;
+ while (InterleaveFactor >= MaxSupportedFactor) {
+ std::deque<Value *> ShufflesIntermediate;
+ for (unsigned j = 0; j < Factor; j += (InterleaveFactor * 2)) {
+ for (unsigned i = 0; i < InterleaveFactor; i++) {
+ auto *Shuffle = Builder.CreateShuffleVector(
+ Shuffles[i + j], Shuffles[i + j + InterleaveFactor], LowerHalfMask);
+ ShufflesIntermediate.push_back(Shuffle);
+ }
+ for (unsigned i = 0; i < InterleaveFactor; i++) {
+ auto *Shuffle = Builder.CreateShuffleVector(
+ Shuffles[i + j], Shuffles[i + j + InterleaveFactor], UpperHalfMask);
+ ShufflesIntermediate.push_back(Shuffle);
+ }
+ }
----------------
ram-NK wrote:
It is not possible to fuse the two inner loops. All of the lower-half shuffles need to be grouped together, followed by all of the upper-half shuffles. Only then can I access the same shuffles list again by index, either to interleave again or to prepare the operands for the stN call.
https://github.com/llvm/llvm-project/pull/164000
More information about the llvm-commits
mailing list