[llvm] [InterleavedAccess] Construct interleaved access store with shuffles (PR #164000)
    via llvm-commits 
    llvm-commits at lists.llvm.org
       
    Mon Oct 20 09:09:13 PDT 2025
    
    
  
================
@@ -18173,6 +18179,136 @@ bool AArch64TargetLowering::lowerInterleavedStore(Instruction *Store,
   return true;
 }
 
+/// If the interleaved vector elements are greater than the supported
+/// MaxFactor, then interleaving the data with additional shuffles can be
+/// used to achieve the same result.
+/// The diagram below shows how 8 interleaved data are shuffled to be stored
+/// with stN instructions. Required store order: v0,v1,v2,v3,v4,v5,v6,v7.
+///      v0      v4      v2      v6      v1      v5      v3      v7
+///      |       |       |       |       |       |       |       |
+///       \     /         \     /         \     /         \     /
+///     [zip v0,v4]      [zip v2,v6]    [zip v1,v5]      [zip v3,v7]==> stN = 4
+///          |               |              |                 |
+///           \             /                \               /
+///            \           /                  \             /
+///             \         /                    \           /
+///         [zip [v0,v2,v4,v6]]            [zip [v1,v3,v5,v7]]     ==> stN = 2
+///
+/// At the stN = 4 level, the upper half of the interleaved data (V0,V1,V2,V3)
+/// is stored with one st4 instruction. The lower half (V4,V5,V6,V7) is stored
+/// with another st4.
+///
+/// At the stN = 2 level, the first upper half of the interleaved data (V0,V1)
+/// is stored with one st2 instruction. The second set (V2,V3) is stored with
+/// another st2. A total of 4 st2 are required.
+bool AArch64TargetLowering::lowerInterleavedStoreWithShuffle(
+    StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const {
+  unsigned MaxSupportedFactor = getMaxSupportedInterleaveFactor();
+
+  auto *VecTy = cast<FixedVectorType>(SVI->getType());
+  assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
+
+  unsigned LaneLen = VecTy->getNumElements() / Factor;
+  Type *EltTy = VecTy->getElementType();
+  auto *SubVecTy = FixedVectorType::get(EltTy, Factor);
+
+  const DataLayout &DL = SI->getModule()->getDataLayout();
+  bool UseScalable;
+
+  // Skip if we do not have NEON and skip illegal vector types. We can
+  // "legalize" wide vector types into multiple interleaved accesses as long as
+  // the vector types are divisible by 128.
+  if (!Subtarget->hasNEON() ||
+      !isLegalInterleavedAccessType(SubVecTy, DL, UseScalable))
+    return false;
+
+  if (UseScalable)
+    return false;
+
+  SmallVector<Value *, 8> Shufflelist;
+  Shufflelist.push_back(SVI);
+  unsigned ConcatLevel = Factor;
+  while (ConcatLevel > 1) {
+    SmallVector<Value *, 8> ShufflelistIntermediate;
+    ShufflelistIntermediate = Shufflelist;
+    Shufflelist.clear();
+    while (!ShufflelistIntermediate.empty()) {
+      ShuffleVectorInst *SFL =
+          dyn_cast<ShuffleVectorInst>(ShufflelistIntermediate[0]);
+      if (!SFL)
+        break;
+      ShufflelistIntermediate.erase(ShufflelistIntermediate.begin());
----------------
ram-NK wrote:
Changed the data structure. Used pop from front.
https://github.com/llvm/llvm-project/pull/164000
    
    
More information about the llvm-commits
mailing list