[llvm] [DA] batch delinearization (PR #170519)

Fri Dec 12 06:54:51 PST 2025

================
@@ -960,3 +960,140 @@ PreservedAnalyses DelinearizationPrinterPass::run(Function &F,
                        &AM.getResult<ScalarEvolutionAnalysis>(F));
   return PreservedAnalyses::all();
 }
+
+//===----------------------------------------------------------------------===//
+// BatchDelinearization Implementation
+//===----------------------------------------------------------------------===//
+
+/// Return true if \p I is a LoadInst or a StoreInst, false otherwise.
+static bool isLoadOrStore(const Instruction *I) {
+  return isa<LoadInst>(I) || isa<StoreInst>(I);
+}
+
+void BatchDelinearization::populate() {
+  if (Populated)
+    return;
+
+  Populated = true;
+
+  // Step 1: Collect all memory accesses grouped by base pointer.
+  // Map from base pointer to list of (Instruction, AccessFunction) pairs.
+  SmallDenseMap<const SCEVUnknown *,
+                SmallVector<std::pair<Instruction *, const SCEV *>, 4>, 8>
+      AccessesByBase;
+
+  for (Instruction &I : instructions(F)) {
+    if (!isLoadOrStore(&I))
+      continue;
+
+    Value *Ptr = getLoadStorePointerOperand(&I);
+    Loop *L = LI.getLoopFor(I.getParent());
+    const SCEV *AccessFn = SE.getSCEVAtScope(Ptr, L);
+    const SCEVUnknown *Base =
+        dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFn));
+
+    if (!Base)
+      continue;
+
+    // Only consider accesses where the base is loop invariant.
+    if (L && !SE.isLoopInvariant(Base, L))
+      continue;
+
+    AccessesByBase[Base].push_back({&I, AccessFn});
+  }
+
+  // Step 2: For each base pointer, collect terms from ALL accesses and
+  // compute array dimensions once.
+  for (auto &Entry : AccessesByBase) {
+    const SCEVUnknown *Base = Entry.first;
+    auto &Accesses = Entry.second;
+
+    // Skip if there's only one access - no benefit from batch processing.
+    if (Accesses.size() < 2)
+      continue;
+
+    // Determine element size - use the smallest among all accesses.
+    const SCEV *ElemSize = nullptr;
+    for (auto &Access : Accesses) {
+      const SCEV *EltSize = SE.getElementSize(Access.first);
+      if (!ElemSize)
+        ElemSize = EltSize;
+      else if (SE.isKnownPredicate(ICmpInst::ICMP_ULT, EltSize, ElemSize))
+        ElemSize = EltSize;
+    }
+
+    if (!ElemSize)
+      continue;
+
+    ElementSizes[Base] = ElemSize;
+
+    // Collect parametric terms from all accesses to this base.
+    SmallVector<const SCEV *, 8> Terms;
+    for (auto &Access : Accesses) {
+      const SCEV *AccessFn = Access.second;
+      const SCEV *OffsetSCEV = SE.getMinusSCEV(AccessFn, Base);
+      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OffsetSCEV);
+      if (AR && AR->isAffine())
+        collectParametricTerms(SE, AR, Terms);
+    }
+
+    // Find array dimensions using all collected terms.
+    SmallVector<const SCEV *, 4> Sizes;
+    findArrayDimensions(SE, Terms, Sizes, ElemSize);
+
+    // Skip if we couldn't determine dimensions.
+    if (Sizes.size() < 2)
+      continue;
+
+    ArraySizes[Base] = Sizes;
+
+    // Pre-compute subscripts for each access using parametric sizes.
+    for (auto &Access : Accesses) {
+      Instruction *Inst = Access.first;
+      const SCEV *AccessFn = Access.second;
+      const SCEV *OffsetSCEV = SE.getMinusSCEV(AccessFn, Base);
+      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OffsetSCEV);
+
+      if (!AR || !AR->isAffine())
+        continue;
+
+      SmallVector<const SCEV *, 4> Subs;
+      computeAccessFunctions(SE, AR, Subs, Sizes);
+
+      if (Subs.size() >= 2)
+        Subscripts[Inst] = std::move(Subs);
+    }
+  }
----------------
kasuga-fj wrote:

Could you separate the parametric delinearization part into a separate PR? I think it would be better to support only fixed-size delinearization at first, as it's simpler than parametric delinearization.

https://github.com/llvm/llvm-project/pull/170519