[llvm] [Delinearization] Add function for fixed size array without relying on GEP (PR #145050)

Wed Jul 9 04:09:45 PDT 2025

https://github.com/kasuga-fj updated https://github.com/llvm/llvm-project/pull/145050

>From e3e2f5a388944d35076280a51c7079141395978a Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Thu, 19 Jun 2025 12:04:11 +0000
Subject: [PATCH 1/7] [Delinearization] Add function for fixed size array
 without relying on GEP

The existing functions `getIndexExpressionsFromGEP` and
`tryDelinearizeFixedSizeImpl` provide functionality to delinearize
memory accesses to fixed-size arrays. They use the GEP source element
type in their optimization heuristics. However, driving optimization
heuristics based on GEP type information is not allowed.
This patch introduces a new function `delinearizeFixedSizeArray` so that
they can be removed. This is an initial implementation that may not cover
all cases, but is intended to replace the existing functions in the future.
---
 llvm/include/llvm/Analysis/Delinearization.h  |  23 +
 llvm/lib/Analysis/Delinearization.cpp         | 194 +++++++-
 .../Delinearization/fixed_size_array.ll       | 446 ++++++++++++++++++
 3 files changed, 661 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/Analysis/Delinearization/fixed_size_array.ll

diff --git a/llvm/include/llvm/Analysis/Delinearization.h b/llvm/include/llvm/Analysis/Delinearization.h
index eb775babd6061..dca423235b3c0 100644
--- a/llvm/include/llvm/Analysis/Delinearization.h
+++ b/llvm/include/llvm/Analysis/Delinearization.h
@@ -112,6 +112,29 @@ void delinearize(ScalarEvolution &SE, const SCEV *Expr,
                  SmallVectorImpl<const SCEV *> &Subscripts,
                  SmallVectorImpl<const SCEV *> &Sizes, const SCEV *ElementSize);
 
+/// Split this SCEVAddRecExpr into two vectors of SCEVs representing the
+/// subscripts and sizes of an access to a fixed size array. This is a special
+/// case of delinearization for fixed size arrays.
+///
+/// The delinearization is a 2 step process: the first step estimates the sizes
+/// of each dimension of the array. The second step computes the access
+/// functions for the delinearized array:
+///
+/// 1. Compute the array size
+/// 2. Compute the access function: same as normal delinearization
+///
+/// Unlike the normal delinearization, this function assumes that NO
+/// terms exist in the \p Expr. In other words, it assumes that all the step
+/// values are constant.
+///
+/// This function is intended to replace getIndexExpressionsFromGEP and
+/// tryDelinearizeFixedSizeImpl. They rely on the GEP source element type so
+/// that they will be removed in the future.
+void delinearizeFixedSizeArray(ScalarEvolution &SE, const SCEV *Expr,
+                               SmallVectorImpl<const SCEV *> &Subscripts,
+                               SmallVectorImpl<const SCEV *> &Sizes,
+                               const SCEV *ElementSize);
+
 /// Gathers the individual index expressions from a GEP instruction.
 ///
 /// This function optimistically assumes the GEP references into a fixed size
diff --git a/llvm/lib/Analysis/Delinearization.cpp b/llvm/lib/Analysis/Delinearization.cpp
index 329bd35530c72..962b3be866dc6 100644
--- a/llvm/lib/Analysis/Delinearization.cpp
+++ b/llvm/lib/Analysis/Delinearization.cpp
@@ -24,6 +24,7 @@
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -32,6 +33,11 @@ using namespace llvm;
 #define DL_NAME "delinearize"
 #define DEBUG_TYPE DL_NAME
 
+static cl::opt<bool> UseFixedSizeArrayHeuristic(
+    "delinearize-use-fixed-size-array-heuristic", cl::init(false), cl::Hidden,
+    cl::desc("When printing analysis, use the heuristic for fixed-size arrays "
+             "if the default delinearization fails."));
+
 // Return true when S contains at least an undef value.
 static inline bool containsUndefs(const SCEV *S) {
   return SCEVExprContains(S, [](const SCEV *S) {
@@ -480,6 +486,178 @@ void llvm::delinearize(ScalarEvolution &SE, const SCEV *Expr,
   });
 }
 
+static std::optional<APInt> tryIntoAPInt(const SCEV *S) {
+  if (const auto *Const = dyn_cast<SCEVConstant>(S))
+    return Const->getAPInt();
+  return std::nullopt;
+}
+
+/// Collects the absolute values of constant steps for all induction variables.
+/// Returns true if we can prove that all step recurrences are constants and \p
+/// Expr is dividable by \p ElementSize. Each step recurrence is stored in \p
+/// Steps after divided by \p ElementSize.
+static bool collectConstantAbsSteps(ScalarEvolution &SE, const SCEV *Expr,
+                                    SmallVectorImpl<unsigned> &Steps,
+                                    unsigned ElementSize) {
+  // End of recursion. The constant value also must be a multiple of
+  // ElementSize.
+  if (const auto *Const = dyn_cast<SCEVConstant>(Expr)) {
+    const unsigned Mod = Const->getAPInt().urem(ElementSize);
+    return Mod == 0;
+  }
+
+  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Expr);
+  if (!AR || !AR->isAffine())
+    return false;
+
+  const SCEV *Step = AR->getStepRecurrence(SE);
+  std::optional<APInt> StepAPInt = tryIntoAPInt(Step);
+  if (!StepAPInt)
+    return false;
+
+  APInt Q;
+  uint64_t R;
+  APInt::udivrem(StepAPInt->abs(), ElementSize, Q, R);
+  if (R != 0)
+    return false;
+
+  // Bail out when the step is too large.
+  std::optional<unsigned> StepVal = Q.tryZExtValue();
+  if (!StepVal)
+    return false;
+
+  Steps.push_back(*StepVal);
+  return collectConstantAbsSteps(SE, AR->getStart(), Steps, ElementSize);
+}
+
+static bool findFixedSizeArrayDimensions(ScalarEvolution &SE, const SCEV *Expr,
+                                         SmallVectorImpl<unsigned> &Sizes,
+                                         const SCEV *ElementSize) {
+  if (!ElementSize)
+    return false;
+
+  std::optional<APInt> ElementSizeAPInt = tryIntoAPInt(ElementSize);
+  if (!ElementSizeAPInt || *ElementSizeAPInt == 0)
+    return false;
+
+  std::optional<unsigned> ElementSizeConst = ElementSizeAPInt->tryZExtValue();
+
+  // Early exit when ElementSize is not a positive constant.
+  if (!ElementSizeConst)
+    return false;
+
+  if (!collectConstantAbsSteps(SE, Expr, Sizes, *ElementSizeConst) ||
+      Sizes.empty()) {
+    Sizes.clear();
+    return false;
+  }
+
+  // At this point, Sizes contains the absolute step recurrences for all
+  // induction variables. Each step recurrence must be a multiple of the size of
+  // the array element. Assuming that each value represents the size of an
+  // array for each dimension, attempts to restore the length of each dimension
+  // by dividing the step recurrence by the next smaller value. For example, if
+  // we have the following AddRec SCEV:
+  //
+  //   AddRec: {{{0,+,2048}<%for.i>,+,256}<%for.j>,+,8}<%for.k> (ElementSize=8)
+  //
+  // Then Sizes will become [256, 32, 1] after sorted. We don't know the size of
+  // the outermost dimension, the next dimension will be computed as 256 / 32 =
+  // 8, and the last dimension will be computed as 32 / 1 = 32. Thus it results
+  // in like Arr[UnknownSize][8][32] with elements of size 8 bytes, where Arr is
+  // a base pointer.
+  //
+  // TODO: Catch more cases, e.g., when a step recurrence is not dividable by
+  // the next smaller one, like A[i][3*j].
+  llvm::sort(Sizes.rbegin(), Sizes.rend());
+  Sizes.erase(llvm::unique(Sizes), Sizes.end());
+  for (unsigned I = 0; I + 1 < Sizes.size(); I++) {
+    unsigned PrevSize = Sizes[I + 1];
+    if (Sizes[I] % PrevSize) {
+      Sizes.clear();
+      return false;
+    }
+    Sizes[I] /= PrevSize;
+  }
+
+  // The last element should be ElementSize.
+  Sizes.back() = *ElementSizeConst;
+  return true;
+}
+
+/// Splits the SCEV into two vectors of SCEVs representing the subscripts and
+/// sizes of an array access, assuming that the array is a fixed size array.
+///
+/// E.g., if we have the code like as follows:
+///
+///  double A[42][8][32];
+///  for i
+///    for j
+///      for k
+///        use A[i][j][k]
+///
+/// The access function will be represented as an AddRec SCEV like:
+///
+///  AddRec: {{{0,+,2048}<%for.i>,+,256}<%for.j>,+,8}<%for.k> (ElementSize=8)
+///
+/// Then findFixedSizeArrayDimensions infers the size of each dimension of the
+/// array based on the fact that the value of the step recurrence is a multiple
+/// of the size of the corresponding array element. In the above example, it
+/// results in the following:
+///
+///  CHECK: ArrayDecl[UnknownSize][8][32] with elements of 8 bytes.
+///
+/// Finally each subscript will be computed as follows:
+///
+///  CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
+///
+/// Note that this function doesn't check the range of possible values for each
+/// subscript, so the caller should perform additional boundary checks if
+/// necessary.
+///
+/// TODO: At the moment, this function can handle only simple cases. For
+/// example, we cannot handle a case where a step recurrence is not dividable by
+/// the next smaller step recurrence, e.g., A[i][3*j]. Furthermore, this
+/// function doesn't guarantee that the original array size is restored
+/// "correctly". For example, in the following case:
+///
+///  double A[42][4][32];
+///  double B[42][8][64];
+///  for i
+///    for j
+///      for k
+///        use A[i][j][k]
+///        use B[i][2*j][k]
+///
+/// The access function for both accesses will be the same:
+///
+///  AddRec: {{{0,+,2048}<%for.i>,+,512}<%for.j>,+,8}<%for.k> (ElementSize=8)
+///
+/// The array sizes for both A and B will be computed as
+/// ArrayDecl[UnknownSize][4][64], which matches for A, but not for B.
+void llvm::delinearizeFixedSizeArray(ScalarEvolution &SE, const SCEV *Expr,
+                                     SmallVectorImpl<const SCEV *> &Subscripts,
+                                     SmallVectorImpl<const SCEV *> &Sizes,
+                                     const SCEV *ElementSize) {
+
+  // First step: find the fixed array size.
+  SmallVector<unsigned, 4> ConstSizes;
+  if (!findFixedSizeArrayDimensions(SE, Expr, ConstSizes, ElementSize)) {
+    Sizes.clear();
+    return;
+  }
+
+  // Convert the constant size to SCEV.
+  for (unsigned Size : ConstSizes)
+    Sizes.push_back(SE.getConstant(Expr->getType(), Size));
+
+  // Second step: compute the access functions for each subscript.
+  computeAccessFunctions(SE, Expr, Subscripts, Sizes);
+
+  if (Subscripts.empty())
+    return;
+}
+
 bool llvm::getIndexExpressionsFromGEP(ScalarEvolution &SE,
                                       const GetElementPtrInst *GEP,
                                       SmallVectorImpl<const SCEV *> &Subscripts,
@@ -586,9 +764,21 @@ void printDelinearization(raw_ostream &O, Function *F, LoopInfo *LI,
       O << "AccessFunction: " << *AccessFn << "\n";
 
       SmallVector<const SCEV *, 3> Subscripts, Sizes;
+
+      auto IsDelinearizationFailed = [&]() {
+        return Subscripts.size() == 0 || Sizes.size() == 0 ||
+               Subscripts.size() != Sizes.size();
+      };
+
       delinearize(*SE, AccessFn, Subscripts, Sizes, SE->getElementSize(&Inst));
-      if (Subscripts.size() == 0 || Sizes.size() == 0 ||
-          Subscripts.size() != Sizes.size()) {
+      if (UseFixedSizeArrayHeuristic && IsDelinearizationFailed()) {
+        Subscripts.clear();
+        Sizes.clear();
+        delinearizeFixedSizeArray(*SE, AccessFn, Subscripts, Sizes,
+                                  SE->getElementSize(&Inst));
+      }
+
+      if (IsDelinearizationFailed()) {
         O << "failed to delinearize\n";
         continue;
       }
diff --git a/llvm/test/Analysis/Delinearization/fixed_size_array.ll b/llvm/test/Analysis/Delinearization/fixed_size_array.ll
new file mode 100644
index 0000000000000..dbbcb32f6a038
--- /dev/null
+++ b/llvm/test/Analysis/Delinearization/fixed_size_array.ll
@@ -0,0 +1,446 @@
+; RUN: opt < %s -passes='print<delinearization>' -disable-output -delinearize-use-fixed-size-array-heuristic 2>&1 | FileCheck %s
+
+; void f(int A[][8][32]) {
+;   for (i = 0; i < 42; i++)
+;    for (j = 0; j < 8; j++)
+;     for (k = 0; k < 32; k++)
+;       A[i][j][k] = 1;
+; }
+
+; CHECK:      Delinearization on function a_i_j_k:
+; CHECK:      Base offset: %a
+; CHECK-NEXT: ArrayDecl[UnknownSize][8][32] with elements of 4 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><nsw><%for.j.header>][{0,+,1}<nuw><nsw><%for.k>]
+define void @a_i_j_k(ptr %a) {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %j, i32 %k
+  store i32 1, ptr %idx
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 32
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 8
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 42
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+; void f(int A[][8][32]) {
+;   for (i = 0; i < 42; i++)
+;    for (j = 0; j < 8; j++)
+;     for (k = 0; k < 32; k++)
+;       A[i][7-j][k] = 1;
+; }
+
+; CHECK:      Delinearization on function a_i_nj_k:
+; CHECK:      Base offset: %a
+; CHECK-NEXT: ArrayDecl[UnknownSize][8][32] with elements of 4 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{7,+,-1}<nsw><%for.j.header>][{0,+,1}<nuw><nsw><%for.k>]
+define void @a_i_nj_k(ptr %a) {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  %j.subscript = sub i32 7, %j
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %j.subscript, i32 %k
+  store i32 1, ptr %idx
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 32
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 8
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 42
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+; In the following code, the access functions for both stores are represented
+; in the same way in SCEV, so the delinearization results are also the same. We
+; don't have any type information of the underlying objects.
+;
+; void f(int A[][4][64], int B[][8][32]) {
+;   for (i = 0; i < 42; i++)
+;    for (j = 0; j < 4; j++)
+;     for (k = 0; k < 32; k++) {
+;       A[i][j][k] = 1;
+;       B[i][2*j][k] = 1;
+;     }
+; }
+
+; CHECK:      Delinearization on function a_ijk_b_i2jk:
+; CHECK:      Base offset: %a
+; CHECK-NEXT: ArrayDecl[UnknownSize][4][64] with elements of 4 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><nsw><%for.j.header>][{0,+,1}<nuw><nsw><%for.k>]
+; CHECK:      Base offset: %b
+; CHECK-NEXT: ArrayDecl[UnknownSize][4][64] with elements of 4 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><nsw><%for.j.header>][{0,+,1}<nuw><nsw><%for.k>]
+define void @a_ijk_b_i2jk(ptr %a, ptr %b) {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  %j2 = shl i32 %j, 1
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %a.idx = getelementptr [4 x [64 x i32]], ptr %a, i32 %i, i32 %j, i32 %k
+  %b.idx = getelementptr [8 x [32 x i32]], ptr %b, i32 %i, i32 %j2, i32 %k
+  store i32 1, ptr %a.idx
+  store i32 1, ptr %b.idx
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 32
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 4
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 42
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+; The type information of the underlying object is not available, so the
+; delinearization result is different from it.
+;
+; void f(int A[][8][32]) {
+;   for (i = 0; i < 42; i++)
+;    for (j = 0; j < 3; j++)
+;     for (k = 0; k < 32; k++)
+;       A[i][2*j+1][k] = 1;
+; }
+
+; CHECK:      Delinearization on function a_i_2j1_k:
+; CHECK:      Base offset: %a
+; CHECK-NEXT: ArrayDecl[UnknownSize][4][64] with elements of 4 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><%for.j.header>][{32,+,1}<nw><%for.k>]
+define void @a_i_2j1_k(ptr %a) {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  %j2 = shl i32 %j, 1
+  %j.subscript = add i32 %j2, 1
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %j.subscript, i32 %k
+  store i32 1, ptr %idx
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 32
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 3
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 42
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+; Fail to delinearize because the step recurrence of the j-loop is not
+; dividable by that of the k-loop.
+;
+; void f(int A[][8][32]) {
+;   for (i = 0; i < 42; i++)
+;    for (j = 0; j < 8; j++)
+;     for (k = 0; k < 10; k++)
+;       A[i][j][3*k] = 1;
+; }
+
+; CHECK:      Delinearization on function a_i_j_3k:
+; CHECK:      AccessFunction: {{...}}0,+,1024}<nuw><nsw><%for.i.header>,+,128}<nw><%for.j.header>,+,12}<nw><%for.k>
+; CHECK-NEXT: failed to delinearize
+define void @a_i_j_3k(ptr %a) {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %k.subscript = mul i32 %k, 3
+  %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %j, i32 %k.subscript
+  store i32 1, ptr %idx
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 10
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 8
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 42
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+; Fail to delinearize because i is used in multiple subscripts that are not adjacent.
+;
+; void f(int A[][8][32]) {
+;   for (i = 0; i < 32; i++)
+;    for (j = 0; j < 4; j++)
+;     for (k = 0; k < 4; k++)
+;       A[i][j+k][i] = 1;
+; }
+
+; CHECK:      Delinearization on function a_i_jk_i:
+; CHECK:      AccessFunction: {{...}}0,+,1028}<%for.i.header>,+,128}<nw><%for.j.header>,+,128}<nw><%for.k>
+; CHECK-NEXT: failed to delinearize
+define void @a_i_jk_i(ptr %a) {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %jk = add i32 %j, %k
+  %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %jk, i32 %i
+  store i32 1, ptr %idx
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 4
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 4
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 32
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+; Can delinearize, but the result is different from the original array size. In
+; this case, the outermost two dimensions are melded into one.
+;
+; void f(int A[][8][32]) {
+;   for (i = 0; i < 8; i++)
+;    for (j = 0; j < 10; j++)
+;     for (k = 0; k < 10; k++)
+;       A[i][i][j+k] = 1;
+; }
+
+; CHECK:      Delinearization on function a_i_i_jk:
+; CHECK:      Base offset: %a
+; CHECK-NEXT: ArrayDecl[UnknownSize][288] with elements of 4 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{{..}}0,+,1}<nuw><nsw><%for.j.header>,+,1}<nuw><nsw><%for.k>]
+define void @a_i_i_jk(ptr %a) {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %jk = add i32 %j, %k
+  %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %i, i32 %jk
+  store i32 1, ptr %idx
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 10
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 10
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 8
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+; void f(int A[][8][32]) {
+;   for (i = 0; i < 8; i++)
+;    for (j = 0; j < 4; j++)
+;     for (k = 0; k < 4; k++)
+;       for (l = 0; l < 32; l++)
+;         A[i][j+k][l] = 1;
+; }
+
+; CHECK:      Delinearization on function a_i_jk_l:
+; CHECK:      Base offset: %a
+; CHECK-NEXT: ArrayDecl[UnknownSize][8][32] with elements of 4 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{{..}}0,+,1}<nuw><nsw><%for.j.header>,+,1}<nuw><nsw><%for.k.header>][{0,+,1}<nuw><nsw><%for.l>]
+
+define void @a_i_jk_l(ptr %a) {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  br label %for.k.header
+
+for.k.header:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k.latch ]
+  %jk = add i32 %j, %k
+  br label %for.l
+
+for.l:
+  %l = phi i32 [ 0, %for.k.header ], [ %l.inc, %for.l ]
+  %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %jk, i32 %l
+  store i32 1, ptr %idx
+  %l.inc = add i32 %l, 1
+  %cmp.l = icmp slt i32 %l.inc, 32
+  br i1 %cmp.l, label %for.l, label %for.k.latch
+
+for.k.latch:
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 4
+  br i1 %cmp.k, label %for.k.header, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 4
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 8
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+; Reject if the address is not a multiple of the element size.
+;
+; void f(int *A) {
+;   for (i = 0; i < 42; i++)
+;    for (j = 0; j < 8; j++)
+;     for (k = 0; k < 32; k++)
+;       *((int *)((char *)A + i*256 + j*32 + k)) = 1;
+; }
+
+; CHECK:      Delinearization on function non_dividable_by_element_size:
+; CHECK:      AccessFunction: {{...}}0,+,256}<nuw><nsw><%for.i.header>,+,32}<nw><%for.j.header>,+,1}<nw><%for.k>
+; CHECK-NEXT: failed to delinearize
+define void @non_dividable_by_element_size(ptr %a) {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %idx = getelementptr [8 x [32 x i8]], ptr %a, i32 %i, i32 %j, i32 %k
+  store i32 1, ptr %idx
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 32
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 8
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 42
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}

>From 02f00979c374a7c3730a7cc4cf29df0f065e3546 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Fri, 27 Jun 2025 13:24:09 +0000
Subject: [PATCH 2/7] Address review comments, thanks!

---
 llvm/lib/Analysis/Delinearization.cpp         | 19 ++++++++++---------
 .../Delinearization/fixed_size_array.ll       |  6 +++---
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Analysis/Delinearization.cpp b/llvm/lib/Analysis/Delinearization.cpp
index 962b3be866dc6..5399a4f2e3f7f 100644
--- a/llvm/lib/Analysis/Delinearization.cpp
+++ b/llvm/lib/Analysis/Delinearization.cpp
@@ -494,7 +494,7 @@ static std::optional<APInt> tryIntoAPInt(const SCEV *S) {
 
 /// Collects the absolute values of constant steps for all induction variables.
 /// Returns true if we can prove that all step recurrences are constants and \p
-/// Expr is dividable by \p ElementSize. Each step recurrence is stored in \p
+/// Expr is divisible by \p ElementSize. Each step recurrence is stored in \p
 /// Steps after divided by \p ElementSize.
 static bool collectConstantAbsSteps(ScalarEvolution &SE, const SCEV *Expr,
                                     SmallVectorImpl<unsigned> &Steps,
@@ -567,7 +567,7 @@ static bool findFixedSizeArrayDimensions(ScalarEvolution &SE, const SCEV *Expr,
   // in like Arr[UnknownSize][8][32] with elements of size 8 bytes, where Arr is
   // a base pointer.
   //
-  // TODO: Catch more cases, e.g., when a step recurrence is not dividable by
+  // TODO: Catch more cases, e.g., when a step recurrence is not divisible by
   // the next smaller one, like A[i][3*j].
   llvm::sort(Sizes.rbegin(), Sizes.rend());
   Sizes.erase(llvm::unique(Sizes), Sizes.end());
@@ -615,14 +615,11 @@ static bool findFixedSizeArrayDimensions(ScalarEvolution &SE, const SCEV *Expr,
 /// subscript, so the caller should perform additional boundary checks if
 /// necessary.
 ///
-/// TODO: At the moment, this function can handle only simple cases. For
-/// example, we cannot handle a case where a step recurrence is not dividable by
-/// the next smaller step recurrence, e.g., A[i][3*j]. Furthermore, this
-/// function doesn't guarantee that the original array size is restored
-/// "correctly". For example, in the following case:
+/// Also note that this function doesn't guarantee that the original array size
+/// is restored "correctly". For example, in the following case:
 ///
-///  double A[42][4][32];
-///  double B[42][8][64];
+///  double A[42][4][64];
+///  double B[42][8][32];
 ///  for i
 ///    for j
 ///      for k
@@ -635,6 +632,10 @@ static bool findFixedSizeArrayDimensions(ScalarEvolution &SE, const SCEV *Expr,
 ///
 /// The array sizes for both A and B will be computed as
 /// ArrayDecl[UnknownSize][4][64], which matches for A, but not for B.
+///
+/// TODO: At the moment, this function can handle only simple cases. For
+/// example, we cannot handle a case where a step recurrence is not divisible
+/// by the next smaller step recurrence, e.g., A[i][3*j].
 void llvm::delinearizeFixedSizeArray(ScalarEvolution &SE, const SCEV *Expr,
                                      SmallVectorImpl<const SCEV *> &Subscripts,
                                      SmallVectorImpl<const SCEV *> &Sizes,
diff --git a/llvm/test/Analysis/Delinearization/fixed_size_array.ll b/llvm/test/Analysis/Delinearization/fixed_size_array.ll
index dbbcb32f6a038..f37c943272f6d 100644
--- a/llvm/test/Analysis/Delinearization/fixed_size_array.ll
+++ b/llvm/test/Analysis/Delinearization/fixed_size_array.ll
@@ -199,7 +199,7 @@ exit:
 }
 
 ; Fail to delinearize because the step recurrence of the j-loop is not
-; dividable by that of the k-loop.
+; divisible by that of the k-loop.
 ;
 ; void f(int A[][8][32]) {
 ;   for (i = 0; i < 42; i++)
@@ -408,10 +408,10 @@ exit:
 ;       *((int *)((char *)A + i*256 + j*32 + k)) = 1;
 ; }
 
-; CHECK:      Delinearization on function non_dividable_by_element_size:
+; CHECK:      Delinearization on function non_divisible_by_element_size:
 ; CHECK:      AccessFunction: {{...}}0,+,256}<nuw><nsw><%for.i.header>,+,32}<nw><%for.j.header>,+,1}<nw><%for.k>
 ; CHECK-NEXT: failed to delinearize
-define void @non_dividable_by_element_size(ptr %a) {
+define void @non_divisible_by_element_size(ptr %a) {
 entry:
   br label %for.i.header
 

>From c549980a5729ff0b870a839ce4f93408cdb22334 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Tue, 8 Jul 2025 09:08:28 +0000
Subject: [PATCH 3/7] Expose findFixedSizeArrayDimensions

---
 llvm/include/llvm/Analysis/Delinearization.h | 6 ++++++
 llvm/lib/Analysis/Delinearization.cpp        | 6 +++---
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/Analysis/Delinearization.h b/llvm/include/llvm/Analysis/Delinearization.h
index dca423235b3c0..0d087f8f53d12 100644
--- a/llvm/include/llvm/Analysis/Delinearization.h
+++ b/llvm/include/llvm/Analysis/Delinearization.h
@@ -112,6 +112,12 @@ void delinearize(ScalarEvolution &SE, const SCEV *Expr,
                  SmallVectorImpl<const SCEV *> &Subscripts,
                  SmallVectorImpl<const SCEV *> &Sizes, const SCEV *ElementSize);
 
+/// Compute the dimensions of fixed size array from \Expr and save the results
+/// in \p Sizes.
+bool findFixedSizeArrayDimensions(ScalarEvolution &SE, const SCEV *Expr,
+                                  SmallVectorImpl<unsigned> &Sizes,
+                                  const SCEV *ElementSize);
+
 /// Split this SCEVAddRecExpr into two vectors of SCEVs representing the
 /// subscripts and sizes of an access to a fixed size array. This is a special
 /// case of delinearization for fixed size arrays.
diff --git a/llvm/lib/Analysis/Delinearization.cpp b/llvm/lib/Analysis/Delinearization.cpp
index 5399a4f2e3f7f..950d8d45722e8 100644
--- a/llvm/lib/Analysis/Delinearization.cpp
+++ b/llvm/lib/Analysis/Delinearization.cpp
@@ -530,9 +530,9 @@ static bool collectConstantAbsSteps(ScalarEvolution &SE, const SCEV *Expr,
   return collectConstantAbsSteps(SE, AR->getStart(), Steps, ElementSize);
 }
 
-static bool findFixedSizeArrayDimensions(ScalarEvolution &SE, const SCEV *Expr,
-                                         SmallVectorImpl<unsigned> &Sizes,
-                                         const SCEV *ElementSize) {
+bool llvm::findFixedSizeArrayDimensions(ScalarEvolution &SE, const SCEV *Expr,
+                                        SmallVectorImpl<unsigned> &Sizes,
+                                        const SCEV *ElementSize) {
   if (!ElementSize)
     return false;
 

>From 3ed1cb730c44a36180a4b6e311d68990d0cb76e4 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Tue, 8 Jul 2025 09:29:22 +0000
Subject: [PATCH 4/7] Minor improvements

---
 llvm/lib/Analysis/Delinearization.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Analysis/Delinearization.cpp b/llvm/lib/Analysis/Delinearization.cpp
index 950d8d45722e8..67f067f975766 100644
--- a/llvm/lib/Analysis/Delinearization.cpp
+++ b/llvm/lib/Analysis/Delinearization.cpp
@@ -571,6 +571,12 @@ bool llvm::findFixedSizeArrayDimensions(ScalarEvolution &SE, const SCEV *Expr,
   // the next smaller one, like A[i][3*j].
   llvm::sort(Sizes.rbegin(), Sizes.rend());
   Sizes.erase(llvm::unique(Sizes), Sizes.end());
+
+  // The last element in Sizes should be ElementSize. At this point, all values
+  // in Sizes are assumed to be divided by ElementSize, so replace it with 1.
+  assert(Sizes.back() != 0 && "Unexpected zero size in Sizes.");
+  Sizes.back() = 1;
+
   for (unsigned I = 0; I + 1 < Sizes.size(); I++) {
     unsigned PrevSize = Sizes[I + 1];
     if (Sizes[I] % PrevSize) {
@@ -580,7 +586,7 @@ bool llvm::findFixedSizeArrayDimensions(ScalarEvolution &SE, const SCEV *Expr,
     Sizes[I] /= PrevSize;
   }
 
-  // The last element should be ElementSize.
+  // Finally, the last element in Sizes should be ElementSize.
   Sizes.back() = *ElementSizeConst;
   return true;
 }

>From 7a640222c7bc5b2674b56099169cc31b0abc342c Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Tue, 8 Jul 2025 09:30:58 +0000
Subject: [PATCH 5/7] Change the return type to bool

---
 llvm/include/llvm/Analysis/Delinearization.h | 2 +-
 llvm/lib/Analysis/Delinearization.cpp        | 7 +++----
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/llvm/include/llvm/Analysis/Delinearization.h b/llvm/include/llvm/Analysis/Delinearization.h
index 0d087f8f53d12..459b5f11fbece 100644
--- a/llvm/include/llvm/Analysis/Delinearization.h
+++ b/llvm/include/llvm/Analysis/Delinearization.h
@@ -136,7 +136,7 @@ bool findFixedSizeArrayDimensions(ScalarEvolution &SE, const SCEV *Expr,
 /// This function is intended to replace getIndexExpressionsFromGEP and
 /// tryDelinearizeFixedSizeImpl. They rely on the GEP source element type so
 /// that they will be removed in the future.
-void delinearizeFixedSizeArray(ScalarEvolution &SE, const SCEV *Expr,
+bool delinearizeFixedSizeArray(ScalarEvolution &SE, const SCEV *Expr,
                                SmallVectorImpl<const SCEV *> &Subscripts,
                                SmallVectorImpl<const SCEV *> &Sizes,
                                const SCEV *ElementSize);
diff --git a/llvm/lib/Analysis/Delinearization.cpp b/llvm/lib/Analysis/Delinearization.cpp
index 67f067f975766..624550e1ab681 100644
--- a/llvm/lib/Analysis/Delinearization.cpp
+++ b/llvm/lib/Analysis/Delinearization.cpp
@@ -642,7 +642,7 @@ bool llvm::findFixedSizeArrayDimensions(ScalarEvolution &SE, const SCEV *Expr,
 /// TODO: At the moment, this function can handle only simple cases. For
 /// example, we cannot handle a case where a step recurrence is not divisible
 /// by the next smaller step recurrence, e.g., A[i][3*j].
-void llvm::delinearizeFixedSizeArray(ScalarEvolution &SE, const SCEV *Expr,
+bool llvm::delinearizeFixedSizeArray(ScalarEvolution &SE, const SCEV *Expr,
                                      SmallVectorImpl<const SCEV *> &Subscripts,
                                      SmallVectorImpl<const SCEV *> &Sizes,
                                      const SCEV *ElementSize) {
@@ -651,7 +651,7 @@ void llvm::delinearizeFixedSizeArray(ScalarEvolution &SE, const SCEV *Expr,
   SmallVector<unsigned, 4> ConstSizes;
   if (!findFixedSizeArrayDimensions(SE, Expr, ConstSizes, ElementSize)) {
     Sizes.clear();
-    return;
+    return false;
   }
 
   // Convert the constant size to SCEV.
@@ -661,8 +661,7 @@ void llvm::delinearizeFixedSizeArray(ScalarEvolution &SE, const SCEV *Expr,
   // Second step: compute the access functions for each subscript.
   computeAccessFunctions(SE, Expr, Subscripts, Sizes);
 
-  if (Subscripts.empty())
-    return;
+  return !Subscripts.empty();
 }
 
 bool llvm::getIndexExpressionsFromGEP(ScalarEvolution &SE,

>From 0f350cb1d0842b58e0a065944382a77529fb1674 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Tue, 8 Jul 2025 10:16:46 +0000
Subject: [PATCH 6/7] Change unsigned to uint64_t

---
 llvm/include/llvm/Analysis/Delinearization.h |  2 +-
 llvm/lib/Analysis/Delinearization.cpp        | 18 +++++++++---------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/llvm/include/llvm/Analysis/Delinearization.h b/llvm/include/llvm/Analysis/Delinearization.h
index 459b5f11fbece..434cfb61699d6 100644
--- a/llvm/include/llvm/Analysis/Delinearization.h
+++ b/llvm/include/llvm/Analysis/Delinearization.h
@@ -115,7 +115,7 @@ void delinearize(ScalarEvolution &SE, const SCEV *Expr,
 /// Compute the dimensions of fixed size array from \Expr and save the results
 /// in \p Sizes.
 bool findFixedSizeArrayDimensions(ScalarEvolution &SE, const SCEV *Expr,
-                                  SmallVectorImpl<unsigned> &Sizes,
+                                  SmallVectorImpl<uint64_t> &Sizes,
                                   const SCEV *ElementSize);
 
 /// Split this SCEVAddRecExpr into two vectors of SCEVs representing the
diff --git a/llvm/lib/Analysis/Delinearization.cpp b/llvm/lib/Analysis/Delinearization.cpp
index 624550e1ab681..761c566eae794 100644
--- a/llvm/lib/Analysis/Delinearization.cpp
+++ b/llvm/lib/Analysis/Delinearization.cpp
@@ -497,12 +497,12 @@ static std::optional<APInt> tryIntoAPInt(const SCEV *S) {
 /// Expr is divisible by \p ElementSize. Each step recurrence is stored in \p
 /// Steps after divided by \p ElementSize.
 static bool collectConstantAbsSteps(ScalarEvolution &SE, const SCEV *Expr,
-                                    SmallVectorImpl<unsigned> &Steps,
-                                    unsigned ElementSize) {
+                                    SmallVectorImpl<uint64_t> &Steps,
+                                    uint64_t ElementSize) {
   // End of recursion. The constant value also must be a multiple of
   // ElementSize.
   if (const auto *Const = dyn_cast<SCEVConstant>(Expr)) {
-    const unsigned Mod = Const->getAPInt().urem(ElementSize);
+    const uint64_t Mod = Const->getAPInt().urem(ElementSize);
     return Mod == 0;
   }
 
@@ -522,7 +522,7 @@ static bool collectConstantAbsSteps(ScalarEvolution &SE, const SCEV *Expr,
     return false;
 
   // Bail out when the step is too large.
-  std::optional<unsigned> StepVal = Q.tryZExtValue();
+  std::optional<uint64_t> StepVal = Q.tryZExtValue();
   if (!StepVal)
     return false;
 
@@ -531,7 +531,7 @@ static bool collectConstantAbsSteps(ScalarEvolution &SE, const SCEV *Expr,
 }
 
 bool llvm::findFixedSizeArrayDimensions(ScalarEvolution &SE, const SCEV *Expr,
-                                        SmallVectorImpl<unsigned> &Sizes,
+                                        SmallVectorImpl<uint64_t> &Sizes,
                                         const SCEV *ElementSize) {
   if (!ElementSize)
     return false;
@@ -540,7 +540,7 @@ bool llvm::findFixedSizeArrayDimensions(ScalarEvolution &SE, const SCEV *Expr,
   if (!ElementSizeAPInt || *ElementSizeAPInt == 0)
     return false;
 
-  std::optional<unsigned> ElementSizeConst = ElementSizeAPInt->tryZExtValue();
+  std::optional<uint64_t> ElementSizeConst = ElementSizeAPInt->tryZExtValue();
 
   // Early exit when ElementSize is not a positive constant.
   if (!ElementSizeConst)
@@ -578,7 +578,7 @@ bool llvm::findFixedSizeArrayDimensions(ScalarEvolution &SE, const SCEV *Expr,
   Sizes.back() = 1;
 
   for (unsigned I = 0; I + 1 < Sizes.size(); I++) {
-    unsigned PrevSize = Sizes[I + 1];
+    uint64_t PrevSize = Sizes[I + 1];
     if (Sizes[I] % PrevSize) {
       Sizes.clear();
       return false;
@@ -648,14 +648,14 @@ bool llvm::delinearizeFixedSizeArray(ScalarEvolution &SE, const SCEV *Expr,
                                      const SCEV *ElementSize) {
 
   // First step: find the fixed array size.
-  SmallVector<unsigned, 4> ConstSizes;
+  SmallVector<uint64_t, 4> ConstSizes;
   if (!findFixedSizeArrayDimensions(SE, Expr, ConstSizes, ElementSize)) {
     Sizes.clear();
     return false;
   }
 
   // Convert the constant size to SCEV.
-  for (unsigned Size : ConstSizes)
+  for (uint64_t Size : ConstSizes)
     Sizes.push_back(SE.getConstant(Expr->getType(), Size));
 
   // Second step: compute the access functions for each subscript.

>From a3032e81e4edbf7399d393dd839b8e4e52d304aa Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Wed, 9 Jul 2025 11:07:13 +0000
Subject: [PATCH 7/7] Fix test

---
 .../Delinearization/fixed_size_array.ll       | 75 ++++++++++++++++---
 1 file changed, 63 insertions(+), 12 deletions(-)

diff --git a/llvm/test/Analysis/Delinearization/fixed_size_array.ll b/llvm/test/Analysis/Delinearization/fixed_size_array.ll
index f37c943272f6d..7ac7e3c931737 100644
--- a/llvm/test/Analysis/Delinearization/fixed_size_array.ll
+++ b/llvm/test/Analysis/Delinearization/fixed_size_array.ll
@@ -198,8 +198,57 @@ exit:
   ret void
 }
 
-; Fail to delinearize because the step recurrence of the j-loop is not
-; divisible by that of the k-loop.
+; Fail to delinearize because the step recurrence value of the i-loop is not
+; divisible by that of the j-loop.
+;
+; void f(int A[][8][32]) {
+;   for (i = 0; i < 42; i++)
+;    for (j = 0; j < 2; j++)
+;     for (k = 0; k < 42; k++)
+;       A[i][3*j][k] = 1;
+; }
+
+; CHECK:      Delinearization on function a_i_3j_k:
+; CHECK:      AccessFunction: {{...}}0,+,1024}<nuw><nsw><%for.i.header>,+,384}<nw><%for.j.header>,+,4}<nw><%for.k>
+; CHECK-NEXT: failed to delinearize
+define void @a_i_3j_k(ptr %a) {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  %j.subscript = mul i32 %j, 3
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %j.subscript, i32 %k
+  store i32 1, ptr %idx
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 42
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 2
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 42
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+; Although the step recurrence value of the j-loop is not divisible by that of
+; the k-loop, delinearization is possible because we know that the "actual"
+; stride width for the last dimension is 4 instead of 12.
 ;
 ; void f(int A[][8][32]) {
 ;   for (i = 0; i < 42; i++)
@@ -209,8 +258,9 @@ exit:
 ; }
 
 ; CHECK:      Delinearization on function a_i_j_3k:
-; CHECK:      AccessFunction: {{...}}0,+,1024}<nuw><nsw><%for.i.header>,+,128}<nw><%for.j.header>,+,12}<nw><%for.k>
-; CHECK-NEXT: failed to delinearize
+; CHECK:      Base offset: %a
+; CHECK-NEXT: ArrayDecl[UnknownSize][8][32] with elements of 4 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><nsw><%for.j.header>][{0,+,3}<nuw><nsw><%for.k>]
 define void @a_i_j_3k(ptr %a) {
 entry:
   br label %for.i.header
@@ -250,15 +300,15 @@ exit:
 ;
 ; void f(int A[][8][32]) {
 ;   for (i = 0; i < 32; i++)
-;    for (j = 0; j < 4; j++)
+;    for (j = 0; j < 2; j++)
 ;     for (k = 0; k < 4; k++)
-;       A[i][j+k][i] = 1;
+;       A[i][2*j+k][i] = 1;
 ; }
 
-; CHECK:      Delinearization on function a_i_jk_i:
-; CHECK:      AccessFunction: {{...}}0,+,1028}<%for.i.header>,+,128}<nw><%for.j.header>,+,128}<nw><%for.k>
+; CHECK:      Delinearization on function a_i_j2k_i:
+; CHECK:      AccessFunction: {{...}}0,+,1028}<%for.i.header>,+,256}<nw><%for.j.header>,+,128}<nw><%for.k>
 ; CHECK-NEXT: failed to delinearize
-define void @a_i_jk_i(ptr %a) {
+define void @a_i_j2k_i(ptr %a) {
 entry:
   br label %for.i.header
 
@@ -272,8 +322,9 @@ for.j.header:
 
 for.k:
   %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
-  %jk = add i32 %j, %k
-  %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %jk, i32 %i
+  %j2 = shl i32 %j, 1
+  %j2.k = add i32 %j2, %k
+  %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %j2.k, i32 %i
   store i32 1, ptr %idx
   %k.inc = add i32 %k, 1
   %cmp.k = icmp slt i32 %k.inc, 4
@@ -281,7 +332,7 @@ for.k:
 
 for.j.latch:
   %j.inc = add i32 %j, 1
-  %cmp.j = icmp slt i32 %j.inc, 4
+  %cmp.j = icmp slt i32 %j.inc, 2
   br i1 %cmp.j, label %for.j.header, label %for.i.latch
 
 for.i.latch: