[llvm] [Delinearization] Add function for fixed size array without relying on GEP (PR #145050)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 20 08:12:10 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-analysis
Author: Ryotaro Kasuga (kasuga-fj)
<details>
<summary>Changes</summary>
The existing functions `getIndexExpressionsFromGEP` and `tryDelinearizeFixedSizeImpl` provide functionality to delinearize memory accesses to fixed-size arrays. They use the GEP source element type in their optimization heuristics. However, driving optimization heuristics based on GEP type information is not allowed.
This patch introduces a new function, `delinearizeFixedSizeArray`, that does not rely on GEP type information, so that those functions can eventually be removed. This is an initial implementation that may not cover all cases, but it is intended to replace the existing functions in the future.
---
Patch is 22.93 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/145050.diff
3 Files Affected:
- (modified) llvm/include/llvm/Analysis/Delinearization.h (+23)
- (modified) llvm/lib/Analysis/Delinearization.cpp (+188-2)
- (added) llvm/test/Analysis/Delinearization/fixed_size_array.ll (+446)
``````````diff
diff --git a/llvm/include/llvm/Analysis/Delinearization.h b/llvm/include/llvm/Analysis/Delinearization.h
index eb775babd6061..dca423235b3c0 100644
--- a/llvm/include/llvm/Analysis/Delinearization.h
+++ b/llvm/include/llvm/Analysis/Delinearization.h
@@ -112,6 +112,29 @@ void delinearize(ScalarEvolution &SE, const SCEV *Expr,
SmallVectorImpl<const SCEV *> &Subscripts,
SmallVectorImpl<const SCEV *> &Sizes, const SCEV *ElementSize);
+/// Split this SCEVAddRecExpr into two vectors of SCEVs representing the
+/// subscripts and sizes of an access to a fixed size array. This is a special
+/// case of delinearization for fixed size arrays.
+///
+/// The delinearization is a 2 step process: the first step estimates the sizes
+/// of each dimension of the array. The second step computes the access
+/// functions for the delinearized array:
+///
+/// 1. Compute the array size
+/// 2. Compute the access function: same as normal delinearization
+///
+/// Unlike normal delinearization, this function assumes that NO terms exist
+/// in \p Expr. In other words, it assumes that all the step values are
+/// constant.
+///
+/// This function is intended to replace getIndexExpressionsFromGEP and
+/// tryDelinearizeFixedSizeImpl. They rely on the GEP source element type, so
+/// they will be removed in the future.
+void delinearizeFixedSizeArray(ScalarEvolution &SE, const SCEV *Expr,
+ SmallVectorImpl<const SCEV *> &Subscripts,
+ SmallVectorImpl<const SCEV *> &Sizes,
+ const SCEV *ElementSize);
+
/// Gathers the individual index expressions from a GEP instruction.
///
/// This function optimistically assumes the GEP references into a fixed size
diff --git a/llvm/lib/Analysis/Delinearization.cpp b/llvm/lib/Analysis/Delinearization.cpp
index 329bd35530c72..30400b4e39cf0 100644
--- a/llvm/lib/Analysis/Delinearization.cpp
+++ b/llvm/lib/Analysis/Delinearization.cpp
@@ -24,6 +24,7 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -32,6 +33,11 @@ using namespace llvm;
#define DL_NAME "delinearize"
#define DEBUG_TYPE DL_NAME
+static cl::opt<bool> UseFixedSizeArrayHeuristic(
+ "delinearize-use-fixed-size-array-heuristic", cl::init(false), cl::Hidden,
+ cl::desc("When printing analysis, use the heuristic for fixed-size arrays "
+ "if the default delinearizetion fails."));
+
// Return true when S contains at least an undef value.
static inline bool containsUndefs(const SCEV *S) {
return SCEVExprContains(S, [](const SCEV *S) {
@@ -480,6 +486,175 @@ void llvm::delinearize(ScalarEvolution &SE, const SCEV *Expr,
});
}
+static std::optional<APInt> tryIntoAPInt(const SCEV *S) {
+ if (const auto *Const = dyn_cast<SCEVConstant>(S))
+ return Const->getAPInt();
+ return std::nullopt;
+}
+
+/// Collects the absolute values of constant steps for all induction variables.
+/// Returns true if we can prove that all step values are constants and \p Expr
+/// is divisible by \p ElementSize. Each step value is stored in \p Steps after
+/// being divided by \p ElementSize.
+static bool collectConstantAbsSteps(ScalarEvolution &SE, const SCEV *Expr,
+ SmallVectorImpl<unsigned> &Steps,
+ unsigned ElementSize) {
+ // End of recursion. The constant value also must be a multiple of
+ // ElementSize.
+ if (const auto *Const = dyn_cast<SCEVConstant>(Expr)) {
+ const unsigned Mod = Const->getAPInt().urem(ElementSize);
+ return Mod == 0;
+ }
+
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Expr);
+ if (!AR || !AR->isAffine())
+ return false;
+
+ const SCEV *Step = AR->getStepRecurrence(SE);
+ std::optional<APInt> StepAPInt = tryIntoAPInt(Step);
+ if (!StepAPInt)
+ return false;
+
+ APInt Q;
+ uint64_t R;
+ APInt::udivrem(StepAPInt->abs(), ElementSize, Q, R);
+ if (R != 0)
+ return false;
+
+ // Bail out when the step is too large.
+ std::optional<unsigned> StepVal = Q.tryZExtValue();
+ if (!StepVal)
+ return false;
+
+ Steps.push_back(*StepVal);
+ return collectConstantAbsSteps(SE, AR->getStart(), Steps, ElementSize);
+}
+
+static bool findFixedSizeArrayDimensions(ScalarEvolution &SE, const SCEV *Expr,
+ SmallVectorImpl<unsigned> &Sizes,
+ const SCEV *ElementSize) {
+ if (!ElementSize)
+ return false;
+
+ std::optional<APInt> ElementSizeAPInt = tryIntoAPInt(ElementSize);
+ if (!ElementSizeAPInt || *ElementSizeAPInt == 0)
+ return false;
+
+ std::optional<unsigned> ElementSizeConst = ElementSizeAPInt->tryZExtValue();
+
+ // Early exit when ElementSize is not a positive constant.
+ if (!ElementSizeConst)
+ return false;
+
+ if (!collectConstantAbsSteps(SE, Expr, Sizes, *ElementSizeConst) ||
+ Sizes.empty()) {
+ Sizes.clear();
+ return false;
+ }
+
+ // At this point, Sizes contains the absolute step values for all induction
+ // variables. Each step value must be a multiple of the size of the array
+ // element. Assuming that each value represents the size of an array for
+ // each dimension, we attempt to restore the length of each dimension by
+ // dividing the step value by the next smaller value. For example, if we have
+ // the following AddRec SCEV:
+ //
+ // AddRec: {{{0,+,2048}<%for.i>,+,256}<%for.j>,+,8}<%for.k> (ElementSize=8)
+ //
+ // Then Sizes will become [256, 32, 1] after sorting. We don't know the size
+ // of the outermost dimension; the next dimension will be computed as
+ // 256 / 32 = 8, and the last dimension will be computed as 32 / 1 = 32. Thus
+ // the result is Arr[UnknownSize][8][32] with elements of size 8 bytes,
+ // where Arr is a base pointer.
+ //
+ // TODO: Catch more cases, e.g., when a step value is not divisible by the
+ // next smaller one, like A[i][3*j].
+ llvm::sort(Sizes.rbegin(), Sizes.rend());
+ Sizes.erase(llvm::unique(Sizes), Sizes.end());
+ for (unsigned I = 0; I + 1 < Sizes.size(); I++) {
+ unsigned PrevSize = Sizes[I + 1];
+ if (Sizes[I] % PrevSize) {
+ Sizes.clear();
+ return false;
+ }
+ Sizes[I] /= PrevSize;
+ }
+
+ // The last element should be ElementSize.
+ Sizes.back() = *ElementSizeConst;
+ return true;
+}
+
+/// Splits the SCEV into two vectors of SCEVs representing the subscripts and
+/// sizes of an array access, assuming that the array is a fixed size array.
+///
+/// E.g., if we have code like the following:
+///
+/// double A[42][8][32];
+/// for i
+/// for j
+/// for k
+/// use A[i][j][k]
+///
+/// The access function will be represented as an AddRec SCEV like:
+///
+/// AddRec: {{{0,+,2048}<%for.i>,+,256}<%for.j>,+,8}<%for.k> (ElementSize=8)
+///
+/// Then findFixedSizeArrayDimensions will compute the array size as follows:
+///
+/// CHECK: ArrayDecl[UnknownSize][8][32] with elements of 8 bytes.
+///
+/// Finally each subscript will be computed as follows:
+///
+/// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
+///
+/// Note that this function doesn't check the range of possible values for each
+/// subscript, so the caller should perform additional boundary checks if
+/// necessary.
+///
+/// TODO: At the moment, this function can handle only simple cases. For
+/// example, we cannot handle a case where a step value is not divisible by the
+/// next smaller step value, e.g., A[i][3*j]. Furthermore, this function
+/// doesn't guarantee that the original array size is restored "correctly". For
+/// example, in the following case:
+///
+/// double A[42][4][64];
+/// double B[42][8][32];
+/// for i
+/// for j
+/// for k
+/// use A[i][j][k]
+/// use B[i][2*j][k]
+///
+/// The access function for both accesses will be the same:
+///
+/// AddRec: {{{0,+,2048}<%for.i>,+,512}<%for.j>,+,8}<%for.k> (ElementSize=8)
+///
+/// The array sizes for both A and B will be computed as
+/// ArrayDecl[UnknownSize][4][64], which matches for A, but not for B.
+void llvm::delinearizeFixedSizeArray(ScalarEvolution &SE, const SCEV *Expr,
+ SmallVectorImpl<const SCEV *> &Subscripts,
+ SmallVectorImpl<const SCEV *> &Sizes,
+ const SCEV *ElementSize) {
+
+ // First step: find the fixed array size.
+ SmallVector<unsigned, 4> ConstSizes;
+ if (!findFixedSizeArrayDimensions(SE, Expr, ConstSizes, ElementSize)) {
+ Sizes.clear();
+ return;
+ }
+
+ // Convert the constant size to SCEV.
+ for (unsigned Size : ConstSizes)
+ Sizes.push_back(SE.getConstant(Expr->getType(), Size));
+
+ // Second step: compute the access functions for each subscript.
+ computeAccessFunctions(SE, Expr, Subscripts, Sizes);
+
+ if (Subscripts.empty())
+ return;
+}
+
bool llvm::getIndexExpressionsFromGEP(ScalarEvolution &SE,
const GetElementPtrInst *GEP,
SmallVectorImpl<const SCEV *> &Subscripts,
@@ -586,9 +761,20 @@ void printDelinearization(raw_ostream &O, Function *F, LoopInfo *LI,
O << "AccessFunction: " << *AccessFn << "\n";
SmallVector<const SCEV *, 3> Subscripts, Sizes;
+
+ auto IsDelinearizationFailed = [&]() {
+ return Subscripts.size() == 0 || Sizes.size() == 0 ||
+ Subscripts.size() != Sizes.size();
+ };
+
delinearize(*SE, AccessFn, Subscripts, Sizes, SE->getElementSize(&Inst));
- if (Subscripts.size() == 0 || Sizes.size() == 0 ||
- Subscripts.size() != Sizes.size()) {
+ if (UseFixedSizeArrayHeuristic && IsDelinearizationFailed()) {
+ Subscripts.clear();
+ Sizes.clear();
+ delinearizeFixedSizeArray(*SE, AccessFn, Subscripts, Sizes, SE->getElementSize(&Inst));
+ }
+
+ if (IsDelinearizationFailed()) {
O << "failed to delinearize\n";
continue;
}
diff --git a/llvm/test/Analysis/Delinearization/fixed_size_array.ll b/llvm/test/Analysis/Delinearization/fixed_size_array.ll
new file mode 100644
index 0000000000000..98f2d3f194584
--- /dev/null
+++ b/llvm/test/Analysis/Delinearization/fixed_size_array.ll
@@ -0,0 +1,446 @@
+; RUN: opt < %s -passes='print<delinearization>' -disable-output -delinearize-use-fixed-size-array-heuristic 2>&1 | FileCheck %s
+
+; void f(int A[][8][32]) {
+; for (i = 0; i < 42; i++)
+; for (j = 0; j < 8; j++)
+; for (k = 0; k < 32; k++)
+; A[i][j][k] = 1;
+; }
+
+; CHECK: Delinearization on function a_i_j_k:
+; CHECK: Base offset: %a
+; CHECK-NEXT: ArrayDecl[UnknownSize][8][32] with elements of 4 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><nsw><%for.j.header>][{0,+,1}<nuw><nsw><%for.k>]
+define void @a_i_j_k(ptr %a) {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ br label %for.j.header
+
+for.j.header:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+ br label %for.k
+
+for.k:
+ %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+ %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %j, i32 %k
+ store i32 1, ptr %idx
+ %k.inc = add i32 %k, 1
+ %cmp.k = icmp slt i32 %k.inc, 32
+ br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+ %j.inc = add i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 8
+ br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+ %i.inc = add i32 %i, 1
+ %cmp.i = icmp slt i32 %i.inc, 42
+ br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+ ret void
+}
+
+; void f(int A[][8][32]) {
+; for (i = 0; i < 42; i++)
+; for (j = 0; j < 8; j++)
+; for (k = 0; k < 32; k++)
+; A[i][7-j][k] = 1;
+; }
+
+; CHECK: Delinearization on function a_i_nj_k:
+; CHECK: Base offset: %a
+; CHECK-NEXT: ArrayDecl[UnknownSize][8][32] with elements of 4 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{7,+,-1}<nsw><%for.j.header>][{0,+,1}<nuw><nsw><%for.k>]
+define void @a_i_nj_k(ptr %a) {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ br label %for.j.header
+
+for.j.header:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+ %j.subscript = sub i32 7, %j
+ br label %for.k
+
+for.k:
+ %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+ %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %j.subscript, i32 %k
+ store i32 1, ptr %idx
+ %k.inc = add i32 %k, 1
+ %cmp.k = icmp slt i32 %k.inc, 32
+ br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+ %j.inc = add i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 8
+ br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+ %i.inc = add i32 %i, 1
+ %cmp.i = icmp slt i32 %i.inc, 42
+ br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+ ret void
+}
+
+; In the following code, the access functions for both stores are represented
+; in the same way in SCEV, so the delinearization results are also the same. We
+; don't have any type information of the underlying objects.
+;
+; void f(int A[][4][64], int B[][8][32]) {
+; for (i = 0; i < 42; i++)
+; for (j = 0; j < 4; j++)
+; for (k = 0; k < 32; k++) {
+; A[i][j][k] = 1;
+; B[i][2*j][k] = 1;
+; }
+; }
+
+; CHECK: Delinearization on function a_ijk_b_i2jk:
+; CHECK: Base offset: %a
+; CHECK-NEXT: ArrayDecl[UnknownSize][4][64] with elements of 4 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><nsw><%for.j.header>][{0,+,1}<nuw><nsw><%for.k>]
+; CHECK: Base offset: %b
+; CHECK-NEXT: ArrayDecl[UnknownSize][4][64] with elements of 4 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><nsw><%for.j.header>][{0,+,1}<nuw><nsw><%for.k>]
+define void @a_ijk_b_i2jk(ptr %a, ptr %b) {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ br label %for.j.header
+
+for.j.header:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+ %j2 = shl i32 %j, 1
+ br label %for.k
+
+for.k:
+ %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+ %a.idx = getelementptr [4 x [64 x i32]], ptr %a, i32 %i, i32 %j, i32 %k
+ %b.idx = getelementptr [8 x [32 x i32]], ptr %b, i32 %i, i32 %j2, i32 %k
+ store i32 1, ptr %a.idx
+ store i32 1, ptr %b.idx
+ %k.inc = add i32 %k, 1
+ %cmp.k = icmp slt i32 %k.inc, 32
+ br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+ %j.inc = add i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 4
+ br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+ %i.inc = add i32 %i, 1
+ %cmp.i = icmp slt i32 %i.inc, 42
+ br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+ ret void
+}
+
+; The type information of the underlying object is not available, so the
+; delinearization result differs from the array shape declared in the source.
+;
+; void f(int A[][8][32]) {
+; for (i = 0; i < 42; i++)
+; for (j = 0; j < 3; j++)
+; for (k = 0; k < 32; k++)
+; A[i][2*j+1][k] = 1;
+; }
+
+; CHECK: Delinearization on function a_i_2j1_k:
+; CHECK: Base offset: %a
+; CHECK-NEXT: ArrayDecl[UnknownSize][4][64] with elements of 4 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><%for.j.header>][{32,+,1}<nw><%for.k>]
+define void @a_i_2j1_k(ptr %a) {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ br label %for.j.header
+
+for.j.header:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+ %j2 = shl i32 %j, 1
+ %j.subscript = add i32 %j2, 1
+ br label %for.k
+
+for.k:
+ %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+ %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %j.subscript, i32 %k
+ store i32 1, ptr %idx
+ %k.inc = add i32 %k, 1
+ %cmp.k = icmp slt i32 %k.inc, 32
+ br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+ %j.inc = add i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 3
+ br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+ %i.inc = add i32 %i, 1
+ %cmp.i = icmp slt i32 %i.inc, 42
+ br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+ ret void
+}
+
+; Fail to delinearize because the step value of the j-loop is not divisible by
+; that of the k-loop.
+;
+; void f(int A[][8][32]) {
+; for (i = 0; i < 42; i++)
+; for (j = 0; j < 8; j++)
+; for (k = 0; k < 10; k++)
+; A[i][j][3*k] = 1;
+; }
+
+; CHECK: Delinearization on function a_i_j_3k:
+; CHECK: AccessFunction: {{...}}0,+,1024}<nuw><nsw><%for.i.header>,+,128}<nw><%for.j.header>,+,12}<nw><%for.k>
+; CHECK-NEXT: failed to delinearize
+define void @a_i_j_3k(ptr %a) {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ br label %for.j.header
+
+for.j.header:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+ br label %for.k
+
+for.k:
+ %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+ %k.subscript = mul i32 %k, 3
+ %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %j, i32 %k.subscript
+ store i32 1, ptr %idx
+ %k.inc = add i32 %k, 1
+ %cmp.k = icmp slt i32 %k.inc, 10
+ br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+ %j.inc = add i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 8
+ br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+ %i.inc = add i32 %i, 1
+ %cmp.i = icmp slt i32 %i.inc, 42
+ br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+ ret void
+}
+
+; Fail to delinearize because i is used in multiple subscripts that are not adjacent.
+;
+; void f(int A[][8][32]) {
+; for (i = 0; i < 32; i++)
+; for (j = 0; j < 4; j++)
+; for (k = 0; k < 4; k++)
+; A[i][j+k][i] = 1;
+; }
+
+; CHECK: Delinearization on function a_i_jk_i:
+; CHECK: AccessFunction: {{...}}0,+,1028}<%for.i.header>,+,128}<nw><%for.j.header>,+,128}<nw><%for.k>
+; CHECK-NEXT: failed to delinearize
+define void @a_i_jk_i(ptr %a) {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ br label %for.j.header
+
+for.j.header:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+ br label %for.k
+
+for.k:
+ %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+ %jk = add i32 %j, %k
+ %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %jk, i32 %i
+ store i32 1, ptr %idx
+ %k.inc = add i32 %k, 1
+ %cmp.k = icmp slt i32 %k.inc, 4
+ br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+ %j.inc = add i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 4
+ br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+ %i.inc = add i32 %i, 1
+ %cmp.i = icmp slt i32 %i.inc, 32
+ br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+ ret void
+}
+
+; Can delinearize, but the result is different from the original array size. In
+; this case, the outermost two dimensions are melded into one.
+;
+; void f(int A[][8][32]) {
+; for (i = 0; i < 8; i++)
+; for (j = 0; j < 10; j++)
+; for (k = 0; k < 10; k++)
+; A[i][i][j+k] = 1;
+; }
+
+; CHECK: Delinearization on function a_i_i_jk:
+; CHECK: Base offset: %a
+; CHECK-NEXT: ArrayDecl[UnknownSize][288] with elements of 4 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{{..}}0,+,1}<nuw><nsw><%for.j.header>,+,1}<nuw><nsw><%for.k>]
+define void @a_i_i_jk(ptr %a) {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ br label %for.j.header
+
+for.j.header:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+ br label %for.k
+
+for.k:
+ %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+ %jk = add i32 %j, %k
+ %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %i, i32 %jk
+ store i32 1, ptr %idx
+ %k.inc = add i32 %k, 1
+ %cmp.k = icmp slt i32 %k.inc, 10
+ br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+ %j.inc = add i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 10
+ br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+ %i.inc = add i32 %i, 1
+ %cmp.i = icmp slt...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/145050
More information about the llvm-commits
mailing list