[llvm] [Delinearization] Add validation for large size arrays (PR #169902)
Ryotaro Kasuga via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 2 18:34:44 PST 2025
https://github.com/kasuga-fj updated https://github.com/llvm/llvm-project/pull/169902
>From 67266547a68dae20a30f7158fa1904db6fd781c2 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Fri, 28 Nov 2025 11:12:41 +0000
Subject: [PATCH 1/4] [Delinearization] Add validation for large size arrays
---
llvm/lib/Analysis/Delinearization.cpp | 51 +++++++++++++++++++
.../constant_functions_multi_dim.ll | 2 +-
.../Delinearization/multidim_only_ivs_2d.ll | 4 +-
.../Delinearization/multidim_only_ivs_3d.ll | 2 +-
..._two_accesses_different_delinearization.ll | 4 +-
.../Delinearization/validation_large_size.ll | 13 +++--
.../Analysis/DependenceAnalysis/DADelin.ll | 32 ++++++------
.../DependenceAnalysis/DifferentOffsets.ll | 2 +-
.../Analysis/DependenceAnalysis/StrongSIV.ll | 10 ++--
llvm/test/Transforms/LICM/lnicm.ll | 3 ++
10 files changed, 90 insertions(+), 33 deletions(-)
diff --git a/llvm/lib/Analysis/Delinearization.cpp b/llvm/lib/Analysis/Delinearization.cpp
index 0c3b02ae09f47..d847de0edca12 100644
--- a/llvm/lib/Analysis/Delinearization.cpp
+++ b/llvm/lib/Analysis/Delinearization.cpp
@@ -747,6 +747,20 @@ bool llvm::validateDelinearizationResult(ScalarEvolution &SE,
ArrayRef<const SCEV *> Sizes,
ArrayRef<const SCEV *> Subscripts,
const Value *Ptr) {
+ // Sizes and Subscripts are as follows:
+ //
+ // Sizes: [UNK][S_2]...[S_n]
+ // Subscripts: [I_1][I_2]...[I_n]
+ //
+ // where the size of the outermost dimension is unknown (UNK).
+
+ auto MulOverflow = [&](const SCEV *A, const SCEV *B) -> const SCEV * {
+ if (!SE.willNotOverflow(Instruction::Mul, /*IsSigned=*/true, A, B))
+ return nullptr;
+ return SE.getMulExpr(A, B);
+ };
+
+ // Range check: 0 <= I_k < S_k for k = 2..n.
for (size_t I = 1; I < Sizes.size(); ++I) {
const SCEV *Size = Sizes[I - 1];
const SCEV *Subscript = Subscripts[I];
@@ -755,6 +769,43 @@ bool llvm::validateDelinearizationResult(ScalarEvolution &SE,
if (!isKnownLessThan(&SE, Subscript, Size))
return false;
}
+
+ // The offset computation is as follows:
+ //
+ // Offset = I_n +
+ // S_n * I_{n-1} +
+ // ... +
+ // (S_2 * ... * S_n) * I_1
+ //
+ // Regarding this as a function from (I_1, I_2, ..., I_n) to integers, it
+ // must be injective. To guarantee it, the above calculation must not
+ // overflow. Since we have already checked that 0 <= I_k < S_k for k = 2..n,
+ // the minimum and maximum values occur in the following cases:
+ //
+ // Min = [I_1][0]...[0] = S_2 * ... * S_n * I_1
+ // Max = [I_1][S_2-1]...[S_n-1]
+ // = (S_2 * ... * S_n) * I_1 +
+ // (S_3 * ... * S_n) * (S_2 - 1) +
+ // ... +
+ // (S_n - 1)
+ // = (S_2 * ... * S_n) * I_1 +
+ // (S_2 * ... * S_n) - 1 (can be proved by induction)
+ //
+ const SCEV *Prod = SE.getOne(Sizes[0]->getType());
+ for (const SCEV *Size : drop_end(Sizes)) {
+ Prod = MulOverflow(Prod, Size);
+ if (!Prod)
+ return false;
+ }
+ const SCEV *Min = MulOverflow(Prod, Subscripts[0]);
+ if (!Min)
+ return false;
+
+ // Over-approximate Max as Prod * I_1 + Prod (ignoring the -1).
+ if (!SE.willNotOverflow(Instruction::Add, /*IsSigned=*/true, Min,
+ Subscripts[0]))
+ return false;
+
return true;
}
diff --git a/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll b/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll
index 9e6a4221f8eda..7e5c5142dccbc 100644
--- a/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll
+++ b/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll
@@ -11,7 +11,7 @@ define void @mat_mul(ptr %C, ptr %A, ptr %B, i64 %N) !kernel_arg_addr_space !2 !
; CHECK-NEXT: Base offset: %A
; CHECK-NEXT: ArrayDecl[UnknownSize][%N] with elements of 4 bytes.
; CHECK-NEXT: ArrayRef[%call][{0,+,1}<nuw><nsw><%for.inc>]
-; CHECK-NEXT: Delinearization validation: Succeeded
+; CHECK-NEXT: Delinearization validation: Failed
; CHECK-EMPTY:
; CHECK-NEXT: Inst: %tmp5 = load float, ptr %arrayidx4, align 4
; CHECK-NEXT: AccessFunction: {(4 * %call1),+,(4 * %N)}<%for.inc>
diff --git a/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll b/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll
index e1ad1c55313a4..e5d2806101926 100644
--- a/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll
+++ b/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll
@@ -16,14 +16,14 @@ define void @foo(i64 %n, i64 %m, ptr %A) {
; CHECK-NEXT: Base offset: %A
; CHECK-NEXT: ArrayDecl[UnknownSize][%m] with elements of 8 bytes.
; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>]
-; CHECK-NEXT: Delinearization validation: Succeeded
+; CHECK-NEXT: Delinearization validation: Failed
; CHECK-EMPTY:
; CHECK-NEXT: Inst: store double %val, ptr %arrayidx, align 8
; CHECK-NEXT: AccessFunction: {{\{\{}}0,+,(8 * %m)}<%for.i>,+,8}<%for.j>
; CHECK-NEXT: Base offset: %A
; CHECK-NEXT: ArrayDecl[UnknownSize][%m] with elements of 8 bytes.
; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>]
-; CHECK-NEXT: Delinearization validation: Succeeded
+; CHECK-NEXT: Delinearization validation: Failed
;
entry:
br label %for.i
diff --git a/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d.ll b/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d.ll
index d5213e5afb33c..f5f0628ede937 100644
--- a/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d.ll
+++ b/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d.ll
@@ -16,7 +16,7 @@ define void @foo(i64 %n, i64 %m, i64 %o, ptr %A) {
; CHECK-NEXT: Base offset: %A
; CHECK-NEXT: ArrayDecl[UnknownSize][%m][%o] with elements of 8 bytes.
; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>][{0,+,1}<nuw><nsw><%for.k>]
-; CHECK-NEXT: Delinearization validation: Succeeded
+; CHECK-NEXT: Delinearization validation: Failed
;
entry:
br label %for.i
diff --git a/llvm/test/Analysis/Delinearization/multidim_two_accesses_different_delinearization.ll b/llvm/test/Analysis/Delinearization/multidim_two_accesses_different_delinearization.ll
index 011dc40697cb5..f768002dd9e41 100644
--- a/llvm/test/Analysis/Delinearization/multidim_two_accesses_different_delinearization.ll
+++ b/llvm/test/Analysis/Delinearization/multidim_two_accesses_different_delinearization.ll
@@ -19,14 +19,14 @@ define void @foo(i64 %n, i64 %m, ptr %A) {
; CHECK-NEXT: Base offset: %A
; CHECK-NEXT: ArrayDecl[UnknownSize][%m] with elements of 8 bytes.
; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>]
-; CHECK-NEXT: Delinearization validation: Succeeded
+; CHECK-NEXT: Delinearization validation: Failed
; CHECK-EMPTY:
; CHECK-NEXT: Inst: store double 1.000000e+00, ptr %arrayidx1, align 8
; CHECK-NEXT: AccessFunction: {{\{\{}}0,+,8}<%for.i>,+,(8 * %n)}<%for.j>
; CHECK-NEXT: Base offset: %A
; CHECK-NEXT: ArrayDecl[UnknownSize][%n] with elements of 8 bytes.
; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.j>][{0,+,1}<nuw><nsw><%for.i>]
-; CHECK-NEXT: Delinearization validation: Succeeded
+; CHECK-NEXT: Delinearization validation: Failed
;
entry:
br label %for.i
diff --git a/llvm/test/Analysis/Delinearization/validation_large_size.ll b/llvm/test/Analysis/Delinearization/validation_large_size.ll
index a475f449b9e39..03a1d89cb7c23 100644
--- a/llvm/test/Analysis/Delinearization/validation_large_size.ll
+++ b/llvm/test/Analysis/Delinearization/validation_large_size.ll
@@ -1,16 +1,15 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -passes='print<delinearization>' --delinearize-use-fixed-size-array-heuristic -disable-output 2>&1 | FileCheck %s
-; FIXME: As for array accesses, the following property should hold (without
+; As for array accesses, the following property should hold (without
; out-of-bound accesses):
;
; &A[I_1][I_2]...[I_n] == &A[J_1][J_2]...[J_n] iff
; (I_1, I_2, ..., I_n) == (J_1, J_2, ..., J_n)
;
-; Currently, delinearization doesn't guarantee this property, especially when
-; the inferred array size is very large so that the product of dimensions may
-; overflow. The delinearization validation should consider such cases as
-; invalid.
+; This property may not hold if the inferred array size is very large and the
+; offset calculation can overflow. The delinearization validation should
+; consider such cases as invalid.
; for (i = 0; i < (1ULL << 60); i++)
; for (j = 0; j < 256; j++)
@@ -27,7 +26,7 @@ define void @large_size_fixed(ptr %A) {
; CHECK-NEXT: Base offset: %A
; CHECK-NEXT: ArrayDecl[UnknownSize][256] with elements of 1 bytes.
; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><nsw><%for.j>]
-; CHECK-NEXT: Delinearization validation: Succeeded
+; CHECK-NEXT: Delinearization validation: Failed
;
entry:
br label %for.i.header
@@ -79,7 +78,7 @@ define void @large_size_parametric(i64 %n, i64 %m, i64 %o, ptr %A) {
; CHECK-NEXT: Base offset: %A
; CHECK-NEXT: ArrayDecl[UnknownSize][%m][%o] with elements of 1 bytes.
; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><nsw><%for.j.header>][{0,+,1}<nuw><nsw><%for.k.header>]
-; CHECK-NEXT: Delinearization validation: Succeeded
+; CHECK-NEXT: Delinearization validation: Failed
;
entry:
%guard.i = icmp sgt i64 %n, 0
diff --git a/llvm/test/Analysis/DependenceAnalysis/DADelin.ll b/llvm/test/Analysis/DependenceAnalysis/DADelin.ll
index 8f94a455d3724..130b9930cfdf5 100644
--- a/llvm/test/Analysis/DependenceAnalysis/DADelin.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/DADelin.ll
@@ -13,11 +13,11 @@ target triple = "thumbv8m.main-arm-none-eabi"
define void @t1(i32 %n, i32 %m, i32 %o, ptr nocapture %A) {
; CHECK-LABEL: 't1'
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4
-; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: da analyze - input [* * *]!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx, align 4
-; CHECK-NEXT: da analyze - consistent anti [0 0 0|<]!
+; CHECK-NEXT: da analyze - anti [* * *|<]!
; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx, align 4
-; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: da analyze - output [* * *]!
;
entry:
%cmp49 = icmp sgt i32 %n, 0
@@ -78,7 +78,7 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3,
define void @t2(i32 %n, i32 %m, i32 %o, ptr nocapture %A) {
; CHECK-LABEL: 't2'
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4
-; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: da analyze - input [* * *]!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
; CHECK-NEXT: da analyze - anti [* * *|<]!
; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
@@ -145,7 +145,7 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3,
define void @t3(i32 %n, i32 %m, i32 %o, ptr nocapture %A) {
; CHECK-LABEL: 't3'
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4
-; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: da analyze - input [* * *]!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
; CHECK-NEXT: da analyze - anti [* * *|<]!
; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
@@ -212,7 +212,7 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3,
define void @t4(i32 %n, i32 %m, i32 %o, ptr nocapture %A) {
; CHECK-LABEL: 't4'
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4
-; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: da analyze - input [* * *]!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
; CHECK-NEXT: da analyze - anti [* * *|<]!
; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
@@ -279,7 +279,7 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3,
define void @t5(i32 %n, i32 %m, i32 %o, ptr nocapture %A) {
; CHECK-LABEL: 't5'
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4
-; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: da analyze - input [* * *]!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
; CHECK-NEXT: da analyze - anti [* * *|<]!
; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
@@ -346,11 +346,11 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3,
define void @t6(i32 %n, i32 %m, i32 %o, ptr nocapture %A) {
; CHECK-LABEL: 't6'
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4
-; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: da analyze - input [* * *]!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
-; CHECK-NEXT: da analyze - consistent anti [-1 0 0]!
+; CHECK-NEXT: da analyze - anti [* * *|<]!
; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
-; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: da analyze - output [* * *]!
;
entry:
%cmp49 = icmp sgt i32 %n, 0
@@ -414,11 +414,11 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3,
define void @t7(i32 %n, i32 %m, i32 %o, ptr nocapture %A) {
; CHECK-LABEL: 't7'
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4
-; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: da analyze - input [* * *]!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
-; CHECK-NEXT: da analyze - consistent anti [1 0 0]!
+; CHECK-NEXT: da analyze - anti [* * *|<]!
; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
-; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: da analyze - output [* * *]!
;
entry:
%cmp49 = icmp sgt i32 %n, 0
@@ -482,11 +482,11 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3,
define void @t8(i32 %n, i32 %m, i32 %o, ptr nocapture %A) {
; CHECK-LABEL: 't8'
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4
-; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: da analyze - input [* * *]!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
-; CHECK-NEXT: da analyze - consistent anti [0 0 1]!
+; CHECK-NEXT: da analyze - anti [* * *|<]!
; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
-; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: da analyze - output [* * *]!
;
entry:
%cmp49 = icmp sgt i32 %n, 0
diff --git a/llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll b/llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll
index 91d127cfc09d6..fd634c8b97e24 100644
--- a/llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll
@@ -93,7 +93,7 @@ define void @linearized_accesses(i64 %n, i64 %m, i64 %o, ptr %A) {
; CHECK-NEXT: Src: store i32 1, ptr %idx0, align 4 --> Dst: store i32 1, ptr %idx1, align 4
; CHECK-NEXT: da analyze - output [* * *|<]!
; CHECK-NEXT: Src: store i32 1, ptr %idx1, align 4 --> Dst: store i32 1, ptr %idx1, align 4
-; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: da analyze - output [* * *]!
;
entry:
br label %for.i
diff --git a/llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll b/llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll
index 19cef4537a769..16e0e7bccaaf5 100644
--- a/llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll
@@ -536,9 +536,13 @@ for.end: ; preds = %for.body
;; A[i] = 0;
define void @strong11(ptr %A) nounwind uwtable ssp {
-; CHECK-LABEL: 'strong11'
-; CHECK-NEXT: Src: store i32 0, ptr %arrayidx, align 4 --> Dst: store i32 0, ptr %arrayidx, align 4
-; CHECK-NEXT: da analyze - consistent output [0 S]!
+; CHECK-ALL-LABEL: 'strong11'
+; CHECK-ALL-NEXT: Src: store i32 0, ptr %arrayidx, align 4 --> Dst: store i32 0, ptr %arrayidx, align 4
+; CHECK-ALL-NEXT: da analyze - none!
+;
+; CHECK-STRONG-SIV-LABEL: 'strong11'
+; CHECK-STRONG-SIV-NEXT: Src: store i32 0, ptr %arrayidx, align 4 --> Dst: store i32 0, ptr %arrayidx, align 4
+; CHECK-STRONG-SIV-NEXT: da analyze - consistent output [0 S]!
;
entry:
br label %for.cond1.preheader
diff --git a/llvm/test/Transforms/LICM/lnicm.ll b/llvm/test/Transforms/LICM/lnicm.ll
index 814f964666305..e331ab7d39e83 100644
--- a/llvm/test/Transforms/LICM/lnicm.ll
+++ b/llvm/test/Transforms/LICM/lnicm.ll
@@ -3,6 +3,9 @@
; RUN: opt -aa-pipeline=basic-aa -passes='loop-mssa(lnicm),loop(loop-interchange)' -cache-line-size=64 -S %s | FileCheck %s --check-prefixes LNICM
; RUN: opt -aa-pipeline=basic-aa -passes='loop-mssa(licm),loop(loop-interchange)' -cache-line-size=64 -S %s | FileCheck %s --check-prefixes LICM
+; XFAIL: *
+; Loop interchange currently fails due to a failure in dependence analysis.
+
; This test represents the following function:
; void test(int n, int m, int x[m][n], int y[n], int *z) {
; for (int k = 0; k < n; k++) {
>From 80de5af45ea78741ec255a3b1730bfb0d6b54364 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Sat, 29 Nov 2025 01:49:55 +0900
Subject: [PATCH 2/4] fix spell
---
llvm/lib/Analysis/Delinearization.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Analysis/Delinearization.cpp b/llvm/lib/Analysis/Delinearization.cpp
index d847de0edca12..8563eee06c994 100644
--- a/llvm/lib/Analysis/Delinearization.cpp
+++ b/llvm/lib/Analysis/Delinearization.cpp
@@ -789,7 +789,7 @@ bool llvm::validateDelinearizationResult(ScalarEvolution &SE,
// ... +
// (S_n - 1)
// = (S_2 * ... * S_n) * I_1 +
- // (S_2 * ... * S_n) - 1 (can be proved by induction)
+ // (S_2 * ... * S_n) - 1 (can be proven by induction)
//
const SCEV *Prod = SE.getOne(Sizes[0]->getType());
for (const SCEV *Size : drop_end(Sizes)) {
>From 9b033351606d498ce67827a02f093658dabc1339 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Sat, 29 Nov 2025 02:27:07 +0900
Subject: [PATCH 3/4] also consider element size
---
llvm/lib/Analysis/Delinearization.cpp | 2 +-
.../Delinearization/validation_large_size.ll | 41 +++++++++++++++++++
2 files changed, 42 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Analysis/Delinearization.cpp b/llvm/lib/Analysis/Delinearization.cpp
index 8563eee06c994..b1edc76f37d09 100644
--- a/llvm/lib/Analysis/Delinearization.cpp
+++ b/llvm/lib/Analysis/Delinearization.cpp
@@ -792,7 +792,7 @@ bool llvm::validateDelinearizationResult(ScalarEvolution &SE,
// (S_2 * ... * S_n) - 1 (can be proven by induction)
//
const SCEV *Prod = SE.getOne(Sizes[0]->getType());
- for (const SCEV *Size : drop_end(Sizes)) {
+ for (const SCEV *Size : Sizes) {
Prod = MulOverflow(Prod, Size);
if (!Prod)
return false;
diff --git a/llvm/test/Analysis/Delinearization/validation_large_size.ll b/llvm/test/Analysis/Delinearization/validation_large_size.ll
index 03a1d89cb7c23..ad36d84b8d914 100644
--- a/llvm/test/Analysis/Delinearization/validation_large_size.ll
+++ b/llvm/test/Analysis/Delinearization/validation_large_size.ll
@@ -137,3 +137,44 @@ for.i.latch:
exit:
ret void
}
+
+; for (i = 0; i < (1 << 54); i++)
+; for (j = 0; j < 256; j++)
+; A[i*256 + j] = 0;
+;
+; We also need to consider the element size during validation.
+;
+define void @elementsize_cause_ovfl(ptr %A) {
+; CHECK-LABEL: 'elementsize_cause_ovfl'
+; CHECK-NEXT: Inst: store i64 0, ptr %gep, align 4
+; CHECK-NEXT: AccessFunction: {{\{\{}}0,+,2048}<%for.i.header>,+,8}<%for.j>
+; CHECK-NEXT: Base offset: %A
+; CHECK-NEXT: ArrayDecl[UnknownSize][256] with elements of 8 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><nsw><%for.j>]
+; CHECK-NEXT: Delinearization validation: Failed
+;
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ %i.mul = mul i64 %i, 256
+ br label %for.j
+
+for.j:
+ %j = phi i64 [ 0, %for.i.header ], [ %j.inc, %for.j ]
+ %offset = add i64 %i.mul, %j
+ %gep = getelementptr i64, ptr %A, i64 %offset
+ store i64 0, ptr %gep
+ %j.inc = add i64 %j, 1
+ %ec.j = icmp eq i64 %j.inc, 256
+ br i1 %ec.j, label %for.i.latch, label %for.j
+
+for.i.latch:
+ %i.inc = add i64 %i, 1
+ %ec.i = icmp eq i64 %i.inc, 18014398509481984
+ br i1 %ec.i, label %exit, label %for.i.header
+
+exit:
+ ret void
+}
>From 49e36e8a261e004868e290dff5a9e01b6678baa3 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Tue, 2 Dec 2025 00:28:01 +0900
Subject: [PATCH 4/4] address review comments
---
llvm/lib/Analysis/Delinearization.cpp | 18 +++++++++++++++---
1 file changed, 15 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Analysis/Delinearization.cpp b/llvm/lib/Analysis/Delinearization.cpp
index b1edc76f37d09..686622feec477 100644
--- a/llvm/lib/Analysis/Delinearization.cpp
+++ b/llvm/lib/Analysis/Delinearization.cpp
@@ -754,6 +754,12 @@ bool llvm::validateDelinearizationResult(ScalarEvolution &SE,
//
// where the size of the outermost dimension is unknown (UNK).
+ auto AddOverflow = [&](const SCEV *A, const SCEV *B) -> const SCEV * {
+ if (!SE.willNotOverflow(Instruction::Add, /*IsSigned=*/true, A, B))
+ return nullptr;
+ return SE.getAddExpr(A, B);
+ };
+
auto MulOverflow = [&](const SCEV *A, const SCEV *B) -> const SCEV * {
if (!SE.willNotOverflow(Instruction::Mul, /*IsSigned=*/true, A, B))
return nullptr;
@@ -790,7 +796,9 @@ bool llvm::validateDelinearizationResult(ScalarEvolution &SE,
// (S_n - 1)
// = (S_2 * ... * S_n) * I_1 +
// (S_2 * ... * S_n) - 1 (can be proven by induction)
+ // = Min + (S_2 * ... * S_n) - 1
//
+ // NOTE: I_1 can be negative, so Min is not just 0.
const SCEV *Prod = SE.getOne(Sizes[0]->getType());
for (const SCEV *Size : Sizes) {
Prod = MulOverflow(Prod, Size);
@@ -801,9 +809,13 @@ bool llvm::validateDelinearizationResult(ScalarEvolution &SE,
if (!Min)
return false;
- // Over-approximate Max as Prod * I_1 + Prod (ignoring the -1).
- if (!SE.willNotOverflow(Instruction::Add, /*IsSigned=*/true, Min,
- Subscripts[0]))
+ // We have already checked that Min and Prod don't overflow, so it's enough
+ // to check that Min + Prod - 1 doesn't overflow.
+ const SCEV *MaxPlusOne = AddOverflow(Min, Prod);
+ if (!MaxPlusOne)
+ return false;
+ if (!SE.willNotOverflow(Instruction::Sub, /*IsSigned=*/true, MaxPlusOne,
+ SE.getOne(MaxPlusOne->getType())))
return false;
return true;
More information about the llvm-commits
mailing list