[llvm] [LoopInterchange] Remove 'I' dependency (PR #140709)
via llvm-commits
llvm-commits at lists.llvm.org
Tue May 20 04:00:14 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Ryotaro Kasuga (kasuga-fj)
<details>
<summary>Changes</summary>
LoopInterchange pads the end of the direction vector with `I` when the Levels value of a Dependence object is less than the depth of the loop nest. However, `I` dependencies were treated the same as `=` in subsequent processes, which is incorrect. For example, DependenceAnalysis currently returns a default Dependence object when AliasAnalysis results in MayAlias. In such a case, the direction vector should be like `[* * ... *]`. However, because the Levels value of default Dependence object is 0, therefore LoopInterchange generated a direction vector like `[I I ... I]`, leading to unsafe interchanges. This patch fixes the issue by replacing `I` with `*`. As far as I understand, this change doesn't prevent any legal interchanges that were applied before this patch.
Fixes #<!-- -->140238
---
Full diff: https://github.com/llvm/llvm-project/pull/140709.diff
6 Files Affected:
- (modified) llvm/lib/Transforms/Scalar/LoopInterchange.cpp (+18-2)
- (added) llvm/test/Transforms/LoopInterchange/array-dims-less-than-loop-depth.ll (+93)
- (modified) llvm/test/Transforms/LoopInterchange/inner-only-reductions.ll (+2-1)
- (modified) llvm/test/Transforms/LoopInterchange/multilevel-partial-reduction.ll (+4-2)
- (modified) llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll (+2-2)
- (modified) llvm/test/Transforms/LoopInterchange/unique-dep-matrix.ll (+1-1)
``````````diff
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 2b2d56f335446..b21f625f8cf5b 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -209,8 +209,24 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
Direction = '*';
Dep.push_back(Direction);
}
+
+ // In some cases, the Levels of Dependence may be less than the depth of
+ // the loop nest, particularly when the array's dimension is smaller
+ // than the loop nest depth. In such cases, fill the end of the
+ // direction vector with '*'
+ // TODO: This can be false dependency in some cases, e.g.,
+ //
+ // for (int i=0; i<N; i++)
+ // for (int j=0; j<N; j++)
+ // for (int k=0; k<N; k++)
+ // v[i] += a[i][j][k];
+ //
+ // In this example, a dependency exists between the load and store of
+ // v[i]. The direction vector returned by DependenceInfo would be [=],
+ // and thus it would become [= * *]. However, if the addition is
+ // associative, exchanging the j-loop and k-loop is legal.
while (Dep.size() != Level) {
- Dep.push_back('I');
+ Dep.push_back('*');
}
// Make sure we only add unique entries to the dependency matrix.
@@ -1214,7 +1230,7 @@ LoopInterchangeProfitability::isProfitablePerInstrOrderCost() {
static bool canVectorize(const CharMatrix &DepMatrix, unsigned LoopId) {
for (unsigned I = 0; I != DepMatrix.size(); I++) {
char Dir = DepMatrix[I][LoopId];
- if (Dir != 'I' && Dir != '=')
+ if (Dir != '=')
return false;
}
return true;
diff --git a/llvm/test/Transforms/LoopInterchange/array-dims-less-than-loop-depth.ll b/llvm/test/Transforms/LoopInterchange/array-dims-less-than-loop-depth.ll
new file mode 100644
index 0000000000000..c369ec2fa1f81
--- /dev/null
+++ b/llvm/test/Transforms/LoopInterchange/array-dims-less-than-loop-depth.ll
@@ -0,0 +1,93 @@
+; REQUIRES: asserts
+; RUN: opt < %s -passes=loop-interchange -verify-dom-info -verify-loop-info \
+; RUN: -disable-output -debug 2>&1 | FileCheck %s
+
+;; In the following case, p0 and p1 may alias, so the direction vector must be [* *].
+;;
+;; void may_alias(float *p0, float *p1) {
+;; for (int i = 0; i < 4; i++)
+;; for (int j = 0; j < 4; j++)
+;; p0[4 * i + j] = p1[4 * j + i];
+;; }
+
+; CHECK: Dependency matrix before interchange:
+; CHECK-NEXT: * *
+; CHECK-NEXT: Processing InnerLoopId = 1 and OuterLoopId = 0
+define void @may_alias(ptr %p0, ptr %p1) {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ br label %for.j
+
+for.j:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
+ %idx.0 = getelementptr inbounds [4 x [4 x float]], ptr %p0, i32 0, i32 %j, i32 %i
+ %idx.1 = getelementptr inbounds [4 x [4 x float]], ptr %p1, i32 0, i32 %i, i32 %j
+ %0 = load float, ptr %idx.0, align 4
+ store float %0, ptr %idx.1, align 4
+ %j.inc = add nuw nsw i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 4
+ br i1 %cmp.j, label %for.j, label %for.i.latch
+
+for.i.latch:
+ %i.inc = add nuw nsw i32 %i, 1
+ %cmp.i = icmp slt i32 %i.inc, 4
+ br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+ ret void
+}
+
+ at A = global [4 x [4 x [4 x float]]] zeroinitializer, align 4
+ at V = global [4 x float] zeroinitializer, align 4
+
+;; In the following case, there is an all direction dependence for the j-loop
+;; and k-loop.
+;;
+;; for (int i = 0; i < 4; i++)
+;; for (int j = 0; j < 4; j++)
+;; for (int k = 0; k < 4; k++)
+;; V[i] += A[i][j][k];
+
+; CHECK: Dependency matrix before interchange:
+; CHECK-NEXT: = * *
+; CHECK-NEXT: Processing InnerLoopId = 2 and OuterLoopId = 1
+define void @partial_reduction() {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ br label %for.j.header
+
+for.j.header:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+ br label %for.k
+
+for.k:
+ %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+ %idx.a = getelementptr inbounds [4 x [4 x [4 x float]]], ptr @A, i32 0, i32 %i, i32 %j, i32 %k
+ %idx.v = getelementptr inbounds [4 x float], ptr @V, i32 0, i32 %i
+ %0 = load float, ptr %idx.a, align 4
+ %1 = load float, ptr %idx.v, align 4
+ %add = fadd fast float %0, %1
+ store float %add, ptr %idx.v, align 4
+ %k.inc = add nuw nsw i32 %k, 1
+ %cmp.k = icmp slt i32 %k.inc, 4
+ br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+ %j.inc = add nuw nsw i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 4
+ br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+ %i.inc = add nuw nsw i32 %i, 1
+ %cmp.i = icmp slt i32 %i.inc, 4
+ br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+ ret void
+}
diff --git a/llvm/test/Transforms/LoopInterchange/inner-only-reductions.ll b/llvm/test/Transforms/LoopInterchange/inner-only-reductions.ll
index c6c2e2a8a5187..388316c537af5 100644
--- a/llvm/test/Transforms/LoopInterchange/inner-only-reductions.ll
+++ b/llvm/test/Transforms/LoopInterchange/inner-only-reductions.ll
@@ -71,10 +71,11 @@ for.end8: ; preds = %for.cond1.for.inc6_
;; Not tightly nested. Do not interchange.
;; Not interchanged hence the phi's in the inner loop will not be split.
+;; FIXME: Currently this loops is rejected for dependence reasons.
; CHECK: --- !Missed
; CHECK-NEXT: Pass: loop-interchange
-; CHECK-NEXT: Name: UnsupportedPHIOuter
+; CHECK-NEXT: Name: Dependence
; CHECK-NEXT: Function: reduction_03
; IR-LABEL: @reduction_03(
diff --git a/llvm/test/Transforms/LoopInterchange/multilevel-partial-reduction.ll b/llvm/test/Transforms/LoopInterchange/multilevel-partial-reduction.ll
index 49c706d69d167..548c7fea96bb3 100644
--- a/llvm/test/Transforms/LoopInterchange/multilevel-partial-reduction.ll
+++ b/llvm/test/Transforms/LoopInterchange/multilevel-partial-reduction.ll
@@ -9,14 +9,16 @@
; the innermost loop has a reduction operation which is however not in a form
; that loop interchange can handle. Interchanging the outermost and the
; middle loops would intervene with the reduction and cause miscompile.
+;
+; FIXME: Currently interchanges are rejected for dependence reasons.
; REMARKS: --- !Missed
; REMARKS-NEXT: Pass: loop-interchange
-; REMARKS-NEXT: Name: UnsupportedPHIInner
+; REMARKS-NEXT: Name: Dependence
; REMARKS-NEXT: Function: test7
; REMARKS: --- !Missed
; REMARKS-NEXT: Pass: loop-interchange
-; REMARKS-NEXT: Name: UnsupportedPHIInner
+; REMARKS-NEXT: Name: Dependence
; REMARKS-NEXT: Function: test7
define i32 @test7() {
diff --git a/llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll b/llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll
index 520e1ee3506da..732b9b2766302 100644
--- a/llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll
+++ b/llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll
@@ -4,7 +4,7 @@
; Triply nested loop, should be able to do interchange three times
; to get the ideal access pattern.
-; void f(int e[10][10][10], int f[10][10][10]) {
+; void f(int e[restrict 10][10][10], int f[restrict 10][10][10]) {
; for (int a = 0; a < 10; a++) {
; for (int b = 0; b < 10; b++) {
; for (int c = 0; c < 10; c++) {
@@ -35,7 +35,7 @@
; REMARKS-NEXT: Name: Interchanged
; REMARKS-NEXT: Function: pr43326-triply-nested
-define void @pr43326-triply-nested(ptr %e, ptr %f) {
+define void @pr43326-triply-nested(ptr noalias %e, ptr noalias %f) {
entry:
br label %for.outermost.header
diff --git a/llvm/test/Transforms/LoopInterchange/unique-dep-matrix.ll b/llvm/test/Transforms/LoopInterchange/unique-dep-matrix.ll
index e5c18830784ac..68089b43121c5 100644
--- a/llvm/test/Transforms/LoopInterchange/unique-dep-matrix.ll
+++ b/llvm/test/Transforms/LoopInterchange/unique-dep-matrix.ll
@@ -2,7 +2,7 @@
; RUN: opt < %s -passes=loop-interchange -S -debug 2>&1 | FileCheck %s
; CHECK: Dependency matrix before interchange:
-; CHECK-NEXT: I I
+; CHECK-NEXT: * *
; CHECK-NEXT: = *
; CHECK-NEXT: < *
; CHECK-NEXT: Processing InnerLoopId
``````````
</details>
https://github.com/llvm/llvm-project/pull/140709
More information about the llvm-commits
mailing list