[llvm] [LoopInterchange] Add tests for the vectorization profitability (NFC) (PR #133665)

Ryotaro Kasuga via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 2 01:12:38 PDT 2025


https://github.com/kasuga-fj updated https://github.com/llvm/llvm-project/pull/133665

>From 0f2ac45dd84ead993f832d33c42c986b565fb406 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Thu, 27 Mar 2025 07:04:27 +0000
Subject: [PATCH 1/2] [LoopInterchange] Add tests for the vectorization
 profitability (NFC)

There is a problem with the current profitability check for
vectorization in LoopInterchange. There are both false positives and
false negatives. The former means that the heuristic may say that "an
exchange is necessary to vectorize the innermost loop" even though it's
already possible. The latter means that the heuristic may miss a case
where an exchange is necessary to vectorize the innermost loop. Note
that this is not a dependency analysis problem.  These problems can
occur even if the analysis is accurate (no overestimation).

This patch adds tests to clarify the cases that should be fixed. The
root cause of these cases is that the heuristic doesn't handle the
direction of a dependency correctly.
---
 .../profitability-vectorization-heuristic.ll  | 108 ++++++++++++++++++
 1 file changed, 108 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll

diff --git a/llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll b/llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll
new file mode 100644
index 0000000000000..606117e70db86
--- /dev/null
+++ b/llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll
@@ -0,0 +1,108 @@
+; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 \
+; RUN:     -pass-remarks-output=%t -disable-output -loop-interchange-profitabilities=vectorize
+; RUN: FileCheck -input-file %t %s
+
+ at A = dso_local global [256 x [256 x float]] zeroinitializer
+ at B = dso_local global [256 x [256 x float]] zeroinitializer
+ at C = dso_local global [256 x [256 x float]] zeroinitializer
+
+; Check that the below loops are exchanged for vectorization.
+;
+; for (int i = 0; i < 256; i++) {
+;   for (int j = 1; j < 256; j++) {
+;     A[i][j] = A[i][j-1] + B[i][j];
+;     C[i][j] += 1;
+;   }
+; }
+;
+; FIXME: These loops are not exchanged at this time due to the problem of
+; profitablity heuristic for vectorization.
+
+; CHECK:      --- !Missed
+; CHECK-NEXT: Pass:            loop-interchange
+; CHECK-NEXT: Name:            InterchangeNotProfitable
+; CHECK-NEXT: Function:        interchange_necesasry_for_vectorization
+; CHECK-NEXT: Args:
+; CHECK-NEXT:   - String:          Interchanging loops is not considered to improve cache locality nor vectorization.
+; CHECK-NEXT: ...
+define void @interchange_necesasry_for_vectorization() {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i64 [ 1, %entry ], [ %i.next, %for.i.inc ]
+  br label %for.j.body
+
+for.j.body:
+  %j = phi i64 [ 1, %for.i.header ], [ %j.next, %for.j.body ]
+  %j.dec = add nsw i64 %j, -1
+  %a.load.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @A, i64 %i, i64 %j.dec
+  %b.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @B, i64 %i, i64 %j
+  %c.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @C, i64 %i, i64 %j
+  %a = load float, ptr %a.load.index, align 4
+  %b = load float, ptr %b.index, align 4
+  %c = load float, ptr %c.index, align 4
+  %add.0 = fadd float %a, %b
+  %a.store.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @A, i64 %i, i64 %j
+  store float %add.0, ptr %a.store.index, align 4
+  %add.1 = fadd float %c, 1.0
+  store float %add.1, ptr %c.index, align 4
+  %j.next = add nuw nsw i64 %j, 1
+  %cmp.j = icmp eq i64 %j.next, 256
+  br i1 %cmp.j, label %for.i.inc, label %for.j.body
+
+for.i.inc:
+  %i.next = add nuw nsw i64 %i, 1
+  %cmp.i = icmp eq i64 %i.next, 256
+  br i1 %cmp.i, label %exit, label %for.i.header
+
+exit:
+  ret void
+}
+
+; Check that the following innermost loop can be vectorized so that
+; interchangig is unnecessary.
+;
+; for (int i = 0; i < 256; i++)
+;   for (int j = 1; j < 256; j++)
+;     A[i][j-1] = A[i][j] + B[i][j];
+;
+; FIXME: These loops are exchanged at this time due to the problem of
+; profitablity heuristic for vectorization.
+
+; CHECK:      --- !Passed
+; CHECK-NEXT: Pass:            loop-interchange
+; CHECK-NEXT: Name:            Interchanged
+; CHECK-NEXT: Function:        interchange_unnecesasry_for_vectorization
+; CHECK-NEXT: Args:
+; CHECK-NEXT:   - String:          Loop interchanged with enclosing loop.
+define void @interchange_unnecesasry_for_vectorization() {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i64 [ 1, %entry ], [ %i.next, %for.i.inc ]
+  br label %for.j.body
+
+for.j.body:
+  %j = phi i64 [ 1, %for.i.header ], [ %j.next, %for.j.body ]
+  %j.dec = add nsw i64 %j, -1
+  %a.load.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @A, i64 %i, i64 %j
+  %b.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @B, i64 %i, i64 %j
+  %a = load float, ptr %a.load.index, align 4
+  %b = load float, ptr %b.index, align 4
+  %add = fadd float %a, %b
+  %a.store.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @A, i64 %i, i64 %j.dec
+  store float %add, ptr %a.store.index, align 4
+  %j.next = add nuw nsw i64 %j, 1
+  %cmp.j = icmp eq i64 %j.next, 256
+  br i1 %cmp.j, label %for.i.inc, label %for.j.body
+
+for.i.inc:
+  %i.next = add nuw nsw i64 %i, 1
+  %cmp.i = icmp eq i64 %i.next, 256
+  br i1 %cmp.i, label %exit, label %for.i.header
+
+exit:
+  ret void
+}

>From 96dd3749f257a1acad4714106cc73aadb3357d66 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Wed, 2 Apr 2025 08:11:26 +0000
Subject: [PATCH 2/2] Address review comments

---
 .../profitability-vectorization-heuristic.ll       | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll b/llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll
index 606117e70db86..efd2a9c09e7cf 100644
--- a/llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll
+++ b/llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll
@@ -15,17 +15,17 @@
 ;   }
 ; }
 ;
-; FIXME: These loops are not exchanged at this time due to the problem of
-; profitablity heuristic for vectorization.
+; FIXME: These loops are not exchanged at this time due to the problem in
+; profitability heuristic calculation for vectorization.
 
 ; CHECK:      --- !Missed
 ; CHECK-NEXT: Pass:            loop-interchange
 ; CHECK-NEXT: Name:            InterchangeNotProfitable
-; CHECK-NEXT: Function:        interchange_necesasry_for_vectorization
+; CHECK-NEXT: Function:        interchange_necessary_for_vectorization
 ; CHECK-NEXT: Args:
 ; CHECK-NEXT:   - String:          Interchanging loops is not considered to improve cache locality nor vectorization.
 ; CHECK-NEXT: ...
-define void @interchange_necesasry_for_vectorization() {
+define void @interchange_necessary_for_vectorization() {
 entry:
   br label %for.i.header
 
@@ -61,14 +61,14 @@ exit:
 }
 
 ; Check that the following innermost loop can be vectorized so that
-; interchangig is unnecessary.
+; interchanging is unnecessary.
 ;
 ; for (int i = 0; i < 256; i++)
 ;   for (int j = 1; j < 256; j++)
 ;     A[i][j-1] = A[i][j] + B[i][j];
 ;
-; FIXME: These loops are exchanged at this time due to the problem of
-; profitablity heuristic for vectorization.
+; FIXME: These loops are exchanged at this time due to the problem in
+; profitability heuristic calculation for vectorization.
 
 ; CHECK:      --- !Passed
 ; CHECK-NEXT: Pass:            loop-interchange



More information about the llvm-commits mailing list