[llvm] [LoopInterchange] Constrain LI within supported loop nest depth (PR #118656)
Madhur Amilkanthwar via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 21 09:05:40 PST 2025
https://github.com/madhur13490 updated https://github.com/llvm/llvm-project/pull/118656
>From 99d69df19265857c36bcc76546bf3a88b87b08e4 Mon Sep 17 00:00:00 2001
From: Madhur Amilkanthwar <madhura at nvidia.com>
Date: Wed, 4 Dec 2024 05:06:51 -0800
Subject: [PATCH 1/2] [LoopInterchange] Constrain LI within supported loop nest
depth
This patch is an extension to #115128.
After profiling the LLVM test-suite, I see many loop nests whose
depth exceeds `MaxLoopNestDepth`, which is 10.
Exiting early for such nests saves compile time, because it avoids
computing DependenceInfo and CacheCost.
---
.../lib/Transforms/Scalar/LoopInterchange.cpp | 36 ++++---
.../LoopInterchange/bail-out-one-loop.ll | 2 +-
.../LoopInterchange/deep-loop-nest.ll | 95 +++++++++++++++++++
3 files changed, 120 insertions(+), 13 deletions(-)
create mode 100644 llvm/test/Transforms/LoopInterchange/deep-loop-nest.ll
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 5bcc5e41a0e875..a9afaf8e6f9bce 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -74,6 +74,9 @@ using CharMatrix = std::vector<std::vector<char>>;
} // end anonymous namespace
+// Minimum loop depth supported.
+static const unsigned MinLoopNestDepth = 2;
+
// Maximum loop depth supported.
static const unsigned MaxLoopNestDepth = 10;
@@ -244,10 +247,22 @@ static void populateWorklist(Loop &L, LoopVector &LoopList) {
LoopList.push_back(CurrentLoop);
}
-static bool hasMinimumLoopDepth(SmallVectorImpl<Loop *> &LoopList) {
+static bool hasSupportedLoopDepth(SmallVectorImpl<Loop *> &LoopList,
+ OptimizationRemarkEmitter &ORE) {
unsigned LoopNestDepth = LoopList.size();
- if (LoopNestDepth < 2) {
- LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");
+ if (LoopNestDepth < MinLoopNestDepth || LoopNestDepth > MaxLoopNestDepth) {
+ LLVM_DEBUG(dbgs() << "Unsupported depth of loop nest " << LoopNestDepth
+ << ", the supported range is [" << MinLoopNestDepth
+ << ", " << MaxLoopNestDepth << "].\n");
+ Loop **OuterLoop = LoopList.begin();
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedLoopNestDepth",
+ (*OuterLoop)->getStartLoc(),
+ (*OuterLoop)->getHeader())
+ << "Unsupported depth of loop nest, the supported range is ["
+ << std::to_string(MinLoopNestDepth) << ", "
+ << std::to_string(MaxLoopNestDepth) << "].\n";
+ });
return false;
}
return true;
@@ -435,15 +450,11 @@ struct LoopInterchange {
bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {
bool Changed = false;
- // Ensure minimum loop nest depth.
- assert(hasMinimumLoopDepth(LoopList) && "Loop nest does not meet minimum depth.");
+ // Ensure proper loop nest depth.
+ assert(hasSupportedLoopDepth(LoopList, *ORE) &&
+ "Unsupported depth of loop nest.");
unsigned LoopNestDepth = LoopList.size();
- if (LoopNestDepth > MaxLoopNestDepth) {
- LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "
- << MaxLoopNestDepth << "\n");
- return false;
- }
if (!isComputableLoopNest(LoopList)) {
LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
return false;
@@ -1735,14 +1746,15 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
LLVM_DEBUG(dbgs() << "MaxMemInstrCount should be at least 1");
return PreservedAnalyses::all();
}
+ OptimizationRemarkEmitter ORE(&F);
// Ensure minimum depth of the loop nest to do the interchange.
- if (!hasMinimumLoopDepth(LoopList))
+ if (!hasSupportedLoopDepth(LoopList, ORE))
return PreservedAnalyses::all();
DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
std::unique_ptr<CacheCost> CC =
CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI);
- OptimizationRemarkEmitter ORE(&F);
+
if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN))
return PreservedAnalyses::all();
U.markLoopNestChanged(true);
diff --git a/llvm/test/Transforms/LoopInterchange/bail-out-one-loop.ll b/llvm/test/Transforms/LoopInterchange/bail-out-one-loop.ll
index 788e1b0157d80f..d1cf33acd28319 100644
--- a/llvm/test/Transforms/LoopInterchange/bail-out-one-loop.ll
+++ b/llvm/test/Transforms/LoopInterchange/bail-out-one-loop.ll
@@ -15,7 +15,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i6
; CHECK-NOT: Delinearizing
; CHECK-NOT: Strides:
; CHECK-NOT: Terms:
-; CHECK: Loop doesn't contain minimum nesting level.
+; CHECK: Unsupported depth of loop nest 1, the supported range is [2, 10].
define void @foo() {
entry:
diff --git a/llvm/test/Transforms/LoopInterchange/deep-loop-nest.ll b/llvm/test/Transforms/LoopInterchange/deep-loop-nest.ll
new file mode 100644
index 00000000000000..30a6549a319eb0
--- /dev/null
+++ b/llvm/test/Transforms/LoopInterchange/deep-loop-nest.ll
@@ -0,0 +1,95 @@
+; REQUIRES: asserts
+; RUN: opt < %s -passes=loop-interchange -debug -disable-output 2>&1| FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; For deep loop nest, delinearization should not be run.
+
+; CHECK-NOT: Delinearizing
+; CHECK-NOT: Strides:
+; CHECK-NOT: Terms:
+; CHECK: Unsupported depth of loop nest 11, the supported range is [2, 10].
+define void @big_loop_nest() { ; loop nest with eleven induction variables (%j ... %t)
+entry:
+ br label %for1.header
+
+for1.header:
+ %j = phi i64 [ 0, %entry ], [ %j.next, %for1.inc ]
+ br label %for2.header
+for2.header:
+ %k = phi i64 [ 0, %for1.header ], [ %k.next, %for2.inc ]
+ br label %for3.header
+for3.header:
+ %l = phi i64 [ 0, %for2.header ], [ %l.next, %for3.inc ]
+ br label %for4.header
+for4.header:
+ %m = phi i64 [ 0, %for3.header ], [ %m.next, %for4.inc ]
+ br label %for5.header
+for5.header:
+ %n = phi i64 [ 0, %for4.header ], [ %n.next, %for5.inc ]
+ br label %for6.header
+for6.header:
+ %o = phi i64 [ 0, %for5.header ], [ %o.next, %for6.inc ]
+ br label %for7.header
+for7.header:
+ %p = phi i64 [ 0, %for6.header ], [ %p.next, %for7.inc ]
+ br label %for8.header
+for8.header:
+ %q = phi i64 [ 0, %for7.header ], [ %q.next, %for8.inc ]
+ br label %for9.header
+for9.header:
+ %r = phi i64 [ 0, %for8.header ], [ %r.next, %for9.inc ]
+ br label %for10.header
+for10.header:
+ %s = phi i64 [ 0, %for9.header ], [ %s.next, %for10.inc ]
+ br label %for11
+for11: ; single-block loop: header and latch are the same block
+ %t = phi i64 [ %t.next, %for11 ], [ 0, %for10.header ]
+ %t.next = add nuw nsw i64 %t, 1
+ %exitcond = icmp eq i64 %t.next, 99
+ br i1 %exitcond, label %for1.inc, label %for11
+
+for1.inc: ; each forN.inc block increments its own counter and tests it against 99
+ %j.next = add nuw nsw i64 %j, 1
+ %exitcond26 = icmp eq i64 %j.next, 99
+ br i1 %exitcond26, label %for2.inc, label %for1.header
+for2.inc:
+ %k.next = add nuw nsw i64 %k, 1
+ %exitcond27 = icmp eq i64 %k.next, 99 ; test this loop's own counter (%k.next)
+ br i1 %exitcond27, label %for3.inc, label %for2.header
+for3.inc:
+ %l.next = add nuw nsw i64 %l, 1
+ %exitcond28 = icmp eq i64 %l.next, 99
+ br i1 %exitcond28, label %for4.inc, label %for3.header
+for4.inc:
+ %m.next = add nuw nsw i64 %m, 1
+ %exitcond29 = icmp eq i64 %m.next, 99
+ br i1 %exitcond29, label %for5.inc, label %for4.header
+for5.inc:
+ %n.next = add nuw nsw i64 %n, 1
+ %exitcond30 = icmp eq i64 %n.next, 99
+ br i1 %exitcond30, label %for6.inc, label %for5.header
+for6.inc:
+ %o.next = add nuw nsw i64 %o, 1
+ %exitcond31 = icmp eq i64 %o.next, 99
+ br i1 %exitcond31, label %for7.inc, label %for6.header
+for7.inc:
+ %p.next = add nuw nsw i64 %p, 1
+ %exitcond32 = icmp eq i64 %p.next, 99
+ br i1 %exitcond32, label %for8.inc, label %for7.header
+for8.inc:
+ %q.next = add nuw nsw i64 %q, 1
+ %exitcond33 = icmp eq i64 %q.next, 99
+ br i1 %exitcond33, label %for9.inc, label %for8.header
+for9.inc:
+ %r.next = add nuw nsw i64 %r, 1
+ %exitcond34 = icmp eq i64 %r.next, 99 ; test this loop's own counter (%r.next)
+ br i1 %exitcond34, label %for10.inc, label %for9.header
+for10.inc:
+ %s.next = add nuw nsw i64 %s, 1
+ %exitcond35 = icmp eq i64 %s.next, 99
+ br i1 %exitcond35, label %for.end, label %for10.header
+
+for.end:
+ ret void
+}
>From b6d834518823fdfc6cf0bee464db63c09cc91568 Mon Sep 17 00:00:00 2001
From: Madhur Amilkanthwar <madhura at nvidia.com>
Date: Tue, 21 Jan 2025 00:03:21 -0800
Subject: [PATCH 2/2] Address comments
---
llvm/lib/Transforms/Scalar/LoopInterchange.cpp | 8 ++++++--
.../Transforms/LoopInterchange/deep-loop-nest.ll | 16 ++++++++--------
2 files changed, 14 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index a9afaf8e6f9bce..d366e749c7370d 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -75,10 +75,14 @@ using CharMatrix = std::vector<std::vector<char>>;
} // end anonymous namespace
// Minimum loop depth supported.
-static const unsigned MinLoopNestDepth = 2;
+static cl::opt<unsigned int> MinLoopNestDepth(
+ "loop-interchange-min-loop-nest-depth", cl::init(2), cl::Hidden,
+ cl::desc("Minimum depth of loop nest considered for the transform"));
// Maximum loop depth supported.
-static const unsigned MaxLoopNestDepth = 10;
+static cl::opt<unsigned int> MaxLoopNestDepth(
+ "loop-interchange-max-loop-nest-depth", cl::init(10), cl::Hidden,
+ cl::desc("Maximum depth of loop nest considered for the transform"));
#ifndef NDEBUG
static void printDepMatrix(CharMatrix &DepMatrix) {
diff --git a/llvm/test/Transforms/LoopInterchange/deep-loop-nest.ll b/llvm/test/Transforms/LoopInterchange/deep-loop-nest.ll
index 30a6549a319eb0..3252d3c0d70693 100644
--- a/llvm/test/Transforms/LoopInterchange/deep-loop-nest.ll
+++ b/llvm/test/Transforms/LoopInterchange/deep-loop-nest.ll
@@ -1,14 +1,14 @@
-; REQUIRES: asserts
-; RUN: opt < %s -passes=loop-interchange -debug -disable-output 2>&1| FileCheck %s
+; RUN: opt < %s -passes=loop-interchange -pass-remarks-missed='loop-interchange' \
+; RUN: -disable-output 2>&1 | FileCheck %s
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+; RUN: opt < %s -passes=loop-interchange -pass-remarks-missed='loop-interchange' \
+; RUN: -loop-interchange-max-loop-nest-depth=12 -disable-output 2>&1 | \
+; RUN: FileCheck --allow-empty -check-prefix=CHECK-MAX %s
-; For deep loop nest, delinearization should not be run.
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-; CHECK-NOT: Delinearizing
-; CHECK-NOT: Strides:
-; CHECK-NOT: Terms:
-; CHECK: Unsupported depth of loop nest 11, the supported range is [2, 10].
+; CHECK: Unsupported depth of loop nest, the supported range is [2, 10].
+; CHECK-MAX-NOT: Unsupported depth of loop nest, the supported range is [2, 10].
define void @big_loop_nest() {
entry:
br label %for1.header
More information about the llvm-commits
mailing list