[llvm] [LoopInterchange] Constrain LI within supported loop nest depth (PR #118656)
Madhur Amilkanthwar via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 4 07:34:00 PST 2024
https://github.com/madhur13490 created https://github.com/llvm/llvm-project/pull/118656
This patch is an extension to #115128.
After profiling the LLVM test-suite, I see many loop nests deeper than `MaxLoopNestDepth`, which is 10. Exiting early for them saves compile time, since it avoids computing DependenceInfo and CacheCost.
>From 18a195ec68a9f98b18bf878aec71ea0905651bb0 Mon Sep 17 00:00:00 2001
From: Madhur Amilkanthwar <madhura at nvidia.com>
Date: Wed, 4 Dec 2024 05:06:51 -0800
Subject: [PATCH] [LoopInterchange] Constrain LI within supported loop nest
depth
This patch is an extension to #115128.
After profiling the LLVM test-suite, I see many loop nests
deeper than `MaxLoopNestDepth`, which is 10.
Exiting early for them saves compile time, since it avoids
computing DependenceInfo and CacheCost.
---
.../lib/Transforms/Scalar/LoopInterchange.cpp | 24 ++---
.../LoopInterchange/bail-out-one-loop.ll | 2 +-
.../LoopInterchange/deep-loop-nest.ll | 95 +++++++++++++++++++
3 files changed, 108 insertions(+), 13 deletions(-)
create mode 100644 llvm/test/Transforms/LoopInterchange/deep-loop-nest.ll
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index a0c0080c0bda1c..b5ebde300ebe6f 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -69,6 +69,9 @@ using CharMatrix = std::vector<std::vector<char>>;
// Maximum number of dependencies that can be handled in the dependency matrix.
static const unsigned MaxMemInstrCount = 100;
+// Minimum loop depth supported.
+static const unsigned MinLoopNestDepth = 2;
+
// Maximum loop depth supported.
static const unsigned MaxLoopNestDepth = 10;
@@ -239,10 +242,12 @@ static void populateWorklist(Loop &L, LoopVector &LoopList) {
LoopList.push_back(CurrentLoop);
}
-static bool hasMinimumLoopDepth(SmallVectorImpl<Loop *> &LoopList) {
+static bool hasSupportedLoopDepth(SmallVectorImpl<Loop *> &LoopList) {
unsigned LoopNestDepth = LoopList.size();
- if (LoopNestDepth < 2) {
- LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");
+ if (LoopNestDepth < MinLoopNestDepth || LoopNestDepth > MaxLoopNestDepth) {
+ LLVM_DEBUG(dbgs() << "Unsupported depth of loop nest " << LoopNestDepth
+ << " should be [" << MinLoopNestDepth << ", "
+ << MaxLoopNestDepth << "]\n");
return false;
}
return true;
@@ -430,15 +435,10 @@ struct LoopInterchange {
bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {
bool Changed = false;
- // Ensure minimum loop nest depth.
- assert(hasMinimumLoopDepth(LoopList) && "Loop nest does not meet minimum depth.");
+ // Ensure proper loop nest depth.
+ assert(hasSupportedLoopDepth(LoopList) && "Unsupported depth of loop nest.");
unsigned LoopNestDepth = LoopList.size();
- if (LoopNestDepth > MaxLoopNestDepth) {
- LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "
- << MaxLoopNestDepth << "\n");
- return false;
- }
if (!isComputableLoopNest(LoopList)) {
LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
return false;
@@ -1725,8 +1725,8 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
LPMUpdater &U) {
Function &F = *LN.getParent();
SmallVector<Loop *, 8> LoopList(LN.getLoops());
- // Ensure minimum depth of the loop nest to do the interchange.
- if (!hasMinimumLoopDepth(LoopList))
+ // Ensure proper depth of the loop nest to do the interchange.
+ if (!hasSupportedLoopDepth(LoopList))
return PreservedAnalyses::all();
DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
diff --git a/llvm/test/Transforms/LoopInterchange/bail-out-one-loop.ll b/llvm/test/Transforms/LoopInterchange/bail-out-one-loop.ll
index 788e1b0157d80f..477b37937747fc 100644
--- a/llvm/test/Transforms/LoopInterchange/bail-out-one-loop.ll
+++ b/llvm/test/Transforms/LoopInterchange/bail-out-one-loop.ll
@@ -15,7 +15,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i6
; CHECK-NOT: Delinearizing
; CHECK-NOT: Strides:
; CHECK-NOT: Terms:
-; CHECK: Loop doesn't contain minimum nesting level.
+; CHECK: Unsupported depth of loop nest 1 should be [2, 10]
define void @foo() {
entry:
diff --git a/llvm/test/Transforms/LoopInterchange/deep-loop-nest.ll b/llvm/test/Transforms/LoopInterchange/deep-loop-nest.ll
new file mode 100644
index 00000000000000..348c1ab74b7588
--- /dev/null
+++ b/llvm/test/Transforms/LoopInterchange/deep-loop-nest.ll
@@ -0,0 +1,95 @@
+; REQUIRES: asserts
+; RUN: opt < %s -passes=loop-interchange -debug -disable-output 2>&1| FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; For deep loop nest, delinearization should not be run.
+
+; CHECK-NOT: Delinearizing
+; CHECK-NOT: Strides:
+; CHECK-NOT: Terms:
+; CHECK: Unsupported depth of loop nest 11 should be [2, 10]
+define void @big_loop_nest() {
+entry:
+ br label %for1.header
+
+for1.header:
+ %j = phi i64 [ 0, %entry ], [ %j.next, %for1.inc ]
+ br label %for2.header
+for2.header:
+ %k = phi i64 [ 0, %for1.header ], [ %k.next, %for2.inc ]
+ br label %for3.header
+for3.header:
+ %l = phi i64 [ 0, %for2.header ], [ %l.next, %for3.inc ]
+ br label %for4.header
+for4.header:
+ %m = phi i64 [ 0, %for3.header ], [ %m.next, %for4.inc ]
+ br label %for5.header
+for5.header:
+ %n = phi i64 [ 0, %for4.header ], [ %n.next, %for5.inc ]
+ br label %for6.header
+for6.header:
+ %o = phi i64 [ 0, %for5.header ], [ %o.next, %for6.inc ]
+ br label %for7.header
+for7.header:
+ %p = phi i64 [ 0, %for6.header ], [ %p.next, %for7.inc ]
+ br label %for8.header
+for8.header:
+ %q = phi i64 [ 0, %for7.header ], [ %q.next, %for8.inc ]
+ br label %for9.header
+for9.header:
+ %r = phi i64 [ 0, %for8.header ], [ %r.next, %for9.inc ]
+ br label %for10.header
+for10.header:
+ %s = phi i64 [ 0, %for9.header ], [ %s.next, %for10.inc ]
+ br label %for11
+for11:
+ %t = phi i64 [ %t.next, %for11 ], [ 0, %for10.header ]
+ %t.next = add nuw nsw i64 %t, 1
+ %exitcond = icmp eq i64 %t.next, 99
+ br i1 %exitcond, label %for1.inc, label %for11
+
+for1.inc:
+ %j.next = add nuw nsw i64 %j, 1
+ %exitcond26 = icmp eq i64 %j.next, 99
+ br i1 %exitcond26, label %for2.inc, label %for1.header
+for2.inc:
+ %k.next = add nuw nsw i64 %k, 1
+ %exitcond27 = icmp eq i64 %j.next, 99
+ br i1 %exitcond27, label %for3.inc, label %for2.header
+for3.inc:
+ %l.next = add nuw nsw i64 %l, 1
+ %exitcond28 = icmp eq i64 %l.next, 99
+ br i1 %exitcond28, label %for4.inc, label %for3.header
+for4.inc:
+ %m.next = add nuw nsw i64 %m, 1
+ %exitcond29 = icmp eq i64 %m.next, 99
+ br i1 %exitcond29, label %for5.inc, label %for4.header
+for5.inc:
+ %n.next = add nuw nsw i64 %n, 1
+ %exitcond30 = icmp eq i64 %n.next, 99
+ br i1 %exitcond30, label %for6.inc, label %for5.header
+for6.inc:
+ %o.next = add nuw nsw i64 %o, 1
+ %exitcond31 = icmp eq i64 %o.next, 99
+ br i1 %exitcond31, label %for7.inc, label %for6.header
+for7.inc:
+ %p.next = add nuw nsw i64 %p, 1
+ %exitcond32 = icmp eq i64 %p.next, 99
+ br i1 %exitcond32, label %for8.inc, label %for7.header
+for8.inc:
+ %q.next = add nuw nsw i64 %q, 1
+ %exitcond33 = icmp eq i64 %q.next, 99
+ br i1 %exitcond33, label %for9.inc, label %for8.header
+for9.inc:
+ %r.next = add nuw nsw i64 %r, 1
+ %exitcond34 = icmp eq i64 %q.next, 99
+ br i1 %exitcond34, label %for10.inc, label %for9.header
+for10.inc:
+ %s.next = add nuw nsw i64 %s, 1
+ %exitcond35 = icmp eq i64 %s.next, 99
+ br i1 %exitcond35, label %for.end, label %for10.header
+
+for.end:
+ ret void
+}
More information about the llvm-commits
mailing list