[llvm] [LoopInterchange] Constrain LI within supported loop nest depth (PR #118656)

Madhur Amilkanthwar via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 21 09:05:40 PST 2025


https://github.com/madhur13490 updated https://github.com/llvm/llvm-project/pull/118656

>From 99d69df19265857c36bcc76546bf3a88b87b08e4 Mon Sep 17 00:00:00 2001
From: Madhur Amilkanthwar <madhura at nvidia.com>
Date: Wed, 4 Dec 2024 05:06:51 -0800
Subject: [PATCH 1/2] [LoopInterchange] Constrain LI within supported loop nest
 depth

This patch is an extension to #115128.

After profiling the LLVM test-suite, I see a lot of loop nests
with a depth greater than `MaxLoopNestDepth`, which is 10.
Exiting early for them saves compile time, as it avoids
computing DependenceInfo and CacheCost.
---
 .../lib/Transforms/Scalar/LoopInterchange.cpp | 36 ++++---
 .../LoopInterchange/bail-out-one-loop.ll      |  2 +-
 .../LoopInterchange/deep-loop-nest.ll         | 95 +++++++++++++++++++
 3 files changed, 120 insertions(+), 13 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopInterchange/deep-loop-nest.ll

diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 5bcc5e41a0e875..a9afaf8e6f9bce 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -74,6 +74,9 @@ using CharMatrix = std::vector<std::vector<char>>;
 
 } // end anonymous namespace
 
+// Minimum loop depth supported.
+static const unsigned MinLoopNestDepth = 2;
+
 // Maximum loop depth supported.
 static const unsigned MaxLoopNestDepth = 10;
 
@@ -244,10 +247,22 @@ static void populateWorklist(Loop &L, LoopVector &LoopList) {
   LoopList.push_back(CurrentLoop);
 }
 
-static bool hasMinimumLoopDepth(SmallVectorImpl<Loop *> &LoopList) {
+static bool hasSupportedLoopDepth(SmallVectorImpl<Loop *> &LoopList,
+                                  OptimizationRemarkEmitter &ORE) {
   unsigned LoopNestDepth = LoopList.size();
-  if (LoopNestDepth < 2) {
-    LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");
+  if (LoopNestDepth < MinLoopNestDepth || LoopNestDepth > MaxLoopNestDepth) {
+    LLVM_DEBUG(dbgs() << "Unsupported depth of loop nest " << LoopNestDepth
+                      << ", the supported range is [" << MinLoopNestDepth
+                      << ", " << MaxLoopNestDepth << "].\n");
+    Loop **OuterLoop = LoopList.begin();
+    ORE.emit([&]() {
+      return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedLoopNestDepth",
+                                      (*OuterLoop)->getStartLoc(),
+                                      (*OuterLoop)->getHeader())
+             << "Unsupported depth of loop nest, the supported range is ["
+             << std::to_string(MinLoopNestDepth) << ", "
+             << std::to_string(MaxLoopNestDepth) << "].\n";
+    });
     return false;
   }
   return true;
@@ -435,15 +450,11 @@ struct LoopInterchange {
   bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {
     bool Changed = false;
 
-    // Ensure minimum loop nest depth.
-    assert(hasMinimumLoopDepth(LoopList) && "Loop nest does not meet minimum depth.");
+    // Ensure proper loop nest depth.
+    assert(hasSupportedLoopDepth(LoopList, *ORE) &&
+           "Unsupported depth of loop nest.");
 
     unsigned LoopNestDepth = LoopList.size();
-    if (LoopNestDepth > MaxLoopNestDepth) {
-      LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "
-                        << MaxLoopNestDepth << "\n");
-      return false;
-    }
     if (!isComputableLoopNest(LoopList)) {
       LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
       return false;
@@ -1735,14 +1746,15 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
     LLVM_DEBUG(dbgs() << "MaxMemInstrCount should be at least 1");
     return PreservedAnalyses::all();
   }
+  OptimizationRemarkEmitter ORE(&F);
 
   // Ensure minimum depth of the loop nest to do the interchange.
-  if (!hasMinimumLoopDepth(LoopList))
+  if (!hasSupportedLoopDepth(LoopList, ORE))
     return PreservedAnalyses::all();
   DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
   std::unique_ptr<CacheCost> CC =
       CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI);
-  OptimizationRemarkEmitter ORE(&F);
+  
   if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN))
     return PreservedAnalyses::all();
   U.markLoopNestChanged(true);
diff --git a/llvm/test/Transforms/LoopInterchange/bail-out-one-loop.ll b/llvm/test/Transforms/LoopInterchange/bail-out-one-loop.ll
index 788e1b0157d80f..d1cf33acd28319 100644
--- a/llvm/test/Transforms/LoopInterchange/bail-out-one-loop.ll
+++ b/llvm/test/Transforms/LoopInterchange/bail-out-one-loop.ll
@@ -15,7 +15,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i6
 ; CHECK-NOT: Delinearizing
 ; CHECK-NOT: Strides:
 ; CHECK-NOT: Terms:
-; CHECK: Loop doesn't contain minimum nesting level.
+; CHECK: Unsupported depth of loop nest 1, the supported range is [2, 10].
 
 define void @foo() {
 entry:
diff --git a/llvm/test/Transforms/LoopInterchange/deep-loop-nest.ll b/llvm/test/Transforms/LoopInterchange/deep-loop-nest.ll
new file mode 100644
index 00000000000000..30a6549a319eb0
--- /dev/null
+++ b/llvm/test/Transforms/LoopInterchange/deep-loop-nest.ll
@@ -0,0 +1,95 @@
+; REQUIRES: asserts
+; RUN: opt < %s -passes=loop-interchange -debug -disable-output 2>&1| FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; For deep loop nest, delinearization should not be run.
+
+; CHECK-NOT: Delinearizing
+; CHECK-NOT: Strides:
+; CHECK-NOT: Terms:
+; CHECK: Unsupported depth of loop nest 11, the supported range is [2, 10].
+define void @big_loop_nest() {
+entry:
+  br label %for1.header
+
+for1.header:
+  %j = phi i64 [ 0, %entry ], [ %j.next, %for1.inc ]
+  br label %for2.header
+for2.header:
+  %k = phi i64 [ 0, %for1.header ], [ %k.next, %for2.inc ]
+  br label %for3.header
+for3.header:
+  %l = phi i64 [ 0, %for2.header ], [ %l.next, %for3.inc ]
+  br label %for4.header
+for4.header:
+  %m = phi i64 [ 0, %for3.header ], [ %m.next, %for4.inc ]
+  br label %for5.header
+for5.header:
+  %n = phi i64 [ 0, %for4.header ], [ %n.next, %for5.inc ]
+  br label %for6.header
+for6.header:
+  %o = phi i64 [ 0, %for5.header ], [ %o.next, %for6.inc ]
+  br label %for7.header
+for7.header:
+  %p = phi i64 [ 0, %for6.header ], [ %p.next, %for7.inc ]
+  br label %for8.header
+for8.header:
+  %q = phi i64 [ 0, %for7.header ], [ %q.next, %for8.inc ]
+  br label %for9.header
+for9.header:
+  %r = phi i64 [ 0, %for8.header ], [ %r.next, %for9.inc ]
+  br label %for10.header
+for10.header:
+  %s = phi i64 [ 0, %for9.header ], [ %s.next, %for10.inc ]
+  br label %for11
+for11:
+  %t = phi i64 [ %t.next, %for11 ], [ 0, %for10.header ]
+  %t.next = add nuw nsw i64 %t, 1
+  %exitcond = icmp eq i64 %t.next, 99
+  br i1 %exitcond, label %for1.inc, label %for11
+
+for1.inc:
+  %j.next = add nuw nsw i64 %j, 1
+  %exitcond26 = icmp eq i64 %j.next, 99
+  br i1 %exitcond26, label %for2.inc, label %for1.header
+for2.inc:
+  %k.next = add nuw nsw i64 %k, 1
+  %exitcond27 = icmp eq i64 %j.next, 99
+  br i1 %exitcond27, label %for3.inc, label %for2.header
+for3.inc:
+  %l.next = add nuw nsw i64 %l, 1
+  %exitcond28 = icmp eq i64 %l.next, 99
+  br i1 %exitcond28, label %for4.inc, label %for3.header
+for4.inc:
+  %m.next = add nuw nsw i64 %m, 1
+  %exitcond29 = icmp eq i64 %m.next, 99
+  br i1 %exitcond29, label %for5.inc, label %for4.header
+for5.inc:
+  %n.next = add nuw nsw i64 %n, 1
+  %exitcond30 = icmp eq i64 %n.next, 99
+  br i1 %exitcond30, label %for6.inc, label %for5.header
+for6.inc:
+  %o.next = add nuw nsw i64 %o, 1
+  %exitcond31 = icmp eq i64 %o.next, 99
+  br i1 %exitcond31, label %for7.inc, label %for6.header
+for7.inc:
+  %p.next = add nuw nsw i64 %p, 1
+  %exitcond32 = icmp eq i64 %p.next, 99
+  br i1 %exitcond32, label %for8.inc, label %for7.header
+for8.inc:
+  %q.next = add nuw nsw i64 %q, 1
+  %exitcond33 = icmp eq i64 %q.next, 99
+  br i1 %exitcond33, label %for9.inc, label %for8.header
+for9.inc:
+  %r.next = add nuw nsw i64 %r, 1
+  %exitcond34 = icmp eq i64 %q.next, 99
+  br i1 %exitcond34, label %for10.inc, label %for9.header
+for10.inc:
+  %s.next = add nuw nsw i64 %s, 1
+  %exitcond35 = icmp eq i64 %s.next, 99
+  br i1 %exitcond35, label %for.end, label %for10.header
+
+for.end:
+  ret void
+}

>From b6d834518823fdfc6cf0bee464db63c09cc91568 Mon Sep 17 00:00:00 2001
From: Madhur Amilkanthwar <madhura at nvidia.com>
Date: Tue, 21 Jan 2025 00:03:21 -0800
Subject: [PATCH 2/2] Address comments

---
 llvm/lib/Transforms/Scalar/LoopInterchange.cpp   |  8 ++++++--
 .../Transforms/LoopInterchange/deep-loop-nest.ll | 16 ++++++++--------
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index a9afaf8e6f9bce..d366e749c7370d 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -75,10 +75,14 @@ using CharMatrix = std::vector<std::vector<char>>;
 } // end anonymous namespace
 
 // Minimum loop depth supported.
-static const unsigned MinLoopNestDepth = 2;
+static cl::opt<unsigned int> MinLoopNestDepth(
+    "loop-interchange-min-loop-nest-depth", cl::init(2), cl::Hidden,
+    cl::desc("Minimum depth of loop nest considered for the transform"));
 
 // Maximum loop depth supported.
-static const unsigned MaxLoopNestDepth = 10;
+static cl::opt<unsigned int> MaxLoopNestDepth(
+    "loop-interchange-max-loop-nest-depth", cl::init(10), cl::Hidden,
+    cl::desc("Maximum depth of loop nest considered for the transform"));
 
 #ifndef NDEBUG
 static void printDepMatrix(CharMatrix &DepMatrix) {
diff --git a/llvm/test/Transforms/LoopInterchange/deep-loop-nest.ll b/llvm/test/Transforms/LoopInterchange/deep-loop-nest.ll
index 30a6549a319eb0..3252d3c0d70693 100644
--- a/llvm/test/Transforms/LoopInterchange/deep-loop-nest.ll
+++ b/llvm/test/Transforms/LoopInterchange/deep-loop-nest.ll
@@ -1,14 +1,14 @@
-; REQUIRES: asserts
-; RUN: opt < %s -passes=loop-interchange -debug -disable-output 2>&1| FileCheck %s
+; RUN: opt < %s -passes=loop-interchange -pass-remarks-missed='loop-interchange' \
+; RUN:          -disable-output 2>&1 | FileCheck %s
 
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+; RUN: opt < %s -passes=loop-interchange -pass-remarks-missed='loop-interchange' \
+; RUN:          -loop-interchange-max-loop-nest-depth=12 -disable-output 2>&1 | \
+; RUN:          FileCheck --allow-empty -check-prefix=CHECK-MAX %s
 
-; For deep loop nest, delinearization should not be run.
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
-; CHECK-NOT: Delinearizing
-; CHECK-NOT: Strides:
-; CHECK-NOT: Terms:
-; CHECK: Unsupported depth of loop nest 11, the supported range is [2, 10].
+; CHECK: Unsupported depth of loop nest, the supported range is [2, 10].
+; CHECK-MAX-NOT: Unsupported depth of loop nest, the supported range is [2, 10].
 define void @big_loop_nest() {
 entry:
   br label %for1.header



More information about the llvm-commits mailing list