[llvm] [LoopInterchange] Constrain LI within supported loop nest depth (PR #118656)

Madhur Amilkanthwar via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 4 07:34:00 PST 2024


https://github.com/madhur13490 created https://github.com/llvm/llvm-project/pull/118656

This patch is an extension to #115128.

After profiling the LLVM test-suite, I see many loop nests whose depth exceeds `MaxLoopNestDepth`, which is 10. Exiting early for such nests saves compile time, as it avoids computing DependenceInfo and CacheCost.

>From 18a195ec68a9f98b18bf878aec71ea0905651bb0 Mon Sep 17 00:00:00 2001
From: Madhur Amilkanthwar <madhura at nvidia.com>
Date: Wed, 4 Dec 2024 05:06:51 -0800
Subject: [PATCH] [LoopInterchange] Constrain LI within supported loop nest
 depth

This patch is an extension to #115128.

After profiling the LLVM test-suite, I see many loop nests
whose depth exceeds `MaxLoopNestDepth`, which is 10.
Exiting early for such nests saves compile time, as it avoids
computing DependenceInfo and CacheCost.
---
 .../lib/Transforms/Scalar/LoopInterchange.cpp | 24 ++---
 .../LoopInterchange/bail-out-one-loop.ll      |  2 +-
 .../LoopInterchange/deep-loop-nest.ll         | 95 +++++++++++++++++++
 3 files changed, 108 insertions(+), 13 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopInterchange/deep-loop-nest.ll

diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index a0c0080c0bda1c..b5ebde300ebe6f 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -69,6 +69,9 @@ using CharMatrix = std::vector<std::vector<char>>;
 // Maximum number of dependencies that can be handled in the dependency matrix.
 static const unsigned MaxMemInstrCount = 100;
 
+// Minimum loop depth supported.
+static const unsigned MinLoopNestDepth = 2;
+
 // Maximum loop depth supported.
 static const unsigned MaxLoopNestDepth = 10;
 
@@ -239,10 +242,12 @@ static void populateWorklist(Loop &L, LoopVector &LoopList) {
   LoopList.push_back(CurrentLoop);
 }
 
-static bool hasMinimumLoopDepth(SmallVectorImpl<Loop *> &LoopList) {
+static bool hasSupportedLoopDepth(SmallVectorImpl<Loop *> &LoopList) {
   unsigned LoopNestDepth = LoopList.size();
-  if (LoopNestDepth < 2) {
-    LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");
+  if (LoopNestDepth < MinLoopNestDepth || LoopNestDepth > MaxLoopNestDepth) {
+    LLVM_DEBUG(dbgs() << "Unsupported depth of loop nest " << LoopNestDepth
+                      << " should be [" << MinLoopNestDepth << ", "
+                      << MaxLoopNestDepth << "]\n");
     return false;
   }
   return true;
@@ -430,15 +435,10 @@ struct LoopInterchange {
   bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {
     bool Changed = false;
 
-    // Ensure minimum loop nest depth.
-    assert(hasMinimumLoopDepth(LoopList) && "Loop nest does not meet minimum depth.");
+    // Ensure proper loop nest depth.
+    assert(hasSupportedLoopDepth(LoopList) && "Unsupported depth of loop nest.");
 
     unsigned LoopNestDepth = LoopList.size();
-    if (LoopNestDepth > MaxLoopNestDepth) {
-      LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "
-                        << MaxLoopNestDepth << "\n");
-      return false;
-    }
     if (!isComputableLoopNest(LoopList)) {
       LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
       return false;
@@ -1725,8 +1725,8 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
                                            LPMUpdater &U) {
   Function &F = *LN.getParent();
   SmallVector<Loop *, 8> LoopList(LN.getLoops());
-  // Ensure minimum depth of the loop nest to do the interchange.
-  if (!hasMinimumLoopDepth(LoopList))
+  // Ensure proper depth of the loop nest to do the interchange.
+  if (!hasSupportedLoopDepth(LoopList))
     return PreservedAnalyses::all();
 
   DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
diff --git a/llvm/test/Transforms/LoopInterchange/bail-out-one-loop.ll b/llvm/test/Transforms/LoopInterchange/bail-out-one-loop.ll
index 788e1b0157d80f..477b37937747fc 100644
--- a/llvm/test/Transforms/LoopInterchange/bail-out-one-loop.ll
+++ b/llvm/test/Transforms/LoopInterchange/bail-out-one-loop.ll
@@ -15,7 +15,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i6
 ; CHECK-NOT: Delinearizing
 ; CHECK-NOT: Strides:
 ; CHECK-NOT: Terms:
-; CHECK: Loop doesn't contain minimum nesting level.
+; CHECK: Unsupported depth of loop nest 1 should be [2, 10]
 
 define void @foo() {
 entry:
diff --git a/llvm/test/Transforms/LoopInterchange/deep-loop-nest.ll b/llvm/test/Transforms/LoopInterchange/deep-loop-nest.ll
new file mode 100644
index 00000000000000..348c1ab74b7588
--- /dev/null
+++ b/llvm/test/Transforms/LoopInterchange/deep-loop-nest.ll
@@ -0,0 +1,95 @@
+; REQUIRES: asserts
+; RUN: opt < %s -passes=loop-interchange -debug -disable-output 2>&1| FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; For deep loop nest, delinearization should not be run.
+
+; CHECK-NOT: Delinearizing
+; CHECK-NOT: Strides:
+; CHECK-NOT: Terms:
+; CHECK: Unsupported depth of loop nest 11 should be [2, 10]
+define void @big_loop_nest() {
+entry:
+  br label %for1.header
+
+for1.header:
+  %j = phi i64 [ 0, %entry ], [ %j.next, %for1.inc ]
+  br label %for2.header
+for2.header:
+  %k = phi i64 [ 0, %for1.header ], [ %k.next, %for2.inc ]
+  br label %for3.header
+for3.header:
+  %l = phi i64 [ 0, %for2.header ], [ %l.next, %for3.inc ]
+  br label %for4.header
+for4.header:
+  %m = phi i64 [ 0, %for3.header ], [ %m.next, %for4.inc ]
+  br label %for5.header
+for5.header:
+  %n = phi i64 [ 0, %for4.header ], [ %n.next, %for5.inc ]
+  br label %for6.header
+for6.header:
+  %o = phi i64 [ 0, %for5.header ], [ %o.next, %for6.inc ]
+  br label %for7.header
+for7.header:
+  %p = phi i64 [ 0, %for6.header ], [ %p.next, %for7.inc ]
+  br label %for8.header
+for8.header:
+  %q = phi i64 [ 0, %for7.header ], [ %q.next, %for8.inc ]
+  br label %for9.header
+for9.header:
+  %r = phi i64 [ 0, %for8.header ], [ %r.next, %for9.inc ]
+  br label %for10.header
+for10.header:
+  %s = phi i64 [ 0, %for9.header ], [ %s.next, %for10.inc ]
+  br label %for11
+for11:
+  %t = phi i64 [ %t.next, %for11 ], [ 0, %for10.header ]
+  %t.next = add nuw nsw i64 %t, 1
+  %exitcond = icmp eq i64 %t.next, 99
+  br i1 %exitcond, label %for1.inc, label %for11
+
+for1.inc:
+  %j.next = add nuw nsw i64 %j, 1
+  %exitcond26 = icmp eq i64 %j.next, 99
+  br i1 %exitcond26, label %for2.inc, label %for1.header
+for2.inc:
+  %k.next = add nuw nsw i64 %k, 1
+  %exitcond27 = icmp eq i64 %j.next, 99
+  br i1 %exitcond27, label %for3.inc, label %for2.header
+for3.inc:
+  %l.next = add nuw nsw i64 %l, 1
+  %exitcond28 = icmp eq i64 %l.next, 99
+  br i1 %exitcond28, label %for4.inc, label %for3.header
+for4.inc:
+  %m.next = add nuw nsw i64 %m, 1
+  %exitcond29 = icmp eq i64 %m.next, 99
+  br i1 %exitcond29, label %for5.inc, label %for4.header
+for5.inc:
+  %n.next = add nuw nsw i64 %n, 1
+  %exitcond30 = icmp eq i64 %n.next, 99
+  br i1 %exitcond30, label %for6.inc, label %for5.header
+for6.inc:
+  %o.next = add nuw nsw i64 %o, 1
+  %exitcond31 = icmp eq i64 %o.next, 99
+  br i1 %exitcond31, label %for7.inc, label %for6.header
+for7.inc:
+  %p.next = add nuw nsw i64 %p, 1
+  %exitcond32 = icmp eq i64 %p.next, 99
+  br i1 %exitcond32, label %for8.inc, label %for7.header
+for8.inc:
+  %q.next = add nuw nsw i64 %q, 1
+  %exitcond33 = icmp eq i64 %q.next, 99
+  br i1 %exitcond33, label %for9.inc, label %for8.header
+for9.inc:
+  %r.next = add nuw nsw i64 %r, 1
+  %exitcond34 = icmp eq i64 %q.next, 99
+  br i1 %exitcond34, label %for10.inc, label %for9.header
+for10.inc:
+  %s.next = add nuw nsw i64 %s, 1
+  %exitcond35 = icmp eq i64 %s.next, 99
+  br i1 %exitcond35, label %for.end, label %for10.header
+
+for.end:
+  ret void
+}



More information about the llvm-commits mailing list