[llvm] [LoopInterchange] Hoist isCompuatableLoopNest() in the control flow (PR #124247)

Fri Jan 24 01:52:13 PST 2025

https://github.com/madhur13490 created https://github.com/llvm/llvm-project/pull/124247

The profiling of the LLVM Test-suite reveals that a significant portion, specifically 14,090 out of 139,323, loop nests were identified as non-viable candidates for transformation, leading to the transform exiting from isComputableLoopNest() without any action.

More importantly, dependence information was computed for these loop nests before reaching the function isComputableLoopNest(), which does not require DI and relies solely on scalar evolution (SE).

To enhance compile-time efficiency, this patch moves the call to isComputableLoopNest() earlier in the control-flow, thereby avoiding unnecessary dependence calculations.

The impact of this change is evident on the compile-time-tracker, with the overall geometric mean improvement recorded at 0.11%, while the lencode benchmark gets a more substantial benefit of 0.44%.
This improvement can be tracked in the isc-ln-exp-2 branch under my repo.

>From 889bbf61ac94b39d8da97daad5f1cc0f117f0e72 Mon Sep 17 00:00:00 2001
From: Madhur Amilkanthwar <madhura at nvidia.com>
Date: Wed, 22 Jan 2025 00:50:29 -0800
Subject: [PATCH] [LoopInterchange] Hoist isCompuatableLoopNest() in the
 control flow

The profiling of the LLVM Test-suite reveals that a significant portion,
specifically 14,090 out of 139,323, loop nests were identified as non-viable
candidates for transformation, leading to the transform exiting from isComputableLoopNest()
without any action.

More importantly, dependence information was computed for these loop nests before
reaching the function isComputableLoopNest(), which does not require
DI and relies solely on scalar evolution (SE).

To enhance compile-time efficiency, this patch moves the call to isComputableLoopNest()
earlier in the control-flow, thereby avoiding unnecessary dependence calculations.

The impact of this change is evident on the compile-time-tracker, with the
overall geometric mean improvement recorded at 0.11%, while the lencode benchmark
gets a more substantial benefit of 0.44%.
This improvement can be tracked in the isc-ln-exp-2 branch under my repo.
---
 .../lib/Transforms/Scalar/LoopInterchange.cpp | 52 ++++++++++---------
 1 file changed, 28 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index d366e749c7370d..e995db1f5c1f6a 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -271,6 +271,26 @@ static bool hasSupportedLoopDepth(SmallVectorImpl<Loop *> &LoopList,
   }
   return true;
 }
+
+static bool isComputableLoopNest(ScalarEvolution *SE, ArrayRef<Loop *> LoopList) {
+  for (Loop *L : LoopList) {
+    const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
+    if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
+      LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n");
+      return false;
+    }
+    if (L->getNumBackEdges() != 1) {
+      LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n");
+      return false;
+    }
+    if (!L->getExitingBlock()) {
+      LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n");
+      return false;
+    }
+  }
+  return true;
+}
+
 namespace {
 
 /// LoopInterchangeLegality checks if it is legal to interchange the loop.
@@ -426,25 +446,6 @@ struct LoopInterchange {
     return processLoopList(LoopList);
   }
 
-  bool isComputableLoopNest(ArrayRef<Loop *> LoopList) {
-    for (Loop *L : LoopList) {
-      const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
-      if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
-        LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n");
-        return false;
-      }
-      if (L->getNumBackEdges() != 1) {
-        LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n");
-        return false;
-      }
-      if (!L->getExitingBlock()) {
-        LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n");
-        return false;
-      }
-    }
-    return true;
-  }
-
   unsigned selectLoopForInterchange(ArrayRef<Loop *> LoopList) {
     // TODO: Add a better heuristic to select the loop to be interchanged based
     // on the dependence matrix. Currently we select the innermost loop.
@@ -459,10 +460,6 @@ struct LoopInterchange {
            "Unsupported depth of loop nest.");
 
     unsigned LoopNestDepth = LoopList.size();
-    if (!isComputableLoopNest(LoopList)) {
-      LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
-      return false;
-    }
 
     LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepth
                       << "\n");
@@ -1755,10 +1752,17 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
   // Ensure minimum depth of the loop nest to do the interchange.
   if (!hasSupportedLoopDepth(LoopList, ORE))
     return PreservedAnalyses::all();
+  
+  // Ensure computable loop nest.
+  if (!isComputableLoopNest(&AR.SE, LoopList)) {
+      LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
+      return PreservedAnalyses::all();
+  }
+
   DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
   std::unique_ptr<CacheCost> CC =
       CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI);
-  
+
   if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN))
     return PreservedAnalyses::all();
   U.markLoopNestChanged(true);