[llvm] [LoopInterchange] Make the entries of the Dependency Matrix unique (PR #116195)

Sjoerd Meijer via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 19 02:00:21 PST 2024


https://github.com/sjoerdmeijer updated https://github.com/llvm/llvm-project/pull/116195

>From 5b1e2776a70264a580d5a8816642fafaee687d7e Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <smeijer at nvidia.com>
Date: Thu, 14 Nov 2024 01:53:44 -0800
Subject: [PATCH] [LoopInterchange] Make the entries of the Dependency Matrix
 unique

The dependency matrix can contain many duplicate entries. These
duplicates are unnecessary and result in redundant checks, so this patch
only adds an entry to the matrix if an identical one is not already present.
---
 .../lib/Transforms/Scalar/LoopInterchange.cpp | 24 +++++-----
 .../LoopInterchange/unique-dep-matrix.ll      | 45 +++++++++++++++++++
 2 files changed, 59 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopInterchange/unique-dep-matrix.ll

diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index db63bda1e6b926..53aaaee0ddf1c0 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -17,6 +17,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
 #include "llvm/Analysis/DependenceAnalysis.h"
 #include "llvm/Analysis/LoopCacheAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
@@ -71,7 +72,7 @@ static const unsigned MaxMemInstrCount = 100;
 // Maximum loop depth supported.
 static const unsigned MaxLoopNestDepth = 10;
 
-#ifdef DUMP_DEP_MATRICIES
+#ifndef NDEBUG
 static void printDepMatrix(CharMatrix &DepMatrix) {
   for (auto &Row : DepMatrix) {
     for (auto D : Row)
@@ -110,6 +111,7 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
                     << " Loads and Stores to analyze\n");
 
   ValueVector::iterator I, IE, J, JE;
+  StringSet<> Seen;
 
   for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
     for (J = I, JE = MemInstr.end(); J != JE; ++J) {
@@ -156,7 +158,10 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
           Dep.push_back('I');
         }
 
-        DepMatrix.push_back(Dep);
+        // Make sure we only add unique entries to the dependency matrix.
+        if (Seen.insert(StringRef(Dep.data(), Dep.size())).second)
+          DepMatrix.push_back(Dep);
+
         if (DepMatrix.size() > MaxMemInstrCount) {
           LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount
                             << " dependencies inside loop\n");
@@ -441,10 +446,9 @@ struct LoopInterchange {
       LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n");
       return false;
     }
-#ifdef DUMP_DEP_MATRICIES
-    LLVM_DEBUG(dbgs() << "Dependence before interchange\n");
-    printDepMatrix(DependencyMatrix);
-#endif
+
+    LLVM_DEBUG(dbgs() << "Dependency matrix before interchange:\n";
+               printDepMatrix(DependencyMatrix));
 
     // Get the Outermost loop exit.
     BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock();
@@ -484,10 +488,10 @@ struct LoopInterchange {
         std::swap(LoopList[i - 1], LoopList[i]);
         // Update the DependencyMatrix
         interChangeDependencies(DependencyMatrix, i, i - 1);
-#ifdef DUMP_DEP_MATRICIES
-        LLVM_DEBUG(dbgs() << "Dependence after interchange\n");
-        printDepMatrix(DependencyMatrix);
-#endif
+
+        LLVM_DEBUG(dbgs() << "Dependency matrix after interchange:\n";
+                   printDepMatrix(DependencyMatrix));
+
         ChangedPerIter |= Interchanged;
         Changed |= Interchanged;
       }
diff --git a/llvm/test/Transforms/LoopInterchange/unique-dep-matrix.ll b/llvm/test/Transforms/LoopInterchange/unique-dep-matrix.ll
new file mode 100644
index 00000000000000..6943e39cf163ee
--- /dev/null
+++ b/llvm/test/Transforms/LoopInterchange/unique-dep-matrix.ll
@@ -0,0 +1,45 @@
+; REQUIRES: asserts
+; RUN: opt < %s -passes=loop-interchange -S -debug 2>&1 | FileCheck %s
+
+; CHECK:       Dependency matrix before interchange:
+; CHECK-NEXT:  I I
+; CHECK-NEXT:  = S
+; CHECK-NEXT:  < S
+; CHECK-NEXT:  Processing InnerLoopId
+
+; This example is taken from github issue #54176
+;
+define void @foo(i32 noundef %n, i32 noundef %m, ptr nocapture noundef %aa, ptr nocapture noundef readonly %bb, ptr nocapture noundef writeonly %cc) {
+entry:
+  %arrayidx7 = getelementptr inbounds i8, ptr %aa, i64 512
+  br label %for.cond1.preheader
+
+for.cond1.preheader:
+  %indvars.iv32 = phi i64 [ 1, %entry ], [ %indvars.iv.next33, %for.cond.cleanup3 ]
+  %0 = add nsw i64 %indvars.iv32, -1
+  %arrayidx9 = getelementptr inbounds [128 x float], ptr %arrayidx7, i64 0, i64 %0
+  %arrayidx12 = getelementptr inbounds [128 x float], ptr %arrayidx7, i64 0, i64 %indvars.iv32
+  br label %for.body4
+
+for.cond.cleanup:
+  ret void
+
+for.cond.cleanup3:
+  %indvars.iv.next33 = add nuw nsw i64 %indvars.iv32, 1
+  %exitcond36 = icmp ne i64 %indvars.iv.next33, 128
+  br i1 %exitcond36, label %for.cond1.preheader, label %for.cond.cleanup
+
+for.body4:
+  %indvars.iv = phi i64 [ 1, %for.cond1.preheader ], [ %indvars.iv.next, %for.body4 ]
+  %arrayidx6 = getelementptr inbounds [128 x float], ptr %bb, i64 %indvars.iv, i64 %indvars.iv32
+  %1 = load float, ptr %arrayidx6, align 4
+  %2 = load float, ptr %arrayidx9, align 4
+  %add = fadd fast float %2, %1
+  store float %add, ptr %arrayidx9, align 4
+  %3 = load float, ptr %arrayidx12, align 4
+  %arrayidx16 = getelementptr inbounds [128 x float], ptr %cc, i64 %indvars.iv, i64 %indvars.iv32
+  store float %3, ptr %arrayidx16, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp ne i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.body4, label %for.cond.cleanup3
+}



More information about the llvm-commits mailing list