[llvm] [LoopInterchange] Make the entries of the Dependency Matrix unique (PR #116195)
Sjoerd Meijer via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 18 01:54:53 PST 2024
https://github.com/sjoerdmeijer updated https://github.com/llvm/llvm-project/pull/116195
>From 1593c5e212360ab4edc6e55e4c04d2b4403f7434 Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <smeijer at nvidia.com>
Date: Thu, 14 Nov 2024 01:53:44 -0800
Subject: [PATCH] [LoopInterchange] Make the entries of the Dependency Matrix
unique
The dependency matrix can contain many duplicate entries, which are
unnecessary and lead to redundant checks. This patch avoids them by
only adding unique entries to the dependency matrix.
I haven't added tests because the printing of the dependency matrix is
guarded by an #ifdef DUMP_DEP_MATRICIES, so it cannot be printed in normal
builds. However, all existing regression tests still pass, so the change
is covered in that way.
---
.../lib/Transforms/Scalar/LoopInterchange.cpp | 27 ++++++-----
.../LoopInterchange/unique-dep-matrix.ll | 45 +++++++++++++++++++
2 files changed, 62 insertions(+), 10 deletions(-)
create mode 100644 llvm/test/Transforms/LoopInterchange/unique-dep-matrix.ll
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index db63bda1e6b926..996162ee866c5d 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/LoopCacheAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -71,7 +72,7 @@ static const unsigned MaxMemInstrCount = 100;
// Maximum loop depth supported.
static const unsigned MaxLoopNestDepth = 10;
-#ifdef DUMP_DEP_MATRICIES
+#ifndef NDEBUG
static void printDepMatrix(CharMatrix &DepMatrix) {
for (auto &Row : DepMatrix) {
for (auto D : Row)
@@ -110,6 +111,7 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
<< " Loads and Stores to analyze\n");
ValueVector::iterator I, IE, J, JE;
+ StringSet<> Seen;
for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
for (J = I, JE = MemInstr.end(); J != JE; ++J) {
@@ -156,7 +158,13 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
Dep.push_back('I');
}
- DepMatrix.push_back(Dep);
+ // Make sure we only add unique entries to the dependency matrix.
+ std::string DepStr = std::string(Dep.begin(), Dep.end());
+ if (!Seen.count(DepStr)) {
+ Seen.insert(DepStr);
+ DepMatrix.push_back(Dep);
+ }
+
if (DepMatrix.size() > MaxMemInstrCount) {
LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount
<< " dependencies inside loop\n");
@@ -441,10 +449,9 @@ struct LoopInterchange {
LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n");
return false;
}
-#ifdef DUMP_DEP_MATRICIES
- LLVM_DEBUG(dbgs() << "Dependence before interchange\n");
- printDepMatrix(DependencyMatrix);
-#endif
+
+ LLVM_DEBUG(dbgs() << "Dependency matrix before interchange:\n";
+ printDepMatrix(DependencyMatrix));
// Get the Outermost loop exit.
BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock();
@@ -484,10 +491,10 @@ struct LoopInterchange {
std::swap(LoopList[i - 1], LoopList[i]);
// Update the DependencyMatrix
interChangeDependencies(DependencyMatrix, i, i - 1);
-#ifdef DUMP_DEP_MATRICIES
- LLVM_DEBUG(dbgs() << "Dependence after interchange\n");
- printDepMatrix(DependencyMatrix);
-#endif
+
+ LLVM_DEBUG(dbgs() << "Dependency matrix after interchange:\n";
+ printDepMatrix(DependencyMatrix));
+
ChangedPerIter |= Interchanged;
Changed |= Interchanged;
}
diff --git a/llvm/test/Transforms/LoopInterchange/unique-dep-matrix.ll b/llvm/test/Transforms/LoopInterchange/unique-dep-matrix.ll
new file mode 100644
index 00000000000000..6943e39cf163ee
--- /dev/null
+++ b/llvm/test/Transforms/LoopInterchange/unique-dep-matrix.ll
@@ -0,0 +1,45 @@
+; REQUIRES: asserts
+; RUN: opt < %s -passes=loop-interchange -S -debug 2>&1 | FileCheck %s
+
+; CHECK: Dependency matrix before interchange:
+; CHECK-NEXT: I I
+; CHECK-NEXT: = S
+; CHECK-NEXT: < S
+; CHECK-NEXT: Processing InnerLoopId
+
+; This example is taken from github issue #54176
+;
+define void @foo(i32 noundef %n, i32 noundef %m, ptr nocapture noundef %aa, ptr nocapture noundef readonly %bb, ptr nocapture noundef writeonly %cc) {
+entry:
+ %arrayidx7 = getelementptr inbounds i8, ptr %aa, i64 512
+ br label %for.cond1.preheader
+
+for.cond1.preheader:
+ %indvars.iv32 = phi i64 [ 1, %entry ], [ %indvars.iv.next33, %for.cond.cleanup3 ]
+ %0 = add nsw i64 %indvars.iv32, -1
+ %arrayidx9 = getelementptr inbounds [128 x float], ptr %arrayidx7, i64 0, i64 %0
+ %arrayidx12 = getelementptr inbounds [128 x float], ptr %arrayidx7, i64 0, i64 %indvars.iv32
+ br label %for.body4
+
+for.cond.cleanup:
+ ret void
+
+for.cond.cleanup3:
+ %indvars.iv.next33 = add nuw nsw i64 %indvars.iv32, 1
+ %exitcond36 = icmp ne i64 %indvars.iv.next33, 128
+ br i1 %exitcond36, label %for.cond1.preheader, label %for.cond.cleanup
+
+for.body4:
+ %indvars.iv = phi i64 [ 1, %for.cond1.preheader ], [ %indvars.iv.next, %for.body4 ]
+ %arrayidx6 = getelementptr inbounds [128 x float], ptr %bb, i64 %indvars.iv, i64 %indvars.iv32
+ %1 = load float, ptr %arrayidx6, align 4
+ %2 = load float, ptr %arrayidx9, align 4
+ %add = fadd fast float %2, %1
+ store float %add, ptr %arrayidx9, align 4
+ %3 = load float, ptr %arrayidx12, align 4
+ %arrayidx16 = getelementptr inbounds [128 x float], ptr %cc, i64 %indvars.iv, i64 %indvars.iv32
+ store float %3, ptr %arrayidx16, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp ne i64 %indvars.iv.next, 1024
+ br i1 %exitcond, label %for.body4, label %for.cond.cleanup3
+}
More information about the llvm-commits
mailing list