[llvm] [LoopInterchange] Fix depends() check parameters (PR #77719)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 5 22:36:40 PST 2024
https://github.com/ShivaChen updated https://github.com/llvm/llvm-project/pull/77719
>From 12cab0ed12b5565d79415392927ec353359d0e7a Mon Sep 17 00:00:00 2001
From: Shiva Chen <shiva.chen at imgtec.com>
Date: Mon, 8 Jan 2024 07:30:24 +0000
Subject: [PATCH 1/2] Add interchange-s231.ll
---
.../LoopInterchange/interchange-s231.ll | 56 +++++++++++++++++++
1 file changed, 56 insertions(+)
create mode 100644 llvm/test/Transforms/LoopInterchange/interchange-s231.ll
diff --git a/llvm/test/Transforms/LoopInterchange/interchange-s231.ll b/llvm/test/Transforms/LoopInterchange/interchange-s231.ll
new file mode 100644
index 0000000000000..3d496df8635a3
--- /dev/null
+++ b/llvm/test/Transforms/LoopInterchange/interchange-s231.ll
@@ -0,0 +1,56 @@
+; REQUIRES: asserts
+; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info \
+; RUN: -S -debug 2>&1 | FileCheck %s
+
+@aa = global [256 x [256 x float]] zeroinitializer, align 64
+@bb = global [256 x [256 x float]] zeroinitializer, align 64
+
+;; for (int nl = 0; nl < 10000000/256; nl++)
+;; for (int i = 0; i < 256; ++i)
+;; for (int j = 1; j < 256; j++)
+;; aa[j][i] = aa[j - 1][i] + bb[j][i];
+
+; CHECK: Processing InnerLoopId = 2 and OuterLoopId = 1
+; CHECK: Not interchanging loops. Cannot prove legality.
+
+define float @s231() {
+entry:
+ br label %for.cond1.preheader
+
+; Loop:
+for.cond1.preheader: ; preds = %entry, %for.cond.cleanup3
+ %nl.036 = phi i32 [ 0, %entry ], [ %inc23, %for.cond.cleanup3 ]
+ br label %for.cond5.preheader
+
+for.cond.cleanup3: ; preds = %for.cond.cleanup7
+ %inc23 = add nuw nsw i32 %nl.036, 1
+ %exitcond41 = icmp ne i32 %inc23, 39062
+ br i1 %exitcond41, label %for.cond1.preheader, label %for.cond.cleanup
+
+for.cond.cleanup7: ; preds = %for.body8
+ %indvars.iv.next39 = add nuw nsw i64 %indvars.iv38, 1
+ %exitcond40 = icmp ne i64 %indvars.iv.next39, 256
+ br i1 %exitcond40, label %for.cond5.preheader, label %for.cond.cleanup3
+
+for.body8: ; preds = %for.cond5.preheader, %for.body8
+ %indvars.iv = phi i64 [ 1, %for.cond5.preheader ], [ %indvars.iv.next, %for.body8 ]
+ %0 = add nsw i64 %indvars.iv, -1
+ %arrayidx10 = getelementptr inbounds [256 x [256 x float]], ptr @aa, i64 0, i64 %0, i64 %indvars.iv38
+ %1 = load float, ptr %arrayidx10, align 4
+ %arrayidx14 = getelementptr inbounds [256 x [256 x float]], ptr @bb, i64 0, i64 %indvars.iv, i64 %indvars.iv38
+ %2 = load float, ptr %arrayidx14, align 4
+ %add = fadd fast float %2, %1
+ %arrayidx18 = getelementptr inbounds [256 x [256 x float]], ptr @aa, i64 0, i64 %indvars.iv, i64 %indvars.iv38
+ store float %add, ptr %arrayidx18, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp ne i64 %indvars.iv.next, 256
+ br i1 %exitcond, label %for.body8, label %for.cond.cleanup7
+
+for.cond5.preheader: ; preds = %for.cond1.preheader, %for.cond.cleanup7
+ %indvars.iv38 = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next39, %for.cond.cleanup7 ]
+ br label %for.body8
+
+; Exit blocks
+for.cond.cleanup: ; preds = %for.cond.cleanup3
+ ret float undef
+}
>From dac9012db9e8443c2c31cbf773d659edd3f80065 Mon Sep 17 00:00:00 2001
From: Shiva Chen <shiva.chen at imgtec.com>
Date: Tue, 6 Feb 2024 02:52:20 +0000
Subject: [PATCH 2/2] [LoopInterchange] Swap Src and Dst to get distance
without normalize
---
.../lib/Transforms/Scalar/LoopInterchange.cpp | 20 +++++++++----------
.../LoopInterchange/interchange-s231.ll | 2 +-
.../Transforms/LoopInterchange/pr56275.ll | 20 +++++--------------
3 files changed, 16 insertions(+), 26 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 277f530ee25fc..d1966011c4645 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/LoopCacheAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopNestAnalysis.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
@@ -84,13 +85,16 @@ static void printDepMatrix(CharMatrix &DepMatrix) {
static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
Loop *L, DependenceInfo *DI,
- ScalarEvolution *SE) {
+ ScalarEvolution *SE, LoopInfo *LI) {
using ValueVector = SmallVector<Value *, 16>;
ValueVector MemInstr;
- // For each block.
- for (BasicBlock *BB : L->blocks()) {
+ // Traverse blocks in fixed RPOT order, regardless of their storage in the
+ // loop info, as it may be arbitrary.
+ LoopBlocksRPO RPOT(L);
+ RPOT.perform(LI);
+ for (BasicBlock *BB : RPOT) {
// Scan the BB and collect legal loads and stores.
for (Instruction &I : *BB) {
if (!isa<Instruction>(I))
@@ -115,18 +119,14 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
for (J = I, JE = MemInstr.end(); J != JE; ++J) {
std::vector<char> Dep;
- Instruction *Src = cast<Instruction>(*I);
- Instruction *Dst = cast<Instruction>(*J);
+ Instruction *Src = cast<Instruction>(*J);
+ Instruction *Dst = cast<Instruction>(*I);
// Ignore Input dependencies.
if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
continue;
// Track Output, Flow, and Anti dependencies.
if (auto D = DI->depends(Src, Dst, true)) {
assert(D->isOrdered() && "Expected an output, flow or anti dep.");
- // If the direction vector is negative, normalize it to
- // make it non-negative.
- if (D->normalize(SE))
- LLVM_DEBUG(dbgs() << "Negative dependence vector normalized.\n");
LLVM_DEBUG(StringRef DepType =
D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
dbgs() << "Found " << DepType
@@ -438,7 +438,7 @@ struct LoopInterchange {
CharMatrix DependencyMatrix;
Loop *OuterMostLoop = *(LoopList.begin());
if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
- OuterMostLoop, DI, SE)) {
+ OuterMostLoop, DI, SE, LI)) {
LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n");
return false;
}
diff --git a/llvm/test/Transforms/LoopInterchange/interchange-s231.ll b/llvm/test/Transforms/LoopInterchange/interchange-s231.ll
index 3d496df8635a3..32c865f276878 100644
--- a/llvm/test/Transforms/LoopInterchange/interchange-s231.ll
+++ b/llvm/test/Transforms/LoopInterchange/interchange-s231.ll
@@ -11,7 +11,7 @@
;; aa[j][i] = aa[j - 1][i] + bb[j][i];
; CHECK: Processing InnerLoopId = 2 and OuterLoopId = 1
-; CHECK: Not interchanging loops. Cannot prove legality.
+; CHECK: Loops interchanged.
define float @s231() {
entry:
diff --git a/llvm/test/Transforms/LoopInterchange/pr56275.ll b/llvm/test/Transforms/LoopInterchange/pr56275.ll
index c6078bb45146b..86a4fcc7b2140 100644
--- a/llvm/test/Transforms/LoopInterchange/pr56275.ll
+++ b/llvm/test/Transforms/LoopInterchange/pr56275.ll
@@ -21,20 +21,14 @@ target triple = "aarch64-unknown-linux-gnu"
define void @test1(ptr noalias noundef %a, ptr noalias noundef %b, ptr noalias noundef %c) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[LOOP2_HEADER_PREHEADER:%.*]]
-; CHECK: loop1.header.preheader:
; CHECK-NEXT: br label [[LOOP1_HEADER:%.*]]
; CHECK: loop1.header:
-; CHECK-NEXT: [[I2:%.*]] = phi i64 [ [[I2_INC:%.*]], [[LOOP1_LATCH:%.*]] ], [ 1, [[LOOP1_HEADER_PREHEADER:%.*]] ]
+; CHECK-NEXT: [[I2:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[I2_INC:%.*]], [[LOOP1_LATCH:%.*]] ]
; CHECK-NEXT: [[I2_ST:%.*]] = add i64 [[I2]], 1
; CHECK-NEXT: [[I2_LD:%.*]] = add i64 [[I2]], 0
-; CHECK-NEXT: br label [[LOOP2_HEADER_SPLIT1:%.*]]
-; CHECK: loop2.header.preheader:
; CHECK-NEXT: br label [[LOOP2_HEADER:%.*]]
; CHECK: loop2.header:
-; CHECK-NEXT: [[I1:%.*]] = phi i64 [ [[TMP0:%.*]], [[LOOP2_HEADER_SPLIT:%.*]] ], [ 1, [[LOOP2_HEADER_PREHEADER]] ]
-; CHECK-NEXT: br label [[LOOP1_HEADER_PREHEADER]]
-; CHECK: loop2.header.split1:
+; CHECK-NEXT: [[I1:%.*]] = phi i64 [ 1, [[LOOP1_HEADER]] ], [ [[I1_INC:%.*]], [[LOOP2_HEADER]] ]
; CHECK-NEXT: [[I1_ST:%.*]] = add i64 [[I1]], 0
; CHECK-NEXT: [[I1_LD:%.*]] = add i64 [[I1]], 0
; CHECK-NEXT: [[A_ST:%.*]] = getelementptr inbounds [64 x i32], ptr [[A:%.*]], i64 [[I1_ST]], i64 [[I2_ST]]
@@ -45,17 +39,13 @@ define void @test1(ptr noalias noundef %a, ptr noalias noundef %b, ptr noalias n
; CHECK-NEXT: store i32 [[B_VAL]], ptr [[A_ST]], align 4
; CHECK-NEXT: [[A_VAL:%.*]] = load i32, ptr [[A_LD]], align 4
; CHECK-NEXT: store i32 [[A_VAL]], ptr [[C_ST]], align 4
-; CHECK-NEXT: [[I1_INC:%.*]] = add nuw nsw i64 [[I1]], 1
+; CHECK-NEXT: [[I1_INC]] = add nuw nsw i64 [[I1]], 1
; CHECK-NEXT: [[LOOP2_EXITCOND_NOT:%.*]] = icmp eq i64 [[I1_INC]], 63
-; CHECK-NEXT: br label [[LOOP1_LATCH]]
-; CHECK: loop2.header.split:
-; CHECK-NEXT: [[TMP0]] = add nuw nsw i64 [[I1]], 1
-; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 63
-; CHECK-NEXT: br i1 [[TMP1]], label [[EXIT:%.*]], label [[LOOP2_HEADER]]
+; CHECK-NEXT: br i1 [[LOOP2_EXITCOND_NOT]], label [[LOOP1_LATCH]], label [[LOOP2_HEADER]]
; CHECK: loop1.latch:
; CHECK-NEXT: [[I2_INC]] = add nuw nsw i64 [[I2]], 1
; CHECK-NEXT: [[LOOP1_EXITCOND_NOT:%.*]] = icmp eq i64 [[I2_INC]], 63
-; CHECK-NEXT: br i1 [[LOOP1_EXITCOND_NOT]], label [[LOOP2_HEADER_SPLIT]], label [[LOOP1_HEADER]]
+; CHECK-NEXT: br i1 [[LOOP1_EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP1_HEADER]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
More information about the llvm-commits
mailing list