[llvm] [LoopInterchange] Fix depends() check parameters (PR #77719)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 10 19:19:59 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: None (ShivaChen)
<details>
<summary>Changes</summary>
This commit enables loop-interchange with -enable-loopinterchange for the case in https://github.com/llvm/llvm-project/issues/71519.
Once the loops are interchanged, the case can be vectorized.
for (int nl = 0; nl < 10000000/256; nl++)
for (int i = 0; i < 256; ++i)
for (int j = 1; j < 256; j++)
aa[j][i] = aa[j - 1][i] + bb[j][i];
The commit addresses the following issues:
1. populateDependencyMatrix determines that aa[j][i] has an output dependency on itself.
2. The reversed parameter order of depends() caused the distance between aa[j][i] and aa[j - 1][i] to be -1 instead of 1.
---
Full diff: https://github.com/llvm/llvm-project/pull/77719.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Scalar/LoopInterchange.cpp (+3-3)
- (added) llvm/test/Transforms/LoopInterchange/interchange-s231.ll (+59)
``````````diff
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 277f530ee25fc1..d4b78f4f04599c 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -113,10 +113,10 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
ValueVector::iterator I, IE, J, JE;
for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
- for (J = I, JE = MemInstr.end(); J != JE; ++J) {
+ for (J = I + 1, JE = MemInstr.end(); J != JE; ++J) {
std::vector<char> Dep;
- Instruction *Src = cast<Instruction>(*I);
- Instruction *Dst = cast<Instruction>(*J);
+ Instruction *Src = cast<Instruction>(*J);
+ Instruction *Dst = cast<Instruction>(*I);
// Ignore Input dependencies.
if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
continue;
diff --git a/llvm/test/Transforms/LoopInterchange/interchange-s231.ll b/llvm/test/Transforms/LoopInterchange/interchange-s231.ll
new file mode 100644
index 00000000000000..5b51d2169fe510
--- /dev/null
+++ b/llvm/test/Transforms/LoopInterchange/interchange-s231.ll
@@ -0,0 +1,59 @@
+; REQUIRES: asserts
+; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info \
+; RUN: -S -debug 2>&1 | FileCheck %s
+
+@aa = global [256 x [256 x float]] zeroinitializer, align 64
+@bb = global [256 x [256 x float]] zeroinitializer, align 64
+
+;; for (int nl = 0; nl < 10000000/256; nl++)
+;; for (int i = 0; i < 256; ++i)
+;; for (int j = 1; j < 256; j++)
+;; aa[j][i] = aa[j - 1][i] + bb[j][i];
+
+; CHECK: Found flow dependency between Src and Dst
+; CHECK: Src: store float %add, ptr %arrayidx18, align 4
+; CHECK: Dst: %1 = load float, ptr %arrayidx10, align 4
+; CHECK: Processing InnerLoopId = 2 and OuterLoopId = 1
+; CHECK: Loops interchanged.
+
+define float @s231() {
+entry:
+ br label %for.cond1.preheader
+
+; Loop:
+for.cond1.preheader: ; preds = %entry, %for.cond.cleanup3
+ %nl.036 = phi i32 [ 0, %entry ], [ %inc23, %for.cond.cleanup3 ]
+ br label %for.cond5.preheader
+
+for.cond.cleanup3: ; preds = %for.cond.cleanup7
+ %inc23 = add nuw nsw i32 %nl.036, 1
+ %exitcond41 = icmp ne i32 %inc23, 39062
+ br i1 %exitcond41, label %for.cond1.preheader, label %for.cond.cleanup
+
+for.cond.cleanup7: ; preds = %for.body8
+ %indvars.iv.next39 = add nuw nsw i64 %indvars.iv38, 1
+ %exitcond40 = icmp ne i64 %indvars.iv.next39, 256
+ br i1 %exitcond40, label %for.cond5.preheader, label %for.cond.cleanup3
+
+for.body8: ; preds = %for.cond5.preheader, %for.body8
+ %indvars.iv = phi i64 [ 1, %for.cond5.preheader ], [ %indvars.iv.next, %for.body8 ]
+ %0 = add nsw i64 %indvars.iv, -1
+ %arrayidx10 = getelementptr inbounds [256 x [256 x float]], ptr @aa, i64 0, i64 %0, i64 %indvars.iv38
+ %1 = load float, ptr %arrayidx10, align 4
+ %arrayidx14 = getelementptr inbounds [256 x [256 x float]], ptr @bb, i64 0, i64 %indvars.iv, i64 %indvars.iv38
+ %2 = load float, ptr %arrayidx14, align 4
+ %add = fadd fast float %2, %1
+ %arrayidx18 = getelementptr inbounds [256 x [256 x float]], ptr @aa, i64 0, i64 %indvars.iv, i64 %indvars.iv38
+ store float %add, ptr %arrayidx18, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp ne i64 %indvars.iv.next, 256
+ br i1 %exitcond, label %for.body8, label %for.cond.cleanup7
+
+for.cond5.preheader: ; preds = %for.cond1.preheader, %for.cond.cleanup7
+ %indvars.iv38 = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next39, %for.cond.cleanup7 ]
+ br label %for.body8
+
+; Exit blocks
+for.cond.cleanup: ; preds = %for.cond.cleanup3
+ ret float undef
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/77719
More information about the llvm-commits
mailing list