[llvm] [LoopInterchange] Fix depends() check parameters (PR #77719)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 10 19:19:59 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: None (ShivaChen)

<details>
<summary>Changes</summary>

This commit enables loop-interchange with -enable-loopinterchange for the case in https://github.com/llvm/llvm-project/issues/71519.
With the loop-interchange, the case can be vectorized.

    for (int nl = 0; nl < 10000000/256; nl++)
      for (int i = 0; i < 256; ++i)
        for (int j = 1; j < 256; j++)
          aa[j][i] = aa[j - 1][i] + bb[j][i];

This commit addresses the following issues:
1. populateDependencyMatrix determines that aa[j][i] has an output dependency with itself.
2. The reversed parameter order of depends() caused the distance between aa[j][i] and aa[j - 1][i] to be -1 instead of 1.

---
Full diff: https://github.com/llvm/llvm-project/pull/77719.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/Scalar/LoopInterchange.cpp (+3-3) 
- (added) llvm/test/Transforms/LoopInterchange/interchange-s231.ll (+59) 


``````````diff
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 277f530ee25fc1..d4b78f4f04599c 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -113,10 +113,10 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
   ValueVector::iterator I, IE, J, JE;
 
   for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
-    for (J = I, JE = MemInstr.end(); J != JE; ++J) {
+    for (J = I + 1, JE = MemInstr.end(); J != JE; ++J) {
       std::vector<char> Dep;
-      Instruction *Src = cast<Instruction>(*I);
-      Instruction *Dst = cast<Instruction>(*J);
+      Instruction *Src = cast<Instruction>(*J);
+      Instruction *Dst = cast<Instruction>(*I);
       // Ignore Input dependencies.
       if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
         continue;
diff --git a/llvm/test/Transforms/LoopInterchange/interchange-s231.ll b/llvm/test/Transforms/LoopInterchange/interchange-s231.ll
new file mode 100644
index 00000000000000..5b51d2169fe510
--- /dev/null
+++ b/llvm/test/Transforms/LoopInterchange/interchange-s231.ll
@@ -0,0 +1,59 @@
+; REQUIRES: asserts
+; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info \
+; RUN:     -S -debug 2>&1 | FileCheck %s
+
+@aa = global [256 x [256 x float]] zeroinitializer, align 64
+@bb = global [256 x [256 x float]] zeroinitializer, align 64
+
+;;  for (int nl = 0; nl < 10000000/256; nl++)
+;;    for (int i = 0; i < 256; ++i)
+;;      for (int j = 1; j < 256; j++)
+;;        aa[j][i] = aa[j - 1][i] + bb[j][i];
+
+; CHECK: Found flow dependency between Src and Dst
+; CHECK:  Src:  store float %add, ptr %arrayidx18, align 4
+; CHECK:  Dst:  %1 = load float, ptr %arrayidx10, align 4
+; CHECK: Processing InnerLoopId = 2 and OuterLoopId = 1
+; CHECK: Loops interchanged.
+
+define float @s231() {
+entry:
+  br label %for.cond1.preheader
+
+; Loop:
+for.cond1.preheader:                              ; preds = %entry, %for.cond.cleanup3
+  %nl.036 = phi i32 [ 0, %entry ], [ %inc23, %for.cond.cleanup3 ]
+  br label %for.cond5.preheader
+
+for.cond.cleanup3:                                ; preds = %for.cond.cleanup7
+  %inc23 = add nuw nsw i32 %nl.036, 1
+  %exitcond41 = icmp ne i32 %inc23, 39062
+  br i1 %exitcond41, label %for.cond1.preheader, label %for.cond.cleanup
+
+for.cond.cleanup7:                                ; preds = %for.body8
+  %indvars.iv.next39 = add nuw nsw i64 %indvars.iv38, 1
+  %exitcond40 = icmp ne i64 %indvars.iv.next39, 256
+  br i1 %exitcond40, label %for.cond5.preheader, label %for.cond.cleanup3
+
+for.body8:                                        ; preds = %for.cond5.preheader, %for.body8
+  %indvars.iv = phi i64 [ 1, %for.cond5.preheader ], [ %indvars.iv.next, %for.body8 ]
+  %0 = add nsw i64 %indvars.iv, -1
+  %arrayidx10 = getelementptr inbounds [256 x [256 x float]], ptr @aa, i64 0, i64 %0, i64 %indvars.iv38
+  %1 = load float, ptr %arrayidx10, align 4
+  %arrayidx14 = getelementptr inbounds [256 x [256 x float]], ptr @bb, i64 0, i64 %indvars.iv, i64 %indvars.iv38
+  %2 = load float, ptr %arrayidx14, align 4
+  %add = fadd fast float %2, %1
+  %arrayidx18 = getelementptr inbounds [256 x [256 x float]], ptr @aa, i64 0, i64 %indvars.iv, i64 %indvars.iv38
+  store float %add, ptr %arrayidx18, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp ne i64 %indvars.iv.next, 256
+  br i1 %exitcond, label %for.body8, label %for.cond.cleanup7
+
+for.cond5.preheader:                              ; preds = %for.cond1.preheader, %for.cond.cleanup7
+  %indvars.iv38 = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next39, %for.cond.cleanup7 ]
+  br label %for.body8
+
+; Exit blocks
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup3
+  ret float undef
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/77719


More information about the llvm-commits mailing list