[llvm] 8dc4b2e - [LoopInterchange][PR56275] Fix legality with negative dependence vectors

via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 3 17:00:23 PDT 2022


Author: Congzhe Cao
Date: 2022-08-03T19:59:01-04:00
New Revision: 8dc4b2edfad7f097f1628c9db8a6ab2264bb480d

URL: https://github.com/llvm/llvm-project/commit/8dc4b2edfad7f097f1628c9db8a6ab2264bb480d
DIFF: https://github.com/llvm/llvm-project/commit/8dc4b2edfad7f097f1628c9db8a6ab2264bb480d.diff

LOG: [LoopInterchange][PR56275] Fix legality with negative dependence vectors

This is the 2nd patch of the two-patch series (D130188, D130189) that
fix PR56275 (https://github.com/llvm/llvm-project/issues/56275) which
is a missed opportunity for loop interchange.

As follow-up on the dependence analysis (DA) patch D130188, this patch
normalizes DA results in loop interchange, such that negative dependence
vectors queried by loop interchange are reversed to be non-negative.

Now all tests in PR56275 can get interchanged. Those tests are added
in lit test as `pr56275.ll`.

Reviewed By: kawashima-fj, bmahjour, Meinersbur, #loopoptwg

Differential Revision: https://reviews.llvm.org/D130189

Added: 
    llvm/test/Transforms/LoopInterchange/pr56275.ll

Modified: 
    llvm/lib/Transforms/Scalar/LoopInterchange.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 18daa42952242..465dfe7030e31 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -86,7 +86,8 @@ static void printDepMatrix(CharMatrix &DepMatrix) {
 #endif
 
 static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
-                                     Loop *L, DependenceInfo *DI) {
+                                     Loop *L, DependenceInfo *DI,
+                                     ScalarEvolution *SE) {
   using ValueVector = SmallVector<Value *, 16>;
 
   ValueVector MemInstr;
@@ -125,6 +126,10 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
       // Track Output, Flow, and Anti dependencies.
       if (auto D = DI->depends(Src, Dst, true)) {
         assert(D->isOrdered() && "Expected an output, flow or anti dep.");
+        // If the direction vector is negative, normalize it to
+        // make it non-negative.
+        if (D->normalize(SE))
+          LLVM_DEBUG(dbgs() << "Negative dependence vector normalized.\n");
         LLVM_DEBUG(StringRef DepType =
                        D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
                    dbgs() << "Found " << DepType
@@ -133,19 +138,7 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
         unsigned Levels = D->getLevels();
         char Direction;
         for (unsigned II = 1; II <= Levels; ++II) {
-          const SCEV *Distance = D->getDistance(II);
-          const SCEVConstant *SCEVConst =
-              dyn_cast_or_null<SCEVConstant>(Distance);
-          if (SCEVConst) {
-            const ConstantInt *CI = SCEVConst->getValue();
-            if (CI->isNegative())
-              Direction = '<';
-            else if (CI->isZero())
-              Direction = '=';
-            else
-              Direction = '>';
-            Dep.push_back(Direction);
-          } else if (D->isScalar(II)) {
+          if (D->isScalar(II)) {
             Direction = 'S';
             Dep.push_back(Direction);
           } else {
@@ -486,7 +479,7 @@ struct LoopInterchange {
     CharMatrix DependencyMatrix;
     Loop *OuterMostLoop = *(LoopList.begin());
     if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
-                                  OuterMostLoop, DI)) {
+                                  OuterMostLoop, DI, SE)) {
       LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n");
       return false;
     }

diff --git a/llvm/test/Transforms/LoopInterchange/pr56275.ll b/llvm/test/Transforms/LoopInterchange/pr56275.ll
new file mode 100644
index 0000000000000..c6078bb45146b
--- /dev/null
+++ b/llvm/test/Transforms/LoopInterchange/pr56275.ll
@@ -0,0 +1,175 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -aa-pipeline=basic-aa -passes=loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+;; Test to make sure DA outputs the correct direction
+;; vector [< =] hence the loopnest is interchanged.
+;;
+;; void test1(unsigned a[restrict N1][N2],
+;;          unsigned b[restrict N1][N2],
+;;          unsigned c[restrict N1][N2]) {
+;;  for (unsigned long i2 = 1; i2 < N2-1; i2++) {
+;;    for (unsigned long i1 = 1; i1 < N1-1; i1++) {
+;;      a[i1][i2+1] = b[i1][i2];
+;;      c[i1][i2] = a[i1][i2];
+;;    }
+;;  }
+;; }
+
+define void @test1(ptr noalias noundef %a, ptr noalias noundef %b, ptr noalias noundef %c) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP2_HEADER_PREHEADER:%.*]]
+; CHECK:       loop1.header.preheader:
+; CHECK-NEXT:    br label [[LOOP1_HEADER:%.*]]
+; CHECK:       loop1.header:
+; CHECK-NEXT:    [[I2:%.*]] = phi i64 [ [[I2_INC:%.*]], [[LOOP1_LATCH:%.*]] ], [ 1, [[LOOP1_HEADER_PREHEADER:%.*]] ]
+; CHECK-NEXT:    [[I2_ST:%.*]] = add i64 [[I2]], 1
+; CHECK-NEXT:    [[I2_LD:%.*]] = add i64 [[I2]], 0
+; CHECK-NEXT:    br label [[LOOP2_HEADER_SPLIT1:%.*]]
+; CHECK:       loop2.header.preheader:
+; CHECK-NEXT:    br label [[LOOP2_HEADER:%.*]]
+; CHECK:       loop2.header:
+; CHECK-NEXT:    [[I1:%.*]] = phi i64 [ [[TMP0:%.*]], [[LOOP2_HEADER_SPLIT:%.*]] ], [ 1, [[LOOP2_HEADER_PREHEADER]] ]
+; CHECK-NEXT:    br label [[LOOP1_HEADER_PREHEADER]]
+; CHECK:       loop2.header.split1:
+; CHECK-NEXT:    [[I1_ST:%.*]] = add i64 [[I1]], 0
+; CHECK-NEXT:    [[I1_LD:%.*]] = add i64 [[I1]], 0
+; CHECK-NEXT:    [[A_ST:%.*]] = getelementptr inbounds [64 x i32], ptr [[A:%.*]], i64 [[I1_ST]], i64 [[I2_ST]]
+; CHECK-NEXT:    [[A_LD:%.*]] = getelementptr inbounds [64 x i32], ptr [[A]], i64 [[I1_LD]], i64 [[I2_LD]]
+; CHECK-NEXT:    [[B_LD:%.*]] = getelementptr inbounds [64 x i32], ptr [[B:%.*]], i64 [[I1]], i64 [[I2]]
+; CHECK-NEXT:    [[C_ST:%.*]] = getelementptr inbounds [64 x i32], ptr [[C:%.*]], i64 [[I1]], i64 [[I2]]
+; CHECK-NEXT:    [[B_VAL:%.*]] = load i32, ptr [[B_LD]], align 4
+; CHECK-NEXT:    store i32 [[B_VAL]], ptr [[A_ST]], align 4
+; CHECK-NEXT:    [[A_VAL:%.*]] = load i32, ptr [[A_LD]], align 4
+; CHECK-NEXT:    store i32 [[A_VAL]], ptr [[C_ST]], align 4
+; CHECK-NEXT:    [[I1_INC:%.*]] = add nuw nsw i64 [[I1]], 1
+; CHECK-NEXT:    [[LOOP2_EXITCOND_NOT:%.*]] = icmp eq i64 [[I1_INC]], 63
+; CHECK-NEXT:    br label [[LOOP1_LATCH]]
+; CHECK:       loop2.header.split:
+; CHECK-NEXT:    [[TMP0]] = add nuw nsw i64 [[I1]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 63
+; CHECK-NEXT:    br i1 [[TMP1]], label [[EXIT:%.*]], label [[LOOP2_HEADER]]
+; CHECK:       loop1.latch:
+; CHECK-NEXT:    [[I2_INC]] = add nuw nsw i64 [[I2]], 1
+; CHECK-NEXT:    [[LOOP1_EXITCOND_NOT:%.*]] = icmp eq i64 [[I2_INC]], 63
+; CHECK-NEXT:    br i1 [[LOOP1_EXITCOND_NOT]], label [[LOOP2_HEADER_SPLIT]], label [[LOOP1_HEADER]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop1.header
+
+loop1.header:
+  %i2 = phi i64 [ 1, %entry ], [ %i2.inc, %loop1.latch ]
+  %i2.st = add i64 %i2, 1
+  %i2.ld = add i64 %i2, 0
+  br label %loop2.header
+
+loop2.header:
+  %i1 = phi i64 [ 1, %loop1.header ], [ %i1.inc, %loop2.header ]
+  %i1.st = add i64 %i1, 0
+  %i1.ld = add i64 %i1, 0
+  %a.st = getelementptr inbounds [64 x i32], ptr %a, i64 %i1.st, i64 %i2.st
+  %a.ld = getelementptr inbounds [64 x i32], ptr %a, i64 %i1.ld, i64 %i2.ld
+  %b.ld = getelementptr inbounds [64 x i32], ptr %b, i64 %i1, i64 %i2
+  %c.st = getelementptr inbounds [64 x i32], ptr %c, i64 %i1, i64 %i2
+  %b.val = load i32, ptr %b.ld, align 4
+  store i32 %b.val, ptr %a.st, align 4  ; (X) store to a[i1][i2+1]
+  %a.val = load i32, ptr %a.ld, align 4 ; (Y) load from a[i1][i2]
+  store i32 %a.val, ptr %c.st, align 4
+  %i1.inc = add nuw nsw i64 %i1, 1
+  %loop2.exitcond.not = icmp eq i64 %i1.inc, 63
+  br i1 %loop2.exitcond.not, label %loop1.latch, label %loop2.header
+
+loop1.latch:
+  %i2.inc = add nuw nsw i64 %i2, 1
+  %loop1.exitcond.not = icmp eq i64 %i2.inc, 63
+  br i1 %loop1.exitcond.not, label %exit, label %loop1.header
+
+exit:
+  ret void
+}
+
+;; Semantically equivalent to test1() with only the difference
+;; of the order of a load and a store at (X) and (Y).
+;;
+;; Test to make sure DA outputs the correct direction
+;; vector [< =] hence the loopnest is interchanged.
+
+define void @test2(ptr noalias noundef %a, ptr noalias noundef %b, ptr noalias noundef %c) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP2_HEADER_PREHEADER:%.*]]
+; CHECK:       loop1.header.preheader:
+; CHECK-NEXT:    br label [[LOOP1_HEADER:%.*]]
+; CHECK:       loop1.header:
+; CHECK-NEXT:    [[I2:%.*]] = phi i64 [ [[I2_INC:%.*]], [[LOOP1_LATCH:%.*]] ], [ 1, [[LOOP1_HEADER_PREHEADER:%.*]] ]
+; CHECK-NEXT:    [[I2_ST:%.*]] = add i64 [[I2]], 1
+; CHECK-NEXT:    [[I2_LD:%.*]] = add i64 [[I2]], 0
+; CHECK-NEXT:    br label [[LOOP2_HEADER_SPLIT1:%.*]]
+; CHECK:       loop2.header.preheader:
+; CHECK-NEXT:    br label [[LOOP2_HEADER:%.*]]
+; CHECK:       loop2.header:
+; CHECK-NEXT:    [[I1:%.*]] = phi i64 [ [[TMP0:%.*]], [[LOOP2_HEADER_SPLIT:%.*]] ], [ 1, [[LOOP2_HEADER_PREHEADER]] ]
+; CHECK-NEXT:    br label [[LOOP1_HEADER_PREHEADER]]
+; CHECK:       loop2.header.split1:
+; CHECK-NEXT:    [[I1_ST:%.*]] = add i64 [[I1]], 0
+; CHECK-NEXT:    [[I1_LD:%.*]] = add i64 [[I1]], 0
+; CHECK-NEXT:    [[A_ST:%.*]] = getelementptr inbounds [64 x i32], ptr [[A:%.*]], i64 [[I1_ST]], i64 [[I2_ST]]
+; CHECK-NEXT:    [[A_LD:%.*]] = getelementptr inbounds [64 x i32], ptr [[A]], i64 [[I1_LD]], i64 [[I2_LD]]
+; CHECK-NEXT:    [[B_LD:%.*]] = getelementptr inbounds [64 x i32], ptr [[B:%.*]], i64 [[I1]], i64 [[I2]]
+; CHECK-NEXT:    [[C_ST:%.*]] = getelementptr inbounds [64 x i32], ptr [[C:%.*]], i64 [[I1]], i64 [[I2]]
+; CHECK-NEXT:    [[B_VAL:%.*]] = load i32, ptr [[B_LD]], align 4
+; CHECK-NEXT:    [[A_VAL:%.*]] = load i32, ptr [[A_LD]], align 4
+; CHECK-NEXT:    store i32 [[B_VAL]], ptr [[A_ST]], align 4
+; CHECK-NEXT:    store i32 [[A_VAL]], ptr [[C_ST]], align 4
+; CHECK-NEXT:    [[I1_INC:%.*]] = add nuw nsw i64 [[I1]], 1
+; CHECK-NEXT:    [[LOOP2_EXITCOND_NOT:%.*]] = icmp eq i64 [[I1_INC]], 63
+; CHECK-NEXT:    br label [[LOOP1_LATCH]]
+; CHECK:       loop2.header.split:
+; CHECK-NEXT:    [[TMP0]] = add nuw nsw i64 [[I1]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 63
+; CHECK-NEXT:    br i1 [[TMP1]], label [[EXIT:%.*]], label [[LOOP2_HEADER]]
+; CHECK:       loop1.latch:
+; CHECK-NEXT:    [[I2_INC]] = add nuw nsw i64 [[I2]], 1
+; CHECK-NEXT:    [[LOOP1_EXITCOND_NOT:%.*]] = icmp eq i64 [[I2_INC]], 63
+; CHECK-NEXT:    br i1 [[LOOP1_EXITCOND_NOT]], label [[LOOP2_HEADER_SPLIT]], label [[LOOP1_HEADER]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop1.header
+
+loop1.header:
+  %i2 = phi i64 [ 1, %entry ], [ %i2.inc, %loop1.latch ]
+  %i2.st = add i64 %i2, 1
+  %i2.ld = add i64 %i2, 0
+  br label %loop2.header
+
+loop2.header:
+  %i1 = phi i64 [ 1, %loop1.header ], [ %i1.inc, %loop2.header ]
+  %i1.st = add i64 %i1, 0
+  %i1.ld = add i64 %i1, 0
+  %a.st = getelementptr inbounds [64 x i32], ptr %a, i64 %i1.st, i64 %i2.st
+  %a.ld = getelementptr inbounds [64 x i32], ptr %a, i64 %i1.ld, i64 %i2.ld
+  %b.ld = getelementptr inbounds [64 x i32], ptr %b, i64 %i1, i64 %i2
+  %c.st = getelementptr inbounds [64 x i32], ptr %c, i64 %i1, i64 %i2
+  %b.val = load i32, ptr %b.ld, align 4
+  %a.val = load i32, ptr %a.ld, align 4 ; (Y) load from a[i1][i2]
+  store i32 %b.val, ptr %a.st, align 4  ; (X) store to a[i1][i2+1]
+  store i32 %a.val, ptr %c.st, align 4
+  %i1.inc = add nuw nsw i64 %i1, 1
+  %loop2.exitcond.not = icmp eq i64 %i1.inc, 63
+  br i1 %loop2.exitcond.not, label %loop1.latch, label %loop2.header
+
+loop1.latch:
+  %i2.inc = add nuw nsw i64 %i2, 1
+  %loop1.exitcond.not = icmp eq i64 %i2.inc, 63
+  br i1 %loop1.exitcond.not, label %exit, label %loop1.header
+
+exit:
+  ret void
+}


        


More information about the llvm-commits mailing list