[llvm] [LAA] Support backward dependences with non-constant distance. (PR #91525)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Wed May 8 12:37:06 PDT 2024


https://github.com/fhahn created https://github.com/llvm/llvm-project/pull/91525

Following up to 933f49248, also update the code reasoning about backwards dependences to support non-constant distances.

Update the code to use the signed minimum distance instead of a constant distance

This means e checked the lower bound of the dependence distance and the distance may be larger at runtime (and safe for vectorization). Whether to classify it as Unknown or Backwards depends on the vector width and LAA was updated to take TTI to get the maximum vector register width.

If the minimum dependence distance is larger than the max vector width, we consider it as backwards-vectorizable. Otherwise we classify them as Unknown, so we re-try with runtime checks.

>From a5e4deff35fdbbad171dae7e29c3a7d4cedc0720 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 8 May 2024 10:55:10 +0100
Subject: [PATCH] [LAA] Support backward dependences with non-constant
 distance.

Following up to 933f49248, also update the code reasoning about
backwards dependences to support non-constant distances.

Update the code to use the signed minimum distance instead of a
constant distance

This means e checked the lower bound of the dependence distance and
the distance may be larger at runtime (and safe for vectorization).
Whether to classify it as Unknown or Backwards depends on the vector
width and LAA was updated to take TTI to get the maximum vector register
width.

If the minimum dependence distance is larger than the max vector width,
we consider it as backwards-vectorizable. Otherwise we classify them as
Unknown, so we re-try with runtime checks.
---
 .../llvm/Analysis/LoopAccessAnalysis.h        |  22 +-
 llvm/lib/Analysis/LoopAccessAnalysis.cpp      |  89 +++--
 llvm/lib/Transforms/Scalar/LoopFlatten.cpp    |   2 +-
 .../Transforms/Scalar/LoopVersioningLICM.cpp  |   2 +-
 .../multiple-strides-rt-memory-checks.ll      |   2 +-
 .../non-constant-distance-backward.ll         | 318 ++++++++++--------
 .../Transforms/Vectorize/VPlanSlpTest.cpp     |   2 +-
 7 files changed, 268 insertions(+), 169 deletions(-)

diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index cf998e66ee486..6616682ff47b9 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -181,8 +181,10 @@ class MemoryDepChecker {
                const SmallVectorImpl<Instruction *> &Instrs) const;
   };
 
-  MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L)
-      : PSE(PSE), InnermostLoop(L) {}
+  MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L,
+                   unsigned MaxTargetVectorWidthInBits)
+      : PSE(PSE), InnermostLoop(L),
+        MaxTargetVectorWidthInBits(MaxTargetVectorWidthInBits) {}
 
   /// Register the location (instructions are given increasing numbers)
   /// of a write access.
@@ -314,6 +316,11 @@ class MemoryDepChecker {
   /// RecordDependences is true.
   SmallVector<Dependence, 8> Dependences;
 
+  /// The maximum width of a target's vector registers. Is used to decide if a
+  /// backwards dependence with non-constant stride should be classified as
+  /// backwards-vectorizable or unknown (triggering a runtime check).
+  unsigned MaxTargetVectorWidthInBits = 0;
+
   /// Check whether there is a plausible dependence between the two
   /// accesses.
   ///
@@ -575,8 +582,9 @@ class RuntimePointerChecking {
 /// PSE must be emitted in order for the results of this analysis to be valid.
 class LoopAccessInfo {
 public:
-  LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI,
-                 AAResults *AA, DominatorTree *DT, LoopInfo *LI);
+  LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetTransformInfo *TTI,
+                 const TargetLibraryInfo *TLI, AAResults *AA, DominatorTree *DT,
+                 LoopInfo *LI);
 
   /// Return true we can analyze the memory accesses in the loop and there are
   /// no memory dependence cycles. Note that for dependences between loads &
@@ -799,12 +807,14 @@ class LoopAccessInfoManager {
   AAResults &AA;
   DominatorTree &DT;
   LoopInfo &LI;
+  TargetTransformInfo *TTI;
   const TargetLibraryInfo *TLI = nullptr;
 
 public:
   LoopAccessInfoManager(ScalarEvolution &SE, AAResults &AA, DominatorTree &DT,
-                        LoopInfo &LI, const TargetLibraryInfo *TLI)
-      : SE(SE), AA(AA), DT(DT), LI(LI), TLI(TLI) {}
+                        LoopInfo &LI, TargetTransformInfo *TTI,
+                        const TargetLibraryInfo *TLI)
+      : SE(SE), AA(AA), DT(DT), LI(LI), TTI(TTI), TLI(TLI) {}
 
   const LoopAccessInfo &getInfo(Loop &L);
 
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index ae7f0373c4e84..f816c74dc781d 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -31,6 +31,7 @@
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/BasicBlock.h"
@@ -2119,17 +2120,22 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
     return Dependence::Forward;
   }
 
-  if (!C) {
-    // TODO: FoundNonConstantDistanceDependence is used as a necessary condition
-    // to consider retrying with runtime checks. Historically, we did not set it
-    // when strides were different but there is no inherent reason to.
+  if (!SE.isKnownPositive(Dist)) {
     FoundNonConstantDistanceDependence |= CommonStride.has_value();
-    LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
     return Dependence::Unknown;
   }
 
-  if (!SE.isKnownPositive(Dist))
-    return Dependence::Unknown;
+  if (!isa<SCEVConstant>(Dist)) {
+    // Previously this case would be treated as Unknown, possibly setting
+    // FoundNonConstantDistanceDependence to force re-trying with runtime
+    // checks. Until the TODO below is addressed, set it here to preserve
+    // original behavior w.r.t. re-trying with runtime checks.
+    // TODO: FoundNonConstantDistanceDependence is used as a necessary
+    // condition to consider retrying with runtime checks. Historically, we
+    // did not set it when strides were different but there is no inherent
+    // reason to.
+    FoundNonConstantDistanceDependence |= CommonStride.has_value();
+  }
 
   if (!HasSameSize) {
     LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
@@ -2137,14 +2143,9 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
     return Dependence::Unknown;
   }
 
-  // The logic below currently only supports StrideA ==  StrideB, i.e. there's a
-  // common stride.
   if (!CommonStride)
     return Dependence::Unknown;
 
-  const APInt &Val = C->getAPInt();
-  int64_t Distance = Val.getSExtValue();
-
   // Bail out early if passed-in parameters make vectorization not feasible.
   unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
                            VectorizerParams::VectorizationFactor : 1);
@@ -2169,8 +2170,8 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
   //     | A[0] |      | A[2] |      | A[4] |      | A[6] |      |
   //                              | B[0] |      | B[2] |      | B[4] |
   //
-  // Distance needs for vectorizing iterations except the last iteration:
-  // 4 * 2 * (MinNumIter - 1). Distance needs for the last iteration: 4.
+  // MinDistance needs for vectorizing iterations except the last iteration:
+  // 4 * 2 * (MinNumIter - 1). MinDistance needs for the last iteration: 4.
   // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
   //
   // If MinNumIter is 2, it is vectorizable as the minimum distance needed is
@@ -2179,11 +2180,23 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
   // If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4),
   // the minimum distance needed is 28, which is greater than distance. It is
   // not safe to do vectorization.
+
+  // We know that Dist is positive, but it may not be constant. Use the signed
+  // minimum for computations below, as this ensures we compute the closest
+  // possible dependence distance.
+  int64_t MinDistance = SE.getSignedRangeMin(Dist).getSExtValue();
   uint64_t MinDistanceNeeded =
-      TypeByteSize * (*CommonStride) * (MinNumIter - 1) + TypeByteSize;
-  if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) {
-    LLVM_DEBUG(dbgs() << "LAA: Failure because of positive distance "
-                      << Distance << '\n');
+      TypeByteSize * *CommonStride * (MinNumIter - 1) + TypeByteSize;
+  if (MinDistanceNeeded > static_cast<uint64_t>(MinDistance)) {
+    if (!isa<SCEVConstant>(Dist)) {
+      // For non-constant distances, we checked the lower bound of the
+      // dependence distance and the distance may be larger at runtime (and safe
+      // for vectorization). Classify it as Unknown, so we re-try with runtime
+      // checks.
+      return Dependence::Unknown;
+    }
+    LLVM_DEBUG(dbgs() << "LAA: Failure because of positive minimum distance "
+                      << MinDistance << '\n');
     return Dependence::Backward;
   }
 
@@ -2212,12 +2225,13 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
   // is 8, which is less than 2 and forbidden vectorization, But actually
   // both A and B could be vectorized by 2 iterations.
   MinDepDistBytes =
-      std::min(static_cast<uint64_t>(Distance), MinDepDistBytes);
+      std::min(static_cast<uint64_t>(MinDistance), MinDepDistBytes);
 
   bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
   uint64_t MinDepDistBytesOld = MinDepDistBytes;
   if (IsTrueDataDependence && EnableForwardingConflictDetection &&
-      couldPreventStoreLoadForward(Distance, TypeByteSize)) {
+      isa<SCEVConstant>(Dist) &&
+      couldPreventStoreLoadForward(MinDistance, TypeByteSize)) {
     // Sanity check that we didn't update MinDepDistBytes when calling
     // couldPreventStoreLoadForward
     assert(MinDepDistBytes == MinDepDistBytesOld &&
@@ -2229,10 +2243,18 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
 
   // An update to MinDepDistBytes requires an update to MaxSafeVectorWidthInBits
   // since there is a backwards dependency.
-  uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * (*CommonStride));
-  LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
+  uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * *CommonStride);
+  LLVM_DEBUG(dbgs() << "LAA: Positive min distance " << MinDistance
                     << " with max VF = " << MaxVF << '\n');
+
   uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
+  if (!isa<SCEVConstant>(Dist) && MaxVFInBits < MaxTargetVectorWidthInBits) {
+    // For non-constant distances, we checked the lower bound of the dependence
+    // distance and the distance may be larger at runtime (and safe for
+    // vectorization). Classify it as Unknown, so we re-try with runtime checks.
+    return Dependence::Unknown;
+  }
+
   MaxSafeVectorWidthInBits = std::min(MaxSafeVectorWidthInBits, MaxVFInBits);
   return Dependence::BackwardVectorizable;
 }
@@ -3015,11 +3037,25 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
 }
 
 LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
+                               const TargetTransformInfo *TTI,
                                const TargetLibraryInfo *TLI, AAResults *AA,
                                DominatorTree *DT, LoopInfo *LI)
     : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
-      PtrRtChecking(nullptr),
-      DepChecker(std::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L) {
+      PtrRtChecking(nullptr), TheLoop(L) {
+  unsigned MaxTargetVectorWidthInBits = std::numeric_limits<unsigned>::max();
+  if (TTI) {
+    TypeSize FixedWidth =
+        TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector);
+    if (FixedWidth.isNonZero())
+      MaxTargetVectorWidthInBits = FixedWidth.getFixedValue();
+
+    TypeSize ScalableWidth =
+        TTI->getRegisterBitWidth(TargetTransformInfo::RGK_ScalableVector);
+    if (ScalableWidth.isNonZero())
+      MaxTargetVectorWidthInBits = std::numeric_limits<unsigned>::max();
+  }
+  DepChecker =
+      std::make_unique<MemoryDepChecker>(*PSE, L, MaxTargetVectorWidthInBits);
   PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE);
   if (canAnalyzeLoop()) {
     analyzeLoop(AA, LI, TLI, DT);
@@ -3079,7 +3115,7 @@ const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L) {
 
   if (I.second)
     I.first->second =
-        std::make_unique<LoopAccessInfo>(&L, &SE, TLI, &AA, &DT, &LI);
+        std::make_unique<LoopAccessInfo>(&L, &SE, TTI, TLI, &AA, &DT, &LI);
 
   return *I.first->second;
 }
@@ -3108,8 +3144,9 @@ LoopAccessInfoManager LoopAccessAnalysis::run(Function &F,
   auto &AA = FAM.getResult<AAManager>(F);
   auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
   auto &LI = FAM.getResult<LoopAnalysis>(F);
+  auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
   auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
-  return LoopAccessInfoManager(SE, AA, DT, LI, &TLI);
+  return LoopAccessInfoManager(SE, AA, DT, LI, &TTI, &TLI);
 }
 
 AnalysisKey LoopAccessAnalysis::Key;
diff --git a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
index 0e9cf328f149b..a7f8a22ece277 100644
--- a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
@@ -1005,7 +1005,7 @@ PreservedAnalyses LoopFlattenPass::run(LoopNest &LN, LoopAnalysisManager &LAM,
   // in simplified form, and also needs LCSSA. Running
   // this pass will simplify all loops that contain inner loops,
   // regardless of whether anything ends up being flattened.
-  LoopAccessInfoManager LAIM(AR.SE, AR.AA, AR.DT, AR.LI, nullptr);
+  LoopAccessInfoManager LAIM(AR.SE, AR.AA, AR.DT, AR.LI, &AR.TTI, nullptr);
   for (Loop *InnerLoop : LN.getLoops()) {
     auto *OuterLoop = InnerLoop->getParentLoop();
     if (!OuterLoop)
diff --git a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
index f39c24484840c..663715948241d 100644
--- a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -582,7 +582,7 @@ PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM,
   const Function *F = L.getHeader()->getParent();
   OptimizationRemarkEmitter ORE(F);
 
-  LoopAccessInfoManager LAIs(*SE, *AA, *DT, LAR.LI, nullptr);
+  LoopAccessInfoManager LAIs(*SE, *AA, *DT, LAR.LI, nullptr, nullptr);
   if (!LoopVersioningLICM(AA, SE, &ORE, LAIs, LAR.LI, &L).run(DT))
     return PreservedAnalyses::all();
   return getLoopPassPreservedAnalyses();
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/multiple-strides-rt-memory-checks.ll b/llvm/test/Analysis/LoopAccessAnalysis/multiple-strides-rt-memory-checks.ll
index 3b4f2a170d8cb..d96a6ea7c555f 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/multiple-strides-rt-memory-checks.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/multiple-strides-rt-memory-checks.ll
@@ -23,7 +23,7 @@
 
 ; CHECK: function 'Test':
 ; CHECK:   .inner:
-; CHECK-NEXT:     Memory dependences are safe with run-time checks
+; CHECK-NEXT:     Memory dependences are safe with a maximum safe vector width of 2048 bits with run-time checks
 ; CHECK-NEXT:     Dependences:
 ; CHECK-NEXT:     Run-time memory checks:
 ; CHECK:          Check 0:
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-distance-backward.ll b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-distance-backward.ll
index 5a95dcca10502..39e18f8eac38c 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-distance-backward.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-distance-backward.ll
@@ -1,45 +1,47 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
-; RUN: opt -passes='print<access-info>' -disable-output %s 2>&1 | FileCheck %s
-; RUN: opt -passes='print<access-info>' -disable-output -mtriple=arm64-apple-macosx %s 2>&1 | FileCheck %s
-; RUN: opt -passes='print<access-info>' -disable-output -mtriple=arm64-apple-macosx -mattr=+sve %s 2>&1 | FileCheck %s
+; RUN: opt -passes='print<access-info>' -disable-output %s 2>&1 | FileCheck --check-prefixes=COMMON,MAXLEN %s
+; RUN: opt -passes='print<access-info>' -disable-output -mtriple=arm64-apple-macosx %s 2>&1 | FileCheck --check-prefixes=COMMON,VW128 %s
+; RUN: opt -passes='print<access-info>' -disable-output -mtriple=arm64-apple-macosx -mattr=+sve %s 2>&1 | FileCheck --check-prefixes=COMMON,MAXLEN %s
 
 ; REQUIRES: aarch64-registered-target
 
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
+; If the dependence distance is not a constant, whether it gets identified as backwards or unknown depends on the minimum distance and the target's vector length.
+
 define void @backward_min_distance_8(ptr %A, i64 %N) {
-; CHECK-LABEL: 'backward_min_distance_8'
-; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Memory dependences are safe with run-time checks
-; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:      Run-time memory checks:
-; CHECK-NEXT:      Check 0:
-; CHECK-NEXT:        Comparing group ([[GRP1:0x[0-9a-f]+]]):
-; CHECK-NEXT:          %gep.off.iv = getelementptr inbounds i8, ptr %gep.off, i64 %iv
-; CHECK-NEXT:        Against group ([[GRP2:0x[0-9a-f]+]]):
-; CHECK-NEXT:          %gep = getelementptr inbounds i8, ptr %A, i64 %iv
-; CHECK-NEXT:      Grouped accesses:
-; CHECK-NEXT:        Group [[GRP1]]:
-; CHECK-NEXT:          (Low: {(1 + %A)<nuw>,+,1}<nuw><%outer.header> High: {(257 + %A),+,1}<nw><%outer.header>)
-; CHECK-NEXT:            Member: {{\{\{}}(1 + %A)<nuw>,+,1}<nuw><%outer.header>,+,1}<nuw><%loop>
-; CHECK-NEXT:        Group [[GRP2]]:
-; CHECK-NEXT:          (Low: %A High: (256 + %A))
-; CHECK-NEXT:            Member: {%A,+,1}<nuw><%loop>
-; CHECK-EMPTY:
-; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
-; CHECK-NEXT:      SCEV assumptions:
-; CHECK-EMPTY:
-; CHECK-NEXT:      Expressions re-written:
-; CHECK-NEXT:    outer.header:
-; CHECK-NEXT:      Report: loop is not the innermost loop
-; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:      Run-time memory checks:
-; CHECK-NEXT:      Grouped accesses:
-; CHECK-EMPTY:
-; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
-; CHECK-NEXT:      SCEV assumptions:
-; CHECK-EMPTY:
-; CHECK-NEXT:      Expressions re-written:
+; COMMON-LABEL: 'backward_min_distance_8'
+; COMMON-NEXT:    loop:
+; COMMON-NEXT:      Memory dependences are safe with run-time checks
+; COMMON-NEXT:      Dependences:
+; COMMON-NEXT:      Run-time memory checks:
+; COMMON-NEXT:      Check 0:
+; COMMON-NEXT:        Comparing group ([[GRP1:0x[0-9a-f]+]]):
+; COMMON-NEXT:          %gep.off.iv = getelementptr inbounds i8, ptr %gep.off, i64 %iv
+; COMMON-NEXT:        Against group ([[GRP2:0x[0-9a-f]+]]):
+; COMMON-NEXT:          %gep = getelementptr inbounds i8, ptr %A, i64 %iv
+; COMMON-NEXT:      Grouped accesses:
+; COMMON-NEXT:        Group [[GRP1]]:
+; COMMON-NEXT:          (Low: {(1 + %A)<nuw>,+,1}<nuw><%outer.header> High: {(257 + %A),+,1}<nw><%outer.header>)
+; COMMON-NEXT:            Member: {{\{\{}}(1 + %A)<nuw>,+,1}<nuw><%outer.header>,+,1}<nuw><%loop>
+; COMMON-NEXT:        Group [[GRP2]]:
+; COMMON-NEXT:          (Low: %A High: (256 + %A))
+; COMMON-NEXT:            Member: {%A,+,1}<nuw><%loop>
+; COMMON-EMPTY:
+; COMMON-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; COMMON-NEXT:      SCEV assumptions:
+; COMMON-EMPTY:
+; COMMON-NEXT:      Expressions re-written:
+; COMMON-NEXT:    outer.header:
+; COMMON-NEXT:      Report: loop is not the innermost loop
+; COMMON-NEXT:      Dependences:
+; COMMON-NEXT:      Run-time memory checks:
+; COMMON-NEXT:      Grouped accesses:
+; COMMON-EMPTY:
+; COMMON-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; COMMON-NEXT:      SCEV assumptions:
+; COMMON-EMPTY:
+; COMMON-NEXT:      Expressions re-written:
 ;
 entry:
   br label %outer.header
@@ -70,38 +72,38 @@ exit:
 }
 
 define void @backward_min_distance_120(ptr %A, i64 %N) {
-; CHECK-LABEL: 'backward_min_distance_120'
-; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Memory dependences are safe with run-time checks
-; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:      Run-time memory checks:
-; CHECK-NEXT:      Check 0:
-; CHECK-NEXT:        Comparing group ([[GRP3:0x[0-9a-f]+]]):
-; CHECK-NEXT:          %gep.off.iv = getelementptr inbounds i8, ptr %gep.off, i64 %iv
-; CHECK-NEXT:        Against group ([[GRP4:0x[0-9a-f]+]]):
-; CHECK-NEXT:          %gep = getelementptr inbounds i8, ptr %A, i64 %iv
-; CHECK-NEXT:      Grouped accesses:
-; CHECK-NEXT:        Group [[GRP3]]:
-; CHECK-NEXT:          (Low: {(15 + %A)<nuw>,+,1}<nuw><%outer.header> High: {(271 + %A),+,1}<nw><%outer.header>)
-; CHECK-NEXT:            Member: {{\{\{}}(15 + %A)<nuw>,+,1}<nuw><%outer.header>,+,1}<nuw><%loop>
-; CHECK-NEXT:        Group [[GRP4]]:
-; CHECK-NEXT:          (Low: %A High: (256 + %A))
-; CHECK-NEXT:            Member: {%A,+,1}<nuw><%loop>
-; CHECK-EMPTY:
-; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
-; CHECK-NEXT:      SCEV assumptions:
-; CHECK-EMPTY:
-; CHECK-NEXT:      Expressions re-written:
-; CHECK-NEXT:    outer.header:
-; CHECK-NEXT:      Report: loop is not the innermost loop
-; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:      Run-time memory checks:
-; CHECK-NEXT:      Grouped accesses:
-; CHECK-EMPTY:
-; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
-; CHECK-NEXT:      SCEV assumptions:
-; CHECK-EMPTY:
-; CHECK-NEXT:      Expressions re-written:
+; COMMON-LABEL: 'backward_min_distance_120'
+; COMMON-NEXT:    loop:
+; COMMON-NEXT:      Memory dependences are safe with run-time checks
+; COMMON-NEXT:      Dependences:
+; COMMON-NEXT:      Run-time memory checks:
+; COMMON-NEXT:      Check 0:
+; COMMON-NEXT:        Comparing group ([[GRP3:0x[0-9a-f]+]]):
+; COMMON-NEXT:          %gep.off.iv = getelementptr inbounds i8, ptr %gep.off, i64 %iv
+; COMMON-NEXT:        Against group ([[GRP4:0x[0-9a-f]+]]):
+; COMMON-NEXT:          %gep = getelementptr inbounds i8, ptr %A, i64 %iv
+; COMMON-NEXT:      Grouped accesses:
+; COMMON-NEXT:        Group [[GRP3]]:
+; COMMON-NEXT:          (Low: {(15 + %A)<nuw>,+,1}<nuw><%outer.header> High: {(271 + %A),+,1}<nw><%outer.header>)
+; COMMON-NEXT:            Member: {{\{\{}}(15 + %A)<nuw>,+,1}<nuw><%outer.header>,+,1}<nuw><%loop>
+; COMMON-NEXT:        Group [[GRP4]]:
+; COMMON-NEXT:          (Low: %A High: (256 + %A))
+; COMMON-NEXT:            Member: {%A,+,1}<nuw><%loop>
+; COMMON-EMPTY:
+; COMMON-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; COMMON-NEXT:      SCEV assumptions:
+; COMMON-EMPTY:
+; COMMON-NEXT:      Expressions re-written:
+; COMMON-NEXT:    outer.header:
+; COMMON-NEXT:      Report: loop is not the innermost loop
+; COMMON-NEXT:      Dependences:
+; COMMON-NEXT:      Run-time memory checks:
+; COMMON-NEXT:      Grouped accesses:
+; COMMON-EMPTY:
+; COMMON-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; COMMON-NEXT:      SCEV assumptions:
+; COMMON-EMPTY:
+; COMMON-NEXT:      Expressions re-written:
 ;
 entry:
   br label %outer.header
@@ -131,41 +133,65 @@ exit:
   ret void
 }
 
-
-declare void @llvm.assume(i1)
 define void @backward_min_distance_128(ptr %A, i64 %N) {
-; CHECK-LABEL: 'backward_min_distance_128'
-; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Memory dependences are safe with run-time checks
-; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:      Run-time memory checks:
-; CHECK-NEXT:      Check 0:
-; CHECK-NEXT:        Comparing group ([[GRP5:0x[0-9a-f]+]]):
-; CHECK-NEXT:          %gep.off.iv = getelementptr inbounds i8, ptr %gep.off, i64 %iv
-; CHECK-NEXT:        Against group ([[GRP6:0x[0-9a-f]+]]):
-; CHECK-NEXT:          %gep = getelementptr inbounds i8, ptr %A, i64 %iv
-; CHECK-NEXT:      Grouped accesses:
-; CHECK-NEXT:        Group [[GRP5]]:
-; CHECK-NEXT:          (Low: {(16 + %A)<nuw>,+,1}<nuw><%outer.header> High: {(272 + %A),+,1}<nw><%outer.header>)
-; CHECK-NEXT:            Member: {{\{\{}}(16 + %A)<nuw>,+,1}<nuw><%outer.header>,+,1}<nuw><%loop>
-; CHECK-NEXT:        Group [[GRP6]]:
-; CHECK-NEXT:          (Low: %A High: (256 + %A))
-; CHECK-NEXT:            Member: {%A,+,1}<nuw><%loop>
-; CHECK-EMPTY:
-; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
-; CHECK-NEXT:      SCEV assumptions:
-; CHECK-EMPTY:
-; CHECK-NEXT:      Expressions re-written:
-; CHECK-NEXT:    outer.header:
-; CHECK-NEXT:      Report: loop is not the innermost loop
-; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:      Run-time memory checks:
-; CHECK-NEXT:      Grouped accesses:
-; CHECK-EMPTY:
-; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
-; CHECK-NEXT:      SCEV assumptions:
-; CHECK-EMPTY:
-; CHECK-NEXT:      Expressions re-written:
+; MAXLEN-LABEL: 'backward_min_distance_128'
+; MAXLEN-NEXT:    loop:
+; MAXLEN-NEXT:      Memory dependences are safe with run-time checks
+; MAXLEN-NEXT:      Dependences:
+; MAXLEN-NEXT:      Run-time memory checks:
+; MAXLEN-NEXT:      Check 0:
+; MAXLEN-NEXT:        Comparing group ([[GRP13:0x[0-9a-f]+]]):
+; MAXLEN-NEXT:          %gep.off.iv = getelementptr inbounds i8, ptr %gep.off, i64 %iv
+; MAXLEN-NEXT:        Against group ([[GRP14:0x[0-9a-f]+]]):
+; MAXLEN-NEXT:          %gep = getelementptr inbounds i8, ptr %A, i64 %iv
+; MAXLEN-NEXT:      Grouped accesses:
+; MAXLEN-NEXT:        Group [[GRP13]]:
+; MAXLEN-NEXT:          (Low: {(16 + %A)<nuw>,+,1}<nuw><%outer.header> High: {(272 + %A),+,1}<nw><%outer.header>)
+; MAXLEN-NEXT:            Member: {{\{\{}}(16 + %A)<nuw>,+,1}<nuw><%outer.header>,+,1}<nuw><%loop>
+; MAXLEN-NEXT:        Group [[GRP14]]:
+; MAXLEN-NEXT:          (Low: %A High: (256 + %A))
+; MAXLEN-NEXT:            Member: {%A,+,1}<nuw><%loop>
+; MAXLEN-EMPTY:
+; MAXLEN-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; MAXLEN-NEXT:      SCEV assumptions:
+; MAXLEN-EMPTY:
+; MAXLEN-NEXT:      Expressions re-written:
+; MAXLEN-NEXT:    outer.header:
+; MAXLEN-NEXT:      Report: loop is not the innermost loop
+; MAXLEN-NEXT:      Dependences:
+; MAXLEN-NEXT:      Run-time memory checks:
+; MAXLEN-NEXT:      Grouped accesses:
+; MAXLEN-EMPTY:
+; MAXLEN-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; MAXLEN-NEXT:      SCEV assumptions:
+; MAXLEN-EMPTY:
+; MAXLEN-NEXT:      Expressions re-written:
+;
+; VW128-LABEL: 'backward_min_distance_128'
+; VW128-NEXT:    loop:
+; VW128-NEXT:      Memory dependences are safe with a maximum safe vector width of 128 bits
+; VW128-NEXT:      Dependences:
+; VW128-NEXT:        BackwardVectorizable:
+; VW128-NEXT:            %l = load i8, ptr %gep, align 4 ->
+; VW128-NEXT:            store i8 %add, ptr %gep.off.iv, align 4
+; VW128-EMPTY:
+; VW128-NEXT:      Run-time memory checks:
+; VW128-NEXT:      Grouped accesses:
+; VW128-EMPTY:
+; VW128-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; VW128-NEXT:      SCEV assumptions:
+; VW128-EMPTY:
+; VW128-NEXT:      Expressions re-written:
+; VW128-NEXT:    outer.header:
+; VW128-NEXT:      Report: loop is not the innermost loop
+; VW128-NEXT:      Dependences:
+; VW128-NEXT:      Run-time memory checks:
+; VW128-NEXT:      Grouped accesses:
+; VW128-EMPTY:
+; VW128-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; VW128-NEXT:      SCEV assumptions:
+; VW128-EMPTY:
+; VW128-NEXT:      Expressions re-written:
 ;
 entry:
   br label %outer.header
@@ -196,38 +222,64 @@ exit:
 }
 
 define void @backward_min_distance_256(ptr %A, i64 %N) {
-; CHECK-LABEL: 'backward_min_distance_256'
-; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Memory dependences are safe with run-time checks
-; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:      Run-time memory checks:
-; CHECK-NEXT:      Check 0:
-; CHECK-NEXT:        Comparing group ([[GRP7:0x[0-9a-f]+]]):
-; CHECK-NEXT:          %gep.off.iv = getelementptr inbounds i8, ptr %gep.off, i64 %iv
-; CHECK-NEXT:        Against group ([[GRP8:0x[0-9a-f]+]]):
-; CHECK-NEXT:          %gep = getelementptr inbounds i8, ptr %A, i64 %iv
-; CHECK-NEXT:      Grouped accesses:
-; CHECK-NEXT:        Group [[GRP7]]:
-; CHECK-NEXT:          (Low: {(32 + %A)<nuw>,+,1}<nuw><%outer.header> High: {(288 + %A),+,1}<nw><%outer.header>)
-; CHECK-NEXT:            Member: {{\{\{}}(32 + %A)<nuw>,+,1}<nuw><%outer.header>,+,1}<nuw><%loop>
-; CHECK-NEXT:        Group [[GRP8]]:
-; CHECK-NEXT:          (Low: %A High: (256 + %A))
-; CHECK-NEXT:            Member: {%A,+,1}<nuw><%loop>
-; CHECK-EMPTY:
-; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
-; CHECK-NEXT:      SCEV assumptions:
-; CHECK-EMPTY:
-; CHECK-NEXT:      Expressions re-written:
-; CHECK-NEXT:    outer.header:
-; CHECK-NEXT:      Report: loop is not the innermost loop
-; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:      Run-time memory checks:
-; CHECK-NEXT:      Grouped accesses:
-; CHECK-EMPTY:
-; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
-; CHECK-NEXT:      SCEV assumptions:
-; CHECK-EMPTY:
-; CHECK-NEXT:      Expressions re-written:
+; MAXLEN-LABEL: 'backward_min_distance_256'
+; MAXLEN-NEXT:    loop:
+; MAXLEN-NEXT:      Memory dependences are safe with run-time checks
+; MAXLEN-NEXT:      Dependences:
+; MAXLEN-NEXT:      Run-time memory checks:
+; MAXLEN-NEXT:      Check 0:
+; MAXLEN-NEXT:        Comparing group ([[GRP17:0x[0-9a-f]+]]):
+; MAXLEN-NEXT:          %gep.off.iv = getelementptr inbounds i8, ptr %gep.off, i64 %iv
+; MAXLEN-NEXT:        Against group ([[GRP18:0x[0-9a-f]+]]):
+; MAXLEN-NEXT:          %gep = getelementptr inbounds i8, ptr %A, i64 %iv
+; MAXLEN-NEXT:      Grouped accesses:
+; MAXLEN-NEXT:        Group [[GRP17]]:
+; MAXLEN-NEXT:          (Low: {(32 + %A)<nuw>,+,1}<nuw><%outer.header> High: {(288 + %A),+,1}<nw><%outer.header>)
+; MAXLEN-NEXT:            Member: {{\{\{}}(32 + %A)<nuw>,+,1}<nuw><%outer.header>,+,1}<nuw><%loop>
+; MAXLEN-NEXT:        Group [[GRP18]]:
+; MAXLEN-NEXT:          (Low: %A High: (256 + %A))
+; MAXLEN-NEXT:            Member: {%A,+,1}<nuw><%loop>
+; MAXLEN-EMPTY:
+; MAXLEN-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; MAXLEN-NEXT:      SCEV assumptions:
+; MAXLEN-EMPTY:
+; MAXLEN-NEXT:      Expressions re-written:
+; MAXLEN-NEXT:    outer.header:
+; MAXLEN-NEXT:      Report: loop is not the innermost loop
+; MAXLEN-NEXT:      Dependences:
+; MAXLEN-NEXT:      Run-time memory checks:
+; MAXLEN-NEXT:      Grouped accesses:
+; MAXLEN-EMPTY:
+; MAXLEN-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; MAXLEN-NEXT:      SCEV assumptions:
+; MAXLEN-EMPTY:
+; MAXLEN-NEXT:      Expressions re-written:
+;
+; VW128-LABEL: 'backward_min_distance_256'
+; VW128-NEXT:    loop:
+; VW128-NEXT:      Memory dependences are safe with a maximum safe vector width of 256 bits
+; VW128-NEXT:      Dependences:
+; VW128-NEXT:        BackwardVectorizable:
+; VW128-NEXT:            %l = load i8, ptr %gep, align 4 ->
+; VW128-NEXT:            store i8 %add, ptr %gep.off.iv, align 4
+; VW128-EMPTY:
+; VW128-NEXT:      Run-time memory checks:
+; VW128-NEXT:      Grouped accesses:
+; VW128-EMPTY:
+; VW128-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; VW128-NEXT:      SCEV assumptions:
+; VW128-EMPTY:
+; VW128-NEXT:      Expressions re-written:
+; VW128-NEXT:    outer.header:
+; VW128-NEXT:      Report: loop is not the innermost loop
+; VW128-NEXT:      Dependences:
+; VW128-NEXT:      Run-time memory checks:
+; VW128-NEXT:      Grouped accesses:
+; VW128-EMPTY:
+; VW128-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; VW128-NEXT:      SCEV assumptions:
+; VW128-EMPTY:
+; VW128-NEXT:      Expressions re-written:
 ;
 entry:
   br label %outer.header
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp
index 396919763c933..910fc24455a6c 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp
@@ -44,7 +44,7 @@ class VPlanSlpTest : public VPlanTestBase {
     AARes.reset(new AAResults(TLI));
     AARes->addAAResult(*BasicAA);
     PSE.reset(new PredicatedScalarEvolution(*SE, *L));
-    LAI.reset(new LoopAccessInfo(L, &*SE, &TLI, &*AARes, &*DT, &*LI));
+    LAI.reset(new LoopAccessInfo(L, &*SE, nullptr, &TLI, &*AARes, &*DT, &*LI));
     IAI.reset(new InterleavedAccessInfo(*PSE, L, &*DT, &*LI, &*LAI));
     IAI->analyzeInterleaving(false);
     return {Plan, *IAI};



More information about the llvm-commits mailing list