[llvm] r273064 - [LAA] Enable symbolic stride speculation for all LAA clients

Adam Nemet via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 17 15:35:42 PDT 2016


Author: anemet
Date: Fri Jun 17 17:35:41 2016
New Revision: 273064

URL: http://llvm.org/viewvc/llvm-project?rev=273064&view=rev
Log:
[LAA] Enable symbolic stride speculation for all LAA clients

This is a functional change for LLE and LDist.  The other clients (LV,
LVerLICM) already had this explicitly enabled.

The temporary boolean parameter to LAA is removed that allowed turning
off speculation of symbolic strides.  This makes LAA's caching interface
LAA::getInfo only take the loop as the parameter.  This makes the
interface more friendly to the new Pass Manager.

The flag -enable-mem-access-versioning is moved from LV to a LAA which
now allows turning off speculation globally.

Added:
    llvm/trunk/test/Transforms/LoopDistribute/symbolic-stride.ll
Modified:
    llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h
    llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp
    llvm/trunk/lib/Transforms/Scalar/LoopVersioningLICM.cpp
    llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/trunk/test/Transforms/LoopLoadElim/symbolic-stride.ll

Modified: llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h?rev=273064&r1=273063&r2=273064&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h (original)
+++ llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h Fri Jun 17 17:35:41 2016
@@ -513,8 +513,7 @@ class LoopAccessInfo {
 public:
   LoopAccessInfo(Loop *L, ScalarEvolution *SE, const DataLayout &DL,
                  const TargetLibraryInfo *TLI, AliasAnalysis *AA,
-                 DominatorTree *DT, LoopInfo *LI,
-                 bool SpeculateSymbolicStrides);
+                 DominatorTree *DT, LoopInfo *LI);
 
   /// Return true we can analyze the memory accesses in the loop and there are
   /// no memory dependence cycles.
@@ -585,11 +584,6 @@ public:
   /// \brief Print the information about the memory accesses in the loop.
   void print(raw_ostream &OS, unsigned Depth = 0) const;
 
-  /// \brief Used to ensure that if the analysis was run with speculating the
-  /// value of symbolic strides, the client queries it with the same assumption.
-  /// Only used in DEBUG build but we don't want NDEBUG-dependent ABI.
-  bool SpeculateSymbolicStrides;
-
   /// \brief Checks existence of store to invariant address inside loop.
   /// If the loop has any store to invariant address, then it returns true,
   /// else returns false.
@@ -715,11 +709,8 @@ public:
 
   /// \brief Query the result of the loop access information for the loop \p L.
   ///
-  /// \p SpeculateSymbolicStrides enables symbolic value speculation.  The
-  /// corresponding run-time checks are collected in LAI::PSE.
-  ///
   /// If there is no cached result available run the analysis.
-  const LoopAccessInfo &getInfo(Loop *L, bool SpeculateSymbolicStrides = false);
+  const LoopAccessInfo &getInfo(Loop *L);
 
   void releaseMemory() override {
     // Invalidate the cache when the pass is freed.

Modified: llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp?rev=273064&r1=273063&r2=273064&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp (original)
+++ llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp Fri Jun 17 17:35:41 2016
@@ -65,6 +65,21 @@ static cl::opt<unsigned>
                             "loop-access analysis (default = 100)"),
                    cl::init(100));
 
+/// This enables versioning on the strides of symbolically striding memory
+/// accesses in code like the following.
+///   for (i = 0; i < N; ++i)
+///     A[i * Stride1] += B[i * Stride2] ...
+///
+/// Will be roughly translated to
+///    if (Stride1 == 1 && Stride2 == 1) {
+///      for (i = 0; i < N; i+=4)
+///       A[i:i+3] += ...
+///    } else
+///      ...
+static cl::opt<bool> EnableMemAccessVersioning(
+    "enable-mem-access-versioning", cl::init(true), cl::Hidden,
+    cl::desc("Enable symbolic stride memory access versioning"));
+
 /// \brief Enable store-to-load forwarding conflict detection. This option can
 /// be disabled for correctness testing.
 static cl::opt<bool> EnableForwardingConflictDetection(
@@ -1540,7 +1555,7 @@ void LoopAccessInfo::analyzeLoop() {
         NumLoads++;
         Loads.push_back(Ld);
         DepChecker.addAccess(Ld);
-        if (SpeculateSymbolicStrides)
+        if (EnableMemAccessVersioning)
           collectStridedAccess(Ld);
         continue;
       }
@@ -1564,7 +1579,7 @@ void LoopAccessInfo::analyzeLoop() {
         NumStores++;
         Stores.push_back(St);
         DepChecker.addAccess(St);
-        if (SpeculateSymbolicStrides)
+        if (EnableMemAccessVersioning)
           collectStridedAccess(St);
       }
     } // Next instr.
@@ -1904,11 +1919,9 @@ void LoopAccessInfo::collectStridedAcces
 LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
                                const DataLayout &DL,
                                const TargetLibraryInfo *TLI, AliasAnalysis *AA,
-                               DominatorTree *DT, LoopInfo *LI,
-                               bool SpeculateSymbolicStrides)
-    : SpeculateSymbolicStrides(SpeculateSymbolicStrides), PSE(*SE, *L),
-      PtrRtChecking(SE), DepChecker(PSE, L), TheLoop(L), DL(DL), TLI(TLI),
-      AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
+                               DominatorTree *DT, LoopInfo *LI)
+    : PSE(*SE, *L), PtrRtChecking(SE), DepChecker(PSE, L), TheLoop(L), DL(DL),
+      TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
       MaxSafeDepDistBytes(-1U), CanVecMem(false),
       StoreToLoopInvariantAddress(false) {
   if (canAnalyzeLoop())
@@ -1955,19 +1968,12 @@ void LoopAccessInfo::print(raw_ostream &
   PSE.print(OS, Depth);
 }
 
-const LoopAccessInfo &
-LoopAccessAnalysis::getInfo(Loop *L, bool SpeculateSymbolicStrides) {
+const LoopAccessInfo &LoopAccessAnalysis::getInfo(Loop *L) {
   auto &LAI = LoopAccessInfoMap[L];
 
-#ifndef NDEBUG
-  assert((!LAI || LAI->SpeculateSymbolicStrides == SpeculateSymbolicStrides) &&
-         "Symbolic strides changed for loop");
-#endif
-
   if (!LAI) {
     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
-    LAI = llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, LI,
-                                            SpeculateSymbolicStrides);
+    LAI = llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, LI);
   }
   return *LAI.get();
 }

Modified: llvm/trunk/lib/Transforms/Scalar/LoopVersioningLICM.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopVersioningLICM.cpp?rev=273064&r1=273063&r2=273064&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopVersioningLICM.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopVersioningLICM.cpp Fri Jun 17 17:35:41 2016
@@ -385,7 +385,7 @@ bool LoopVersioningLICM::legalLoopInstru
         return false;
     }
   // Get LoopAccessInfo from current loop.
-  LAI = &LAA->getInfo(CurLoop, true);
+  LAI = &LAA->getInfo(CurLoop);
   // Check LoopAccessInfo for need of runtime check.
   if (LAI->getRuntimePointerChecking()->getChecks().empty()) {
     DEBUG(dbgs() << "    LAA: Runtime check not found !!\n");

Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=273064&r1=273063&r2=273064&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Fri Jun 17 17:35:41 2016
@@ -130,21 +130,6 @@ static cl::opt<bool> MaximizeBandwidth(
     cl::desc("Maximize bandwidth when selecting vectorization factor which "
              "will be determined by the smallest type in loop."));
 
-/// This enables versioning on the strides of symbolically striding memory
-/// accesses in code like the following.
-///   for (i = 0; i < N; ++i)
-///     A[i * Stride1] += B[i * Stride2] ...
-///
-/// Will be roughly translated to
-///    if (Stride1 == 1 && Stride2 == 1) {
-///      for (i = 0; i < N; i+=4)
-///       A[i:i+3] += ...
-///    } else
-///      ...
-static cl::opt<bool> EnableMemAccessVersioning(
-    "enable-mem-access-versioning", cl::init(true), cl::Hidden,
-    cl::desc("Enable symbolic stride memory access versioning"));
-
 static cl::opt<bool> EnableInterleavedMemAccesses(
     "enable-interleaved-mem-accesses", cl::init(false), cl::Hidden,
     cl::desc("Enable vectorization on interleaved memory accesses in a loop"));
@@ -4970,7 +4955,7 @@ void LoopVectorizationLegality::collectL
 }
 
 bool LoopVectorizationLegality::canVectorizeMemory() {
-  LAI = &LAA->getInfo(TheLoop, EnableMemAccessVersioning);
+  LAI = &LAA->getInfo(TheLoop);
   auto &OptionalReport = LAI->getReport();
   if (OptionalReport)
     emitAnalysis(VectorizationReport(*OptionalReport));

Added: llvm/trunk/test/Transforms/LoopDistribute/symbolic-stride.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopDistribute/symbolic-stride.ll?rev=273064&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopDistribute/symbolic-stride.ll (added)
+++ llvm/trunk/test/Transforms/LoopDistribute/symbolic-stride.ll Fri Jun 17 17:35:41 2016
@@ -0,0 +1,65 @@
+; RUN: opt -basicaa -loop-distribute -S < %s | \
+; RUN:     FileCheck %s --check-prefix=ALL --check-prefix=STRIDE_SPEC
+
+; RUN: opt -basicaa -loop-distribute -S -enable-mem-access-versioning=0 < %s | \
+; RUN:     FileCheck %s --check-prefix=ALL --check-prefix=NO_STRIDE_SPEC
+
+; If we don't speculate stride for 1 we can't distribute along the line
+; because we could have a backward dependence:
+;
+;   for (i = 0; i < n; i++) {
+;     A[i + 1] = A[i] * B[i];
+;     =======================
+;     C[i] = D[i] * A[stride * i];
+;   }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+; ALL-LABEL: @f(
+define void @f(i32* noalias %a,
+               i32* noalias %b,
+               i32* noalias %c,
+               i32* noalias %d,
+               i64 %stride) {
+entry:
+  br label %for.body
+
+; STRIDE_SPEC: %ident.check = icmp ne i64 %stride, 1
+
+; STRIDE_SPEC: for.body.ldist1:
+; NO_STRIDE_SPEC-NOT: for.body.ldist1:
+
+for.body:                                         ; preds = %for.body, %entry
+  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+
+  %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
+  %loadA = load i32, i32* %arrayidxA, align 4
+
+  %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
+  %loadB = load i32, i32* %arrayidxB, align 4
+
+  %mulA = mul i32 %loadB, %loadA
+
+  %add = add nuw nsw i64 %ind, 1
+  %arrayidxA_plus_4 = getelementptr inbounds i32, i32* %a, i64 %add
+  store i32 %mulA, i32* %arrayidxA_plus_4, align 4
+
+  %arrayidxD = getelementptr inbounds i32, i32* %d, i64 %ind
+  %loadD = load i32, i32* %arrayidxD, align 4
+
+  %mul = mul i64 %ind, %stride
+  %arrayidxStridedA = getelementptr inbounds i32, i32* %a, i64 %mul
+  %loadStridedA = load i32, i32* %arrayidxStridedA, align 4
+
+  %mulC = mul i32 %loadD, %loadStridedA
+
+  %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
+  store i32 %mulC, i32* %arrayidxC, align 4
+
+  %exitcond = icmp eq i64 %add, 20
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}

Modified: llvm/trunk/test/Transforms/LoopLoadElim/symbolic-stride.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopLoadElim/symbolic-stride.ll?rev=273064&r1=273063&r2=273064&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopLoadElim/symbolic-stride.ll (original)
+++ llvm/trunk/test/Transforms/LoopLoadElim/symbolic-stride.ll Fri Jun 17 17:35:41 2016
@@ -1,34 +1,91 @@
-; RUN: opt -loop-load-elim -S < %s | FileCheck %s
+; RUN: opt -loop-load-elim -S < %s | \
+; RUN:     FileCheck %s -check-prefix=ALL -check-prefix=ONE_STRIDE_SPEC \
+; RUN:                  -check-prefix=TWO_STRIDE_SPEC
 
-; Forwarding in the presence of symbolic strides is currently not supported:
+; RUN: opt -loop-load-elim -S -enable-mem-access-versioning=0 < %s | \
+; RUN:     FileCheck %s -check-prefix=ALL -check-prefix=NO_ONE_STRIDE_SPEC \
+; RUN:                  -check-prefix=NO_TWO_STRIDE_SPEC
+
+; RUN: opt -loop-load-elim -S -loop-load-elimination-scev-check-threshold=1 < %s | \
+; RUN:     FileCheck %s -check-prefix=ALL -check-prefix=ONE_STRIDE_SPEC \
+; RUN:                  -check-prefix=NO_TWO_STRIDE_SPEC
+
+; Forwarding in the presence of symbolic strides:
 ;
 ;   for (unsigned i = 0; i < 100; i++)
 ;     A[i + 1] = A[Stride * i] + B[i];
 
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 
-; CHECK-LABEL: @f(
+; ALL-LABEL: @f(
 define void @f(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i64 %N,
                i64 %stride) {
+
+; ONE_STRIDE_SPEC: %ident.check = icmp ne i64 %stride, 1
+
 entry:
-; CHECK-NOT: %load_initial = load i32, i32* %A
+; NO_ONE_STRIDE_SPEC-NOT: %load_initial = load i32, i32* %A
+; ONE_STRIDE_SPEC: %load_initial = load i32, i32* %A
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %entry
-; CHECK-NOT: %store_forwarded = phi i32 [ %load_initial, {{.*}} ], [ %add, %for.body ]
+; NO_ONE_STRIDE_SPEC-NOT: %store_forwarded = phi i32 [ %load_initial, {{.*}} ], [ %add, %for.body ]
+; ONE_STRIDE_SPEC: %store_forwarded = phi i32 [ %load_initial, {{.*}} ], [ %add, %for.body ]
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %mul = mul i64 %indvars.iv, %stride
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %mul
   %load = load i32, i32* %arrayidx, align 4
   %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
   %load_1 = load i32, i32* %arrayidx2, align 4
-; CHECK-NOT: %add = add i32 %load_1, %store_forwarded
+; NO_ONE_STRIDE_SPEC-NOT: %add = add i32 %load_1, %store_forwarded
+; ONE_STRIDE_SPEC: %add = add i32 %load_1, %store_forwarded
   %add = add i32 %load_1, %load
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %arrayidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
   store i32 %add, i32* %arrayidx_next, align 4
   %exitcond = icmp eq i64 %indvars.iv.next, %N
   br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; With two symbolic strides:
+;
+;   for (unsigned i = 0; i < 100; i++)
+;     A[Stride2 * (i + 1)] = A[Stride1 * i] + B[i];
+
+; ALL-LABEL: @two_strides(
+define void @two_strides(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i64 %N,
+                         i64 %stride.1, i64 %stride.2) {
+
+; TWO_STRIDE_SPEC: %ident.check = icmp ne i64 %stride.2, 1
+; TWO_STRIDE_SPEC: %ident.check1 = icmp ne i64 %stride.1, 1
+; NO_TWO_STRIDE_SPEC-NOT: %ident.check{{.*}} = icmp ne i64 %stride{{.*}}, 1
+
+entry:
+; NO_TWO_STRIDE_SPEC-NOT: %load_initial = load i32, i32* %A
+; TWO_STRIDE_SPEC: %load_initial = load i32, i32* %A
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+; NO_TWO_STRIDE_SPEC-NOT: %store_forwarded = phi i32 [ %load_initial, {{.*}} ], [ %add, %for.body ]
+; TWO_STRIDE_SPEC: %store_forwarded = phi i32 [ %load_initial, {{.*}} ], [ %add, %for.body ]
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %mul = mul i64 %indvars.iv, %stride.1
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %mul
+  %load = load i32, i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+  %load_1 = load i32, i32* %arrayidx2, align 4
+; NO_TWO_STRIDE_SPEC-NOT: %add = add i32 %load_1, %store_forwarded
+; TWO_STRIDE_SPEC: %add = add i32 %load_1, %store_forwarded
+  %add = add i32 %load_1, %load
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %mul.2 = mul i64 %indvars.iv.next, %stride.2
+  %arrayidx_next = getelementptr inbounds i32, i32* %A, i64 %mul.2
+  store i32 %add, i32* %arrayidx_next, align 4
+  %exitcond = icmp eq i64 %indvars.iv.next, %N
+  br i1 %exitcond, label %for.end, label %for.body
 
 for.end:                                          ; preds = %for.body
   ret void




More information about the llvm-commits mailing list