[llvm] r263771 - [LoopDataPrefetch/Aarch64] Allow selective prefetching of large-strided accesses

Adam Nemet via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 17 17:27:38 PDT 2016


Author: anemet
Date: Thu Mar 17 19:27:38 2016
New Revision: 263771

URL: http://llvm.org/viewvc/llvm-project?rev=263771&view=rev
Log:
[LoopDataPrefetch/Aarch64] Allow selective prefetching of large-strided accesses

Summary:
And use this TTI for Cyclone.  As was explained in the original RFC
(http://thread.gmane.org/gmane.comp.compilers.llvm.devel/92758), the HW
prefetcher works for strides of up to 2KB.

I am also adding tests for this and the previous change (D17943):

* Cyclone prefetching accesses with a large stride
* Cyclone not prefetching accesses with a small stride
* Generic Aarch64 subtarget not prefetching either

Reviewers: hfinkel

Subscribers: aemerson, rengolin, llvm-commits, mzolotukhin

Differential Revision: http://reviews.llvm.org/D17945

Added:
    llvm/trunk/test/Transforms/LoopDataPrefetch/AArch64/
    llvm/trunk/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll
    llvm/trunk/test/Transforms/LoopDataPrefetch/AArch64/lit.local.cfg
Modified:
    llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
    llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
    llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
    llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
    llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h
    llvm/trunk/lib/Transforms/Scalar/LoopDataPrefetch.cpp

Modified: llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h?rev=263771&r1=263770&r2=263771&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h (original)
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h Thu Mar 17 19:27:38 2016
@@ -423,6 +423,11 @@ public:
   /// This is currently measured in number of instructions.
   unsigned getPrefetchDistance() const;
 
+  /// Some HW prefetchers can handle accesses up to a certain constant stride.
+  /// \return The minimum stride in bytes where it makes sense to start adding
+  /// SW prefetches.  The default is 1, i.e. prefetch with any stride.
+  unsigned getMinPrefetchStride() const;
+
   /// \return The maximum interleave factor that any transform should try to
   /// perform for this target. This number depends on the level of parallelism
   /// and the number of execution units in the CPU.
@@ -618,6 +623,7 @@ public:
   virtual unsigned getRegisterBitWidth(bool Vector) = 0;
   virtual unsigned getCacheLineSize() = 0;
   virtual unsigned getPrefetchDistance() = 0;
+  virtual unsigned getMinPrefetchStride() = 0;
   virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
   virtual unsigned
   getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
@@ -788,6 +794,9 @@ public:
     return Impl.getCacheLineSize();
   }
   unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); }
+  unsigned getMinPrefetchStride() override {
+    return Impl.getMinPrefetchStride();
+  }
   unsigned getMaxInterleaveFactor(unsigned VF) override {
     return Impl.getMaxInterleaveFactor(VF);
   }

Modified: llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h?rev=263771&r1=263770&r2=263771&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h (original)
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h Thu Mar 17 19:27:38 2016
@@ -268,6 +268,8 @@ public:
 
   unsigned getPrefetchDistance() { return 0; }
 
+  unsigned getMinPrefetchStride() { return 1; }
+
   unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
 
   unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,

Modified: llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/TargetTransformInfo.cpp?rev=263771&r1=263770&r2=263771&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Analysis/TargetTransformInfo.cpp Thu Mar 17 19:27:38 2016
@@ -223,6 +223,10 @@ unsigned TargetTransformInfo::getPrefetc
   return TTIImpl->getPrefetchDistance();
 }
 
+unsigned TargetTransformInfo::getMinPrefetchStride() const {
+  return TTIImpl->getMinPrefetchStride();
+}
+
 unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
   return TTIImpl->getMaxInterleaveFactor(VF);
 }

Modified: llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp?rev=263771&r1=263770&r2=263771&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp Thu Mar 17 19:27:38 2016
@@ -25,6 +25,12 @@ static cl::opt<unsigned> CyclonePrefetch
     cl::desc("Number of instructions to prefetch ahead for Cyclone"),
     cl::init(280), cl::Hidden);
 
+// The HW prefetcher handles accesses with strides up to 2KB.
+static cl::opt<unsigned> CycloneMinPrefetchStride(
+    "cyclone-min-prefetch-stride",
+    cl::desc("Min stride to add prefetches for Cyclone"),
+    cl::init(2048), cl::Hidden);
+
 /// \brief Calculate the cost of materializing a 64-bit value. This helper
 /// method might only calculate a fraction of a larger immediate. Therefore it
 /// is valid to return a cost of ZERO.
@@ -590,3 +596,9 @@ unsigned AArch64TTIImpl::getPrefetchDist
     return CyclonePrefetchDistance;
   return BaseT::getPrefetchDistance();
 }
+
+unsigned AArch64TTIImpl::getMinPrefetchStride() {
+  if (ST->isCyclone())
+    return CycloneMinPrefetchStride;
+  return BaseT::getMinPrefetchStride();
+}

Modified: llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h?rev=263771&r1=263770&r2=263771&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h Thu Mar 17 19:27:38 2016
@@ -131,6 +131,8 @@ public:
   unsigned getCacheLineSize();
 
   unsigned getPrefetchDistance();
+
+  unsigned getMinPrefetchStride();
   /// @}
 };
 

Modified: llvm/trunk/lib/Transforms/Scalar/LoopDataPrefetch.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopDataPrefetch.cpp?rev=263771&r1=263770&r2=263771&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopDataPrefetch.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopDataPrefetch.cpp Thu Mar 17 19:27:38 2016
@@ -73,6 +73,10 @@ namespace {
     bool runOnFunction(Function &F) override;
     bool runOnLoop(Loop *L);
 
+    /// \brief Check if the stride of the accesses is large enough to
+    /// warrant a prefetch.
+    bool isStrideLargeEnough(const SCEVAddRecExpr *AR);
+
   private:
     AssumptionCache *AC;
     LoopInfo *LI;
@@ -94,6 +98,22 @@ INITIALIZE_PASS_END(LoopDataPrefetch, "l
 
 FunctionPass *llvm::createLoopDataPrefetchPass() { return new LoopDataPrefetch(); }
 
+bool LoopDataPrefetch::isStrideLargeEnough(const SCEVAddRecExpr *AR) {
+  unsigned TargetMinStride = TTI->getMinPrefetchStride();
+  // No need to check if any stride goes.
+  if (TargetMinStride <= 1)
+    return true;
+
+  // If MinStride is set, don't prefetch unless the stride is a known constant.
+  const auto *ConstStride = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
+  if (!ConstStride)
+    return false;
+
+  // Compare |stride| at full APInt width: narrowing it to 'unsigned' would
+  // drop the high bits of a very large stride and misclassify it as small.
+  return ConstStride->getAPInt().abs().uge(TargetMinStride);
+}
+
 bool LoopDataPrefetch::runOnFunction(Function &F) {
   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
   SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
@@ -184,6 +204,11 @@ bool LoopDataPrefetch::runOnLoop(Loop *L
       if (!LSCEVAddRec)
         continue;
 
+      // Check if the stride of the accesses is large enough to warrant a
+      // prefetch.
+      if (!isStrideLargeEnough(LSCEVAddRec))
+        continue;
+
       // We don't want to double prefetch individual cache lines. If this load
       // is known to be within one cache line of some other load that has
       // already been prefetched, then don't prefetch this one as well.

Added: llvm/trunk/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll?rev=263771&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll (added)
+++ llvm/trunk/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll Thu Mar 17 19:27:38 2016
@@ -0,0 +1,51 @@
+; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch -S < %s | FileCheck %s --check-prefix=LARGE_PREFETCH --check-prefix=ALL
+; RUN: opt -mcpu=generic -mtriple=arm64-apple-ios -loop-data-prefetch -S < %s | FileCheck %s --check-prefix=NO_LARGE_PREFETCH --check-prefix=ALL
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
+
+; ALL-LABEL: @small_stride(
+define void @small_stride(double* nocapture %a, double* nocapture readonly %b) {
+entry:
+  br label %for.body
+
+; ALL: for.body:
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds double, double* %b, i64 %indvars.iv
+; ALL-NOT: call void @llvm.prefetch
+  %0 = load double, double* %arrayidx, align 8
+  %add = fadd double %0, 1.000000e+00
+  %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv
+  store double %add, double* %arrayidx2, align 8
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1600
+  br i1 %exitcond, label %for.end, label %for.body
+
+; ALL: for.end:
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; ALL-LABEL: @large_stride(
+define void @large_stride(double* nocapture %a, double* nocapture readonly %b) {
+entry:
+  br label %for.body
+
+; ALL: for.body:
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds double, double* %b, i64 %indvars.iv
+; LARGE_PREFETCH: call void @llvm.prefetch
+; NO_LARGE_PREFETCH-NOT: call void @llvm.prefetch
+  %0 = load double, double* %arrayidx, align 8
+  %add = fadd double %0, 1.000000e+00
+  %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv
+  store double %add, double* %arrayidx2, align 8
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 300
+  %exitcond = icmp eq i64 %indvars.iv.next, 160000
+  br i1 %exitcond, label %for.end, label %for.body
+
+; ALL: for.end:
+for.end:                                          ; preds = %for.body
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopDataPrefetch/AArch64/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopDataPrefetch/AArch64/lit.local.cfg?rev=263771&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopDataPrefetch/AArch64/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/LoopDataPrefetch/AArch64/lit.local.cfg Thu Mar 17 19:27:38 2016
@@ -0,0 +1,4 @@
+config.suffixes = ['.ll']
+
+if not 'AArch64' in config.root.targets:
+    config.unsupported = True




More information about the llvm-commits mailing list