[llvm] r263772 - [LoopDataPrefetch] Add TTI to limit the number of iterations to prefetch ahead

Thu Mar 17 17:27:43 PDT 2016

Author: anemet
Date: Thu Mar 17 19:27:43 2016
New Revision: 263772

URL: http://llvm.org/viewvc/llvm-project?rev=263772&view=rev
Log:
[LoopDataPrefetch] Add TTI to limit the number of iterations to prefetch ahead

Summary:
Prefetching too far ahead can hurt performance.  Be conservative for
now: on Cyclone, do not prefetch at all in a loop that would require
prefetching more than 3 iterations ahead (the distance is not clamped).

Reviewers: hfinkel

Subscribers: llvm-commits, mzolotukhin

Differential Revision: http://reviews.llvm.org/D17949

Modified:
    llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
    llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
    llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
    llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
    llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h
    llvm/trunk/lib/Transforms/Scalar/LoopDataPrefetch.cpp
    llvm/trunk/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll

Modified: llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h?rev=263772&r1=263771&r2=263772&view=diff
==============================================================================

--- llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h (original)
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h Thu Mar 17 19:27:43 2016
@@ -428,6 +428,11 @@ public:
   /// adding SW prefetches.  The default is 1, i.e. prefetch with any stride.
   unsigned getMinPrefetchStride() const;
 
+  /// \return The maximum number of iterations to prefetch ahead.  If the
+  /// required number of iterations is more than this number, no prefetching is
+  /// performed.
+  unsigned getMaxPrefetchIterationsAhead() const;
+
   /// \return The maximum interleave factor that any transform should try to
   /// perform for this target. This number depends on the level of parallelism
   /// and the number of execution units in the CPU.
@@ -624,6 +629,7 @@ public:
   virtual unsigned getCacheLineSize() = 0;
   virtual unsigned getPrefetchDistance() = 0;
   virtual unsigned getMinPrefetchStride() = 0;
+  virtual unsigned getMaxPrefetchIterationsAhead() = 0;
   virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
   virtual unsigned
   getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
@@ -797,6 +803,9 @@ public:
   unsigned getMinPrefetchStride() override {
     return Impl.getMinPrefetchStride();
   }
+  unsigned getMaxPrefetchIterationsAhead() override {
+    return Impl.getMaxPrefetchIterationsAhead();
+  }
   unsigned getMaxInterleaveFactor(unsigned VF) override {
     return Impl.getMaxInterleaveFactor(VF);
   }

Modified: llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h?rev=263772&r1=263771&r2=263772&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h (original)
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h Thu Mar 17 19:27:43 2016
@@ -270,6 +270,8 @@ public:
 
   unsigned getMinPrefetchStride() { return 1; }
 
+  unsigned getMaxPrefetchIterationsAhead() { return UINT_MAX; }
+
   unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
 
   unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,

Modified: llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/TargetTransformInfo.cpp?rev=263772&r1=263771&r2=263772&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Analysis/TargetTransformInfo.cpp Thu Mar 17 19:27:43 2016
@@ -227,6 +227,10 @@ unsigned TargetTransformInfo::getMinPref
   return TTIImpl->getMinPrefetchStride();
 }
 
+unsigned TargetTransformInfo::getMaxPrefetchIterationsAhead() const {
+  return TTIImpl->getMaxPrefetchIterationsAhead();
+}
+
 unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
   return TTIImpl->getMaxInterleaveFactor(VF);
 }

Modified: llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp?rev=263772&r1=263771&r2=263772&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp Thu Mar 17 19:27:43 2016
@@ -31,6 +31,13 @@ static cl::opt<unsigned> CycloneMinPrefe
     cl::desc("Min stride to add prefetches for Cyclone"),
     cl::init(2048), cl::Hidden);
 
+// Be conservative for now and don't prefetch ahead too much since the loop
+// may terminate early.
+static cl::opt<unsigned> CycloneMaxPrefetchIterationsAhead(
+    "cyclone-max-prefetch-iters-ahead",
+    cl::desc("Max number of iterations to prefetch ahead on Cyclone"),
+    cl::init(3), cl::Hidden);
+
 /// \brief Calculate the cost of materializing a 64-bit value. This helper
 /// method might only calculate a fraction of a larger immediate. Therefore it
 /// is valid to return a cost of ZERO.
@@ -602,3 +609,9 @@ unsigned AArch64TTIImpl::getMinPrefetchS
     return CycloneMinPrefetchStride;
   return BaseT::getMinPrefetchStride();
 }
+
+unsigned AArch64TTIImpl::getMaxPrefetchIterationsAhead() {
+  if (ST->isCyclone())
+    return CycloneMaxPrefetchIterationsAhead;
+  return BaseT::getMaxPrefetchIterationsAhead();
+}

Modified: llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h?rev=263772&r1=263771&r2=263772&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h Thu Mar 17 19:27:43 2016
@@ -133,6 +133,8 @@ public:
   unsigned getPrefetchDistance();
 
   unsigned getMinPrefetchStride();
+
+  unsigned getMaxPrefetchIterationsAhead();
   /// @}
 };
 

Modified: llvm/trunk/lib/Transforms/Scalar/LoopDataPrefetch.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopDataPrefetch.cpp?rev=263772&r1=263771&r2=263772&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopDataPrefetch.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopDataPrefetch.cpp Thu Mar 17 19:27:43 2016
@@ -171,6 +171,9 @@ bool LoopDataPrefetch::runOnLoop(Loop *L
   if (!ItersAhead)
     ItersAhead = 1;
 
+  if (ItersAhead > TTI->getMaxPrefetchIterationsAhead())
+    return MadeChange;
+
   DEBUG(dbgs() << "Prefetching " << ItersAhead
                << " iterations ahead (loop size: " << LoopSize << ") in "
                << L->getHeader()->getParent()->getName() << ": " << *L);

Modified: llvm/trunk/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll?rev=263772&r1=263771&r2=263772&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll (original)
+++ llvm/trunk/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll Thu Mar 17 19:27:43 2016
@@ -1,4 +1,5 @@
-; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch -S < %s | FileCheck %s --check-prefix=LARGE_PREFETCH --check-prefix=ALL
+; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch -cyclone-max-prefetch-iters-ahead=100 -S < %s | FileCheck %s --check-prefix=LARGE_PREFETCH --check-prefix=ALL
+; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch -S < %s | FileCheck %s --check-prefix=NO_LARGE_PREFETCH --check-prefix=ALL
 ; RUN: opt -mcpu=generic -mtriple=arm64-apple-ios -loop-data-prefetch -S < %s | FileCheck %s --check-prefix=NO_LARGE_PREFETCH --check-prefix=ALL
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"