[llvm] r263772 - [LoopDataPrefetch] Add TTI to limit the number of iterations to prefetch ahead
Adam Nemet via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 17 17:27:43 PDT 2016
Author: anemet
Date: Thu Mar 17 19:27:43 2016
New Revision: 263772
URL: http://llvm.org/viewvc/llvm-project?rev=263772&view=rev
Log:
[LoopDataPrefetch] Add TTI to limit the number of iterations to prefetch ahead
Summary:
It can hurt performance to prefetch ahead too much. Be conservative for
now and don't prefetch ahead more than 3 iterations on Cyclone.
Reviewers: hfinkel
Subscribers: llvm-commits, mzolotukhin
Differential Revision: http://reviews.llvm.org/D17949
Modified:
llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h
llvm/trunk/lib/Transforms/Scalar/LoopDataPrefetch.cpp
llvm/trunk/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll
Modified: llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h?rev=263772&r1=263771&r2=263772&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h (original)
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h Thu Mar 17 19:27:43 2016
@@ -428,6 +428,11 @@ public:
/// adding SW prefetches. The default is 1, i.e. prefetch with any stride.
unsigned getMinPrefetchStride() const;
+ /// \return The maximum number of iterations to prefetch ahead. If the
+ /// required number of iterations is more than this number, no prefetching is
+ /// performed.
+ unsigned getMaxPrefetchIterationsAhead() const;
+
/// \return The maximum interleave factor that any transform should try to
/// perform for this target. This number depends on the level of parallelism
/// and the number of execution units in the CPU.
@@ -624,6 +629,7 @@ public:
virtual unsigned getCacheLineSize() = 0;
virtual unsigned getPrefetchDistance() = 0;
virtual unsigned getMinPrefetchStride() = 0;
+ virtual unsigned getMaxPrefetchIterationsAhead() = 0;
virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
virtual unsigned
getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
@@ -797,6 +803,9 @@ public:
unsigned getMinPrefetchStride() override {
return Impl.getMinPrefetchStride();
}
+ unsigned getMaxPrefetchIterationsAhead() override {
+ return Impl.getMaxPrefetchIterationsAhead();
+ }
unsigned getMaxInterleaveFactor(unsigned VF) override {
return Impl.getMaxInterleaveFactor(VF);
}
Modified: llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h?rev=263772&r1=263771&r2=263772&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h (original)
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h Thu Mar 17 19:27:43 2016
@@ -270,6 +270,8 @@ public:
unsigned getMinPrefetchStride() { return 1; }
+ unsigned getMaxPrefetchIterationsAhead() { return UINT_MAX; }
+
unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
Modified: llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/TargetTransformInfo.cpp?rev=263772&r1=263771&r2=263772&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Analysis/TargetTransformInfo.cpp Thu Mar 17 19:27:43 2016
@@ -227,6 +227,10 @@ unsigned TargetTransformInfo::getMinPref
return TTIImpl->getMinPrefetchStride();
}
+unsigned TargetTransformInfo::getMaxPrefetchIterationsAhead() const {
+ return TTIImpl->getMaxPrefetchIterationsAhead();
+}
+
unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
return TTIImpl->getMaxInterleaveFactor(VF);
}
Modified: llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp?rev=263772&r1=263771&r2=263772&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp Thu Mar 17 19:27:43 2016
@@ -31,6 +31,13 @@ static cl::opt<unsigned> CycloneMinPrefe
cl::desc("Min stride to add prefetches for Cyclone"),
cl::init(2048), cl::Hidden);
+// Be conservative for now and don't prefetch ahead too much since the loop
+// may terminate early.
+static cl::opt<unsigned> CycloneMaxPrefetchIterationsAhead(
+ "cyclone-max-prefetch-iters-ahead",
+ cl::desc("Max number of iterations to prefetch ahead on Cyclone"),
+ cl::init(3), cl::Hidden);
+
/// \brief Calculate the cost of materializing a 64-bit value. This helper
/// method might only calculate a fraction of a larger immediate. Therefore it
/// is valid to return a cost of ZERO.
@@ -602,3 +609,9 @@ unsigned AArch64TTIImpl::getMinPrefetchS
return CycloneMinPrefetchStride;
return BaseT::getMinPrefetchStride();
}
+
+unsigned AArch64TTIImpl::getMaxPrefetchIterationsAhead() {
+ if (ST->isCyclone())
+ return CycloneMaxPrefetchIterationsAhead;
+ return BaseT::getMaxPrefetchIterationsAhead();
+}
Modified: llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h?rev=263772&r1=263771&r2=263772&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h Thu Mar 17 19:27:43 2016
@@ -133,6 +133,8 @@ public:
unsigned getPrefetchDistance();
unsigned getMinPrefetchStride();
+
+ unsigned getMaxPrefetchIterationsAhead();
/// @}
};
Modified: llvm/trunk/lib/Transforms/Scalar/LoopDataPrefetch.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopDataPrefetch.cpp?rev=263772&r1=263771&r2=263772&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopDataPrefetch.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopDataPrefetch.cpp Thu Mar 17 19:27:43 2016
@@ -171,6 +171,9 @@ bool LoopDataPrefetch::runOnLoop(Loop *L
if (!ItersAhead)
ItersAhead = 1;
+ if (ItersAhead > TTI->getMaxPrefetchIterationsAhead())
+ return MadeChange;
+
DEBUG(dbgs() << "Prefetching " << ItersAhead
<< " iterations ahead (loop size: " << LoopSize << ") in "
<< L->getHeader()->getParent()->getName() << ": " << *L);
Modified: llvm/trunk/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll?rev=263772&r1=263771&r2=263772&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll (original)
+++ llvm/trunk/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll Thu Mar 17 19:27:43 2016
@@ -1,4 +1,5 @@
-; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch -S < %s | FileCheck %s --check-prefix=LARGE_PREFETCH --check-prefix=ALL
+; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch -cyclone-max-prefetch-iters-ahead=100 -S < %s | FileCheck %s --check-prefix=LARGE_PREFETCH --check-prefix=ALL
+; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch -S < %s | FileCheck %s --check-prefix=NO_LARGE_PREFETCH --check-prefix=ALL
; RUN: opt -mcpu=generic -mtriple=arm64-apple-ios -loop-data-prefetch -S < %s | FileCheck %s --check-prefix=NO_LARGE_PREFETCH --check-prefix=ALL
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
More information about the llvm-commits mailing list