[polly] r312255 - Use the information about the target cache provided by the TargetTransformInfo.

Roman Gareev via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 31 10:07:54 PDT 2017


Author: romangareev
Date: Thu Aug 31 10:07:54 2017
New Revision: 312255

URL: http://llvm.org/viewvc/llvm-project?rev=312255&view=rev
Log:
Use the information about the target cache provided by the TargetTransformInfo.

Reviewed-by: Tobias Grosser <tobias at grosser.es>

Differential Revision: https://reviews.llvm.org/D37178

Modified:
    polly/trunk/lib/Transform/ScheduleOptimizer.cpp

Modified: polly/trunk/lib/Transform/ScheduleOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Transform/ScheduleOptimizer.cpp?rev=312255&r1=312254&r2=312255&view=diff
==============================================================================
--- polly/trunk/lib/Transform/ScheduleOptimizer.cpp (original)
+++ polly/trunk/lib/Transform/ScheduleOptimizer.cpp Thu Aug 31 10:07:54 2017
@@ -153,28 +153,51 @@ static cl::opt<int> ThroughputVectorFma(
 // represent the parameters of the target cache, which do not have typical
 // values that can be used by default. However, to apply the pattern matching
 // optimizations, we use the values of the parameters of Intel Core i7-3820
-// SandyBridge in case the parameters are not specified. Such an approach helps
-// also to attain the high-performance on IBM POWER System S822 and IBM Power
-// 730 Express server.
+// SandyBridge in case the parameters are not specified or not provided by the
+// TargetTransformInfo.
 static cl::opt<int> FirstCacheLevelAssociativity(
     "polly-target-1st-cache-level-associativity",
     cl::desc("The associativity of the first cache level."), cl::Hidden,
-    cl::init(8), cl::ZeroOrMore, cl::cat(PollyCategory));
+    cl::init(-1), cl::ZeroOrMore, cl::cat(PollyCategory));
+
+static cl::opt<int> FirstCacheLevelDefaultAssociativity(
+    "polly-target-1st-cache-level-default-associativity",
+    cl::desc("The default associativity of the first cache level"
+             " (if not enough were provided by the TargetTransformInfo)."),
+    cl::Hidden, cl::init(8), cl::ZeroOrMore, cl::cat(PollyCategory));
 
 static cl::opt<int> SecondCacheLevelAssociativity(
     "polly-target-2nd-cache-level-associativity",
     cl::desc("The associativity of the second cache level."), cl::Hidden,
-    cl::init(8), cl::ZeroOrMore, cl::cat(PollyCategory));
+    cl::init(-1), cl::ZeroOrMore, cl::cat(PollyCategory));
+
+static cl::opt<int> SecondCacheLevelDefaultAssociativity(
+    "polly-target-2nd-cache-level-default-associativity",
+    cl::desc("The default associativity of the second cache level"
+             " (if not enough were provided by the TargetTransformInfo)."),
+    cl::Hidden, cl::init(8), cl::ZeroOrMore, cl::cat(PollyCategory));
 
 static cl::opt<int> FirstCacheLevelSize(
     "polly-target-1st-cache-level-size",
     cl::desc("The size of the first cache level specified in bytes."),
+    cl::Hidden, cl::init(-1), cl::ZeroOrMore, cl::cat(PollyCategory));
+
+static cl::opt<int> FirstCacheLevelDefaultSize(
+    "polly-target-1st-cache-level-default-size",
+    cl::desc("The default size of the first cache level specified in bytes"
+             " (if not enough were provided by the TargetTransformInfo)."),
     cl::Hidden, cl::init(32768), cl::ZeroOrMore, cl::cat(PollyCategory));
 
 static cl::opt<int> SecondCacheLevelSize(
     "polly-target-2nd-cache-level-size",
     cl::desc("The size of the second level specified in bytes."), cl::Hidden,
-    cl::init(262144), cl::ZeroOrMore, cl::cat(PollyCategory));
+    cl::init(-1), cl::ZeroOrMore, cl::cat(PollyCategory));
+
+static cl::opt<int> SecondCacheLevelDefaultSize(
+    "polly-target-2nd-cache-level-default-size",
+    cl::desc("The default size of the second cache level specified in bytes"
+             " (if not enough were provided by the TargetTransformInfo)."),
+    cl::Hidden, cl::init(262144), cl::ZeroOrMore, cl::cat(PollyCategory));
 
 static cl::opt<int> VectorRegisterBitwidth(
     "polly-target-vector-register-bitwidth",
@@ -893,6 +916,44 @@ getMicroKernelParams(const TargetTransfo
   return {Mr, Nr};
 }
 
+namespace {
+/// Determine parameters of the target cache.
+///
+/// @param TTI Target Transform Info.
+void getTargetCacheParameters(const llvm::TargetTransformInfo *TTI) {
+  auto L1DCache = llvm::TargetTransformInfo::CacheLevel::L1D;
+  auto L2DCache = llvm::TargetTransformInfo::CacheLevel::L2D;
+  if (FirstCacheLevelSize == -1) {
+    if (TTI->getCacheSize(L1DCache).hasValue())
+      FirstCacheLevelSize = TTI->getCacheSize(L1DCache).getValue();
+    else
+      FirstCacheLevelSize = static_cast<int>(FirstCacheLevelDefaultSize);
+  }
+  if (SecondCacheLevelSize == -1) {
+    if (TTI->getCacheSize(L2DCache).hasValue())
+      SecondCacheLevelSize = TTI->getCacheSize(L2DCache).getValue();
+    else
+      SecondCacheLevelSize = static_cast<int>(SecondCacheLevelDefaultSize);
+  }
+  if (FirstCacheLevelAssociativity == -1) {
+    if (TTI->getCacheAssociativity(L1DCache).hasValue())
+      FirstCacheLevelAssociativity =
+          TTI->getCacheAssociativity(L1DCache).getValue();
+    else
+      FirstCacheLevelAssociativity =
+          static_cast<int>(FirstCacheLevelDefaultAssociativity);
+  }
+  if (SecondCacheLevelAssociativity == -1) {
+    if (TTI->getCacheAssociativity(L2DCache).hasValue())
+      SecondCacheLevelAssociativity =
+          TTI->getCacheAssociativity(L2DCache).getValue();
+    else
+      SecondCacheLevelAssociativity =
+          static_cast<int>(SecondCacheLevelDefaultAssociativity);
+  }
+}
+} // namespace
+
 /// Get parameters of the BLIS macro kernel.
 ///
 /// During the computation of matrix multiplication, blocks of partitioned
@@ -901,6 +962,7 @@ getMicroKernelParams(const TargetTransfo
 /// iterations. Since parameters of the macro kernel determine sizes of these
 /// blocks, there are upper and lower bounds on these parameters.
 ///
+/// @param TTI Target Transform Info.
 /// @param MicroKernelParams Parameters of the micro-kernel
 ///                          to be taken into account.
 /// @param MMI Parameters of the matrix multiplication operands.
@@ -908,8 +970,10 @@ getMicroKernelParams(const TargetTransfo
 /// @see MacroKernelParamsTy
 /// @see MicroKernelParamsTy
 static struct MacroKernelParamsTy
-getMacroKernelParams(const MicroKernelParamsTy &MicroKernelParams,
+getMacroKernelParams(const llvm::TargetTransformInfo *TTI,
+                     const MicroKernelParamsTy &MicroKernelParams,
                      MatMulInfoTy MMI) {
+  getTargetCacheParameters(TTI);
   // According to www.cs.utexas.edu/users/flame/pubs/TOMS-BLIS-Analytical.pdf,
   // it requires information about the first two levels of a cache to determine
   // all the parameters of a macro-kernel. It also checks that an associativity
@@ -1227,7 +1291,7 @@ ScheduleTreeOptimizer::optimizeMatMulPat
   NewK = NewK == DimOutNum - 2 ? NewJ : NewK;
   Node = permuteBandNodeDimensions(Node, NewK, DimOutNum - 1);
   auto MicroKernelParams = getMicroKernelParams(TTI, MMI);
-  auto MacroKernelParams = getMacroKernelParams(MicroKernelParams, MMI);
+  auto MacroKernelParams = getMacroKernelParams(TTI, MicroKernelParams, MMI);
   Node = createMacroKernel(Node, MacroKernelParams);
   Node = createMicroKernel(Node, MicroKernelParams);
   if (MacroKernelParams.Mc == 1 || MacroKernelParams.Nc == 1 ||




More information about the llvm-commits mailing list