[polly] r276616 - [NFC] Refactor creation of the BLIS micro-kernel and improve documentation
Roman Gareev via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 25 00:27:59 PDT 2016
Author: romangareev
Date: Mon Jul 25 02:27:59 2016
New Revision: 276616
URL: http://llvm.org/viewvc/llvm-project?rev=276616&view=rev
Log:
[NFC] Refactor creation of the BLIS micro-kernel and improve documentation
Reviewed-by: Tobias Grosser <tobias at grosser.es>
Modified:
polly/trunk/include/polly/ScheduleOptimizer.h
polly/trunk/lib/Transform/ScheduleOptimizer.cpp
Modified: polly/trunk/include/polly/ScheduleOptimizer.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/include/polly/ScheduleOptimizer.h?rev=276616&r1=276615&r2=276616&view=diff
==============================================================================
--- polly/trunk/include/polly/ScheduleOptimizer.h (original)
+++ polly/trunk/include/polly/ScheduleOptimizer.h Mon Jul 25 02:27:59 2016
@@ -20,6 +20,16 @@ struct isl_schedule;
struct isl_schedule_node;
struct isl_union_map;
+/// @brief Parameters of the micro kernel.
+///
+/// Parameters that determine the sizes of the rank-1 (i.e., outer product)
+/// update used in the optimized matrix multiplication.
+///
+struct MicroKernelParamsTy {
+ int Mr; ///< First of the two sizes; used as a register tile size.
+ int Nr; ///< Second of the two sizes; used as a register tile size.
+};
+
namespace polly {
extern bool DisablePollyTiling;
class Scop;
@@ -232,6 +242,21 @@ private:
///
/// @param Node The node to check.
static bool isMatrMultPattern(__isl_keep isl_schedule_node *Node);
+
+ /// @brief Create the BLIS micro-kernel.
+ ///
+ /// We create the BLIS micro-kernel by applying a combination of tiling
+ /// of dimensions of the band node and interchanging of two innermost
+ /// modified dimensions. The values passed in MicroKernelParams are used
+ /// as tile sizes.
+ ///
+ /// @param Node The schedule node to be modified.
+ /// @param MicroKernelParams Parameters of the micro kernel
+ /// to be used as tile sizes.
+ /// @see MicroKernelParamsTy
+ static __isl_give isl_schedule_node *
+ createMicroKernel(__isl_take isl_schedule_node *Node,
+ MicroKernelParamsTy MicroKernelParams);
};
#endif
Modified: polly/trunk/lib/Transform/ScheduleOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Transform/ScheduleOptimizer.cpp?rev=276616&r1=276615&r2=276616&view=diff
==============================================================================
--- polly/trunk/lib/Transform/ScheduleOptimizer.cpp (original)
+++ polly/trunk/lib/Transform/ScheduleOptimizer.cpp Mon Jul 25 02:27:59 2016
@@ -493,10 +493,27 @@ static __isl_give isl_map *circularShift
return isl_map_set_tuple_id(IslMap, isl_dim_in, InputDimsId);
}
-__isl_give isl_schedule_node *ScheduleTreeOptimizer::optimizeMatMulPattern(
- __isl_take isl_schedule_node *Node, const llvm::TargetTransformInfo *TTI) {
+__isl_give isl_schedule_node *ScheduleTreeOptimizer::createMicroKernel(
+ __isl_take isl_schedule_node *Node, MicroKernelParamsTy MicroKernelParams) {
+ return applyRegisterTiling(Node, {MicroKernelParams.Mr, MicroKernelParams.Nr}, // Mr and Nr are the tile sizes
+ 1); // NOTE(review): presumably a default tile size -- confirm against applyRegisterTiling
+}
+
+/// Get parameters of the BLIS micro kernel.
+///
+/// We choose the Mr and Nr parameters of the micro kernel to be large enough
+/// such that no stalls caused by the combination of latencies and dependencies
+/// are introduced during the updates of the resulting matrix of the matrix
+/// multiplication. However, they should also be as small as possible to
+/// release more registers for entries of multiplied matrices.
+///
+/// @param TTI Target Transform Info.
+/// @return The structure of type MicroKernelParamsTy.
+/// @see MicroKernelParamsTy
+static struct MicroKernelParamsTy
+getMicroKernelParams(const llvm::TargetTransformInfo *TTI) {
assert(TTI && "The target transform info should be provided.");
- // Get a micro-kernel.
+
// Nvec - Number of double-precision floating-point numbers that can be held
// by a vector register. Use 2 by default.
auto Nvec = TTI->getRegisterBitWidth(true) / 64;
@@ -505,8 +522,14 @@ __isl_give isl_schedule_node *ScheduleTr
int Nr =
ceil(sqrt(Nvec * LatencyVectorFma * ThrougputVectorFma) / Nvec) * Nvec;
int Mr = ceil(Nvec * LatencyVectorFma * ThrougputVectorFma / Nr);
- std::vector<int> MicroKernelParams{Mr, Nr};
- Node = applyRegisterTiling(Node, MicroKernelParams, 1);
+ return {Mr, Nr};
+}
+
+__isl_give isl_schedule_node *ScheduleTreeOptimizer::optimizeMatMulPattern(
+ __isl_take isl_schedule_node *Node, const llvm::TargetTransformInfo *TTI) {
+ assert(TTI && "The target transform info should be provided.");
+ auto MicroKernelParams = getMicroKernelParams(TTI); // Mr and Nr derived from the target info
+ Node = createMicroKernel(Node, MicroKernelParams); // tile Node using Mr and Nr as tile sizes
return Node;
}
More information about the llvm-commits
mailing list