[Mlir-commits] [mlir] 7fca0e9 - [MLIR] Add simple runner utilities for timing
Uday Bondhugula
llvmlistbot at llvm.org
Tue Mar 31 10:39:40 PDT 2020
Author: Uday Bondhugula
Date: 2020-03-31T23:08:29+05:30
New Revision: 7fca0e9797dccf44a1ae43328b94182d2f6188d9
URL: https://github.com/llvm/llvm-project/commit/7fca0e9797dccf44a1ae43328b94182d2f6188d9
DIFF: https://github.com/llvm/llvm-project/commit/7fca0e9797dccf44a1ae43328b94182d2f6188d9.diff
LOG: [MLIR] Add simple runner utilities for timing
Add the print_flops and rtclock utilities for timing / benchmarking. Add the
mlir_runner_utils_dir lit test configuration variable.
Signed-off-by: Uday Bondhugula <uday at polymagelabs.com>
Differential Revision: https://reviews.llvm.org/D76912
Added:
mlir/test/mlir-cpu-runner/sgemm_naive_codegen.mlir
Modified:
mlir/lib/ExecutionEngine/RunnerUtils.cpp
mlir/test/CMakeLists.txt
mlir/test/lit.cfg.py
mlir/test/lit.site.cfg.py.in
Removed:
################################################################################
diff --git a/mlir/lib/ExecutionEngine/RunnerUtils.cpp b/mlir/lib/ExecutionEngine/RunnerUtils.cpp
index 0aa3571ee922..7991eca61994 100644
--- a/mlir/lib/ExecutionEngine/RunnerUtils.cpp
+++ b/mlir/lib/ExecutionEngine/RunnerUtils.cpp
@@ -15,6 +15,10 @@
#include "mlir/ExecutionEngine/RunnerUtils.h"
+#ifndef _WIN32
+#include <sys/time.h>
+#endif // _WIN32
+
extern "C" void _mlir_ciface_print_memref_vector_4x4xf32(
StridedMemRefType<Vector2D<4, 4, float>, 2> *M) {
impl::printMemRef(*M);
@@ -103,3 +107,22 @@ extern "C" void
_mlir_ciface_print_memref_4d_f32(StridedMemRefType<float, 4> *M) {
impl::printMemRef(*M);
}
+
+/// Reports a throughput figure on stderr in GFLOPS. The incoming `flops`
+/// value is scaled down by 1e9 before being printed.
+extern "C" void print_flops(double flops) {
+  const double gflops = flops / 1.0E9;
+  fprintf(stderr, "%lf GFLOPS\n", gflops);
+}
+
+/// Returns the wall-clock time in seconds (with microsecond granularity) since
+/// the Epoch 1970-01-01 00:00:00 +0000 (UTC).
+///
+/// If the time cannot be obtained -- gettimeofday reports failure, or the
+/// build targets Windows, where this utility is not implemented -- a message
+/// is printed to stderr and 0.0 is returned.
+extern "C" double rtclock() {
+#ifndef _WIN32
+  struct timeval tp;
+  int stat = gettimeofday(&tp, nullptr);
+  if (stat != 0) {
+    fprintf(stderr, "Error returning time from gettimeofday: %d\n", stat);
+    // `tp` may not have been filled in on failure; do not derive a result
+    // from it. Mirror the fallback value used on Windows instead.
+    return 0.0;
+  }
+  // Combine whole seconds and the microsecond remainder into one double.
+  return (tp.tv_sec + tp.tv_usec * 1.0e-6);
+#else
+  fprintf(stderr, "Timing utility not implemented on Windows\n");
+  return 0.0;
+#endif // _WIN32
+}
diff --git a/mlir/test/CMakeLists.txt b/mlir/test/CMakeLists.txt
index 97584f81d270..91bad3331dca 100644
--- a/mlir/test/CMakeLists.txt
+++ b/mlir/test/CMakeLists.txt
@@ -10,6 +10,7 @@ llvm_canonicalize_cmake_booleans(
# Passed to lit.site.cfg.py.in to set up the path where to find the libraries
# for linalg integration tests.
set(MLIR_DIALECT_LINALG_INTEGRATION_TEST_LIB_DIR ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
+set(MLIR_RUNNER_UTILS_DIR ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
# Passed to lit.site.cfg.py.in to set up the path where to find the libraries
# for the mlir cuda runner tests.
diff --git a/mlir/test/lit.cfg.py b/mlir/test/lit.cfg.py
index 62015568670b..84a4de057c13 100644
--- a/mlir/test/lit.cfg.py
+++ b/mlir/test/lit.cfg.py
@@ -66,8 +66,9 @@
ToolSubst('toy-ch3', unresolved='ignore'),
ToolSubst('toy-ch4', unresolved='ignore'),
ToolSubst('toy-ch5', unresolved='ignore'),
- ToolSubst('%linalg_test_lib_dir', config.linalg_test_lib_dir, unresolved='ignore'),
ToolSubst('%cuda_wrapper_library_dir', config.cuda_wrapper_library_dir, unresolved='ignore'),
+ ToolSubst('%linalg_test_lib_dir', config.linalg_test_lib_dir, unresolved='ignore'),
+ ToolSubst('%mlir_runner_utils_dir', config.mlir_runner_utils_dir, unresolved='ignore'),
ToolSubst('%vulkan_wrapper_library_dir', config.vulkan_wrapper_library_dir, unresolved='ignore')
])
diff --git a/mlir/test/lit.site.cfg.py.in b/mlir/test/lit.site.cfg.py.in
index 5e18083a8b5c..dafb1c9a3eb8 100644
--- a/mlir/test/lit.site.cfg.py.in
+++ b/mlir/test/lit.site.cfg.py.in
@@ -30,6 +30,7 @@ config.llvm_host_triple = '@LLVM_HOST_TRIPLE@'
config.host_arch = "@HOST_ARCH@"
config.mlir_src_root = "@MLIR_SOURCE_DIR@"
config.mlir_obj_root = "@MLIR_BINARY_DIR@"
+config.mlir_runner_utils_dir = "@MLIR_RUNNER_UTILS_DIR@"
config.mlir_tools_dir = "@MLIR_TOOLS_DIR@"
config.linalg_test_lib_dir = "@MLIR_DIALECT_LINALG_INTEGRATION_TEST_LIB_DIR@"
config.build_examples = @LLVM_BUILD_EXAMPLES@
diff --git a/mlir/test/mlir-cpu-runner/sgemm_naive_codegen.mlir b/mlir/test/mlir-cpu-runner/sgemm_naive_codegen.mlir
new file mode 100644
index 000000000000..eadf51d9e2ee
--- /dev/null
+++ b/mlir/test/mlir-cpu-runner/sgemm_naive_codegen.mlir
@@ -0,0 +1,71 @@
+// RUN: mlir-opt -convert-linalg-to-loops -lower-affine -convert-loop-to-std -convert-std-to-llvm %s | mlir-cpu-runner -O3 -e main -entry-point-result=void -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext | FileCheck %s
+
+// Times repeated runs of @sgemm_naive on 64x64 all-ones matrices, prints the
+// result matrix, and reports the achieved FLOPS rate via @print_flops.
+func @main() {
+  %A = alloc() : memref<64x64xf32>
+  %B = alloc() : memref<64x64xf32>
+  %C = alloc() : memref<64x64xf32>
+
+  %cf1 = constant 1.00000e+00 : f32
+
+  linalg.fill(%A, %cf1) : memref<64x64xf32>, f32
+  linalg.fill(%B, %cf1) : memref<64x64xf32>, f32
+
+  // Number of timed repetitions. This must match the trip count of the
+  // affine.for loop below, otherwise the reported FLOPS rate is wrong.
+  %reps = constant 5 : index
+
+  %t_start = call @rtclock() : () -> f64
+  affine.for %arg0 = 0 to 5 {
+    // Reset C so that every repetition produces the same result:
+    // 1 + 64 * (1 * 1) = 65 per element.
+    linalg.fill(%C, %cf1) : memref<64x64xf32>, f32
+    call @sgemm_naive(%A, %B, %C) : (memref<64x64xf32>, memref<64x64xf32>, memref<64x64xf32>) -> ()
+  }
+  %t_end = call @rtclock() : () -> f64
+  %t = subf %t_end, %t_start : f64
+
+  %pC = memref_cast %C : memref<64x64xf32> to memref<*xf32>
+  call @print_memref_f32(%pC) : (memref<*xf32>) -> ()
+
+  %M = dim %C, 0 : memref<64x64xf32>
+  %N = dim %C, 1 : memref<64x64xf32>
+  %K = dim %A, 1 : memref<64x64xf32>
+
+  %f1 = muli %M, %N : index
+  %f2 = muli %f1, %K : index
+
+  // 2*M*N*K floating-point operations per repetition (one multiply and one
+  // add per inner-loop iteration), scaled by the number of repetitions.
+  %c2 = constant 2 : index
+  %f3 = muli %c2, %f2 : index
+  %num_flops = muli %reps, %f3 : index
+  %num_flops_i = index_cast %num_flops : index to i64
+  %num_flops_f = sitofp %num_flops_i : i64 to f64
+  %flops = divf %num_flops_f, %t : f64
+  call @print_flops(%flops) : (f64) -> ()
+
+  return
+}
+// CHECK: 65, 65, 65,
+
+// Naive triple-loop matrix multiply-accumulate on 64x64 f32 matrices:
+//   C[i, j] = C[i, j] + sum over k of A[i, k] * B[k, j].
+// The running sum for each (i, j) is kept in a one-element scratch buffer.
+func @sgemm_naive(%A: memref<64x64xf32>, %B: memref<64x64xf32>, %C: memref<64x64xf32>) {
+  %c0 = constant 0 : index
+  affine.for %i = 0 to 64 {
+    affine.for %j = 0 to 64 {
+      // Seed the accumulator with the current value of C[i, j].
+      %acc = alloc() : memref<1xf32>
+      %init = affine.load %C[%i, %j] : memref<64x64xf32>
+      affine.store %init, %acc[%c0] : memref<1xf32>
+      affine.for %k = 0 to 64 {
+        %a = affine.load %A[%i, %k] : memref<64x64xf32>
+        %b = affine.load %B[%k, %j] : memref<64x64xf32>
+        %partial = affine.load %acc[0] : memref<1xf32>
+        %prod = mulf %a, %b : f32
+        %sum = addf %prod, %partial : f32
+        affine.store %sum, %acc[0] : memref<1xf32>
+      }
+      // Write the finished dot product back and release the scratch buffer.
+      %result = affine.load %acc[%c0] : memref<1xf32>
+      affine.store %result, %C[%i, %j] : memref<64x64xf32>
+      dealloc %acc : memref<1xf32>
+    }
+  }
+  return
+}
+
+// External function declarations (no bodies here). The RUN line loads the
+// runner utils shared library via -shared-libs; presumably these symbols are
+// all resolved from it -- print_memref_f32's definition is not shown in this
+// change.
+// Takes a FLOPS rate as f64 and prints it.
+func @print_flops(f64)
+// Returns a timestamp as f64; @main subtracts two readings to get elapsed time.
+func @rtclock() -> f64
+// Prints the contents of an unranked f32 memref.
+func @print_memref_f32(memref<*xf32>)
More information about the Mlir-commits
mailing list