[Mlir-commits] [mlir] f8dbd61 - [mlir][Linalg] Drop spuriously long matmul_column_major benchmark
Nicolas Vasilache
llvmlistbot at llvm.org
Tue May 18 03:07:36 PDT 2021
Author: Nicolas Vasilache
Date: 2021-05-18T10:07:19Z
New Revision: f8dbd61074176bae92ec360a093ac7bc498c9321
URL: https://github.com/llvm/llvm-project/commit/f8dbd61074176bae92ec360a093ac7bc498c9321
DIFF: https://github.com/llvm/llvm-project/commit/f8dbd61074176bae92ec360a093ac7bc498c9321.diff
LOG: [mlir][Linalg] Drop spuriously long matmul_column_major benchmark
Added:
Modified:
Removed:
mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_column_major.mlir
################################################################################
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_column_major.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_column_major.mlir
deleted file mode 100644
index 87340d0fff2ff..0000000000000
--- a/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_column_major.mlir
+++ /dev/null
@@ -1,110 +0,0 @@
-// RUN: export M=24 && export K=64 && export N=192 && export ITERS=10 && \
-// RUN: cat %s | sed 's@${M}@'"$M"'@g'| sed 's@${K}@'"$K"'@g' | sed 's@${N}@'"$N"'@g'| sed 's@${ITERS}@'"$ITERS"'@g'| \
-// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-func=matmul_column_major anchor-op=linalg.matmul_column_major register-tile-sizes=16,0,32 vectorize" | \
-// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.fill register-tile-sizes=4,16 vectorize" | \
-
-// TODO: linalg.copy vectorization in the presence of permutation map fails. Enable when addressed.
-// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.copy register-tile-sizes=4,16 vectorize" | \
-
-// RUN: mlir-opt -canonicalize -convert-vector-to-scf -lower-affine -convert-linalg-to-loops | \
-// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
-// RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
-// Activate to dump assembly
-// R_UN: -dump-object-file -object-filename=/tmp/a.o \
-// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
-// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
-// Use tee to both print to stderr and FileCheck
-// RUN: tee -a /dev/stderr | FileCheck %s
-
-!elem_type_a = type f32
-!elem_type_b = type f32
-!elem_type_c = type f32
-!row_major_A = type memref<${M}x${K}x!elem_type_a>
-!row_major_B = type memref<${K}x${N}x!elem_type_b>
-!row_major_C = type memref<${M}x${N}x!elem_type_c>
-!column_major_A = type memref<${K}x${M}x!elem_type_a>
-!column_major_B = type memref<${N}x${K}x!elem_type_b>
-!column_major_C = type memref<${N}x${M}x!elem_type_c>
-
-func @matmul_column_major(%a: !column_major_A, %b: !column_major_B, %c: !column_major_C)
-// TODO: activate manually for now.
-// attributes { passthrough = [["target-cpu", "skylake-avx512"], ["prefer-vector-width", "512"]]}
-{
- linalg.matmul_column_major ins(%a, %b : !column_major_A, !column_major_B)
- outs(%c: !column_major_C)
- return
-}
-
-func @print_perf(%iters: index, %total_time: f64) {
- %c2 = constant 2 : index
- %cM = constant ${M} : index
- %cN = constant ${N} : index
- %cK = constant ${K} : index
-
- %mn = muli %cM, %cN : index
- %mnk = muli %mn, %cK : index
-
- // 2*M*N*K.
- %flops_per_iter = muli %c2, %mnk : index
- %flops = muli %iters, %flops_per_iter : index
- %flops_i64 = index_cast %flops : index to i64
- %flops_f = sitofp %flops_i64 : i64 to f64
- %flops_per_s = divf %flops_f, %total_time : f64
- vector.print %flops_per_s : f64
-
- return
-}
-
-func @main() {
- %f0 = constant 0.0 : !elem_type_c
- %f1 = constant 1.0 : !elem_type_a
-
- %cA = memref.alloc() : !column_major_A
- %cB = memref.alloc() : !column_major_B
- %cC = memref.alloc() : !column_major_C
-
- linalg.fill(%cA, %f1) : !column_major_A, !elem_type_a
- linalg.fill(%cB, %f1) : !column_major_B, !elem_type_b
- linalg.fill(%cC, %f0) : !column_major_C, !elem_type_c
-
- %c0 = constant 0: index
- %c1 = constant 1: index
- %iters = constant ${ITERS}: index
-
- /// Run and dump performance for matmul_column_major.
- %t_start_matmul_column_major = call @rtclock() : () -> f64
- scf.for %arg0 = %c0 to %iters step %c1 {
- // linalg.matmul writes %C in place, need to reset it to zero every time.
- // This is accounts for about 10-15% perf hit on small sizes.
- // Once linalg on tensors is ready, fusing fill at the register level will
- // be easy.
- linalg.fill(%cC, %f0) : !column_major_C, !elem_type_c
- call @matmul_column_major(%cA, %cB, %cC) : (!column_major_A, !column_major_B, !column_major_C) -> ()
- }
- %t_end_matmul_column_major = call @rtclock() : () -> f64
- %tmatmul_column_major = subf %t_end_matmul_column_major, %t_start_matmul_column_major: f64
- call @print_perf(%iters, %tmatmul_column_major) : (index, f64) -> ()
-
- // CHECK: {{^0$}}
- %cC_ref = memref.alloc() : !column_major_C
- linalg.fill(%cC_ref, %f0) : !column_major_C, !elem_type_c
- linalg.matmul_column_major ins(%cA, %cB : !column_major_A, !column_major_B)
- outs(%cC_ref: !column_major_C)
- %act = memref.cast %cC : !column_major_C to memref<*xf32>
- %exp = memref.cast %cC_ref : !column_major_C to memref<*xf32>
- %errors = call @verifyMemRefF32(%act, %exp) : (memref<*xf32>, memref<*xf32>) -> i64
- vector.print %errors : i64
- memref.dealloc %cC_ref : !column_major_C
-
- memref.dealloc %cA : !column_major_A
- memref.dealloc %cB : !column_major_B
- memref.dealloc %cC : !column_major_C
-
- return
-}
-
-func private @rtclock() -> f64
-func private @verifyMemRefF32(memref<*xf32>, memref<*xf32>) -> i64 attributes { llvm.emit_c_interface }
-
-// TODO: init with random, run and check output.
-// func private @fill_random_f32(memref<*xf32>)
More information about the Mlir-commits
mailing list