[Mlir-commits] [mlir] [mlir][ArmSME] Add support for lowering masked tile_store ops (PR #71180)
Andrzej Warzyński
llvmlistbot at llvm.org
Mon Nov 6 02:45:52 PST 2023
================
@@ -0,0 +1,121 @@
+// DEFINE: %{entry_point} = entry
+// DEFINE: %{compile} = mlir-opt %s \
+// DEFINE: -enable-arm-streaming="mode=locally enable-za" \
+// DEFINE: -convert-vector-to-arm-sme -convert-arm-sme-to-scf \
+// DEFINE: -convert-vector-to-llvm="enable-arm-sme" -cse -canonicalize \
+// DEFINE: -allocate-arm-sme-tiles -test-lower-to-llvm
+// DEFINE: %{run} = %mcr_aarch64_cmd \
+// DEFINE: -march=aarch64 -mattr=+sve,+sme \
+// DEFINE: -e %{entry_point} -entry-point-result=void \
+// DEFINE: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils
+
+// RUN: %{compile} | %{run} | FileCheck %s
+
+// Vector store.
+func.func @transfer_write_2d(%A : memref<?x?xf32>, %base1: index, %base2: index) {
+ %c0 = arith.constant 0.0 : f32
+ %zero = vector.splat %c0 : vector<[4]x[4]xf32>
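+  // Write an all-zero tile. With the ArmSME lowering enabled above, this
+  // in-bounds transfer_write becomes a plain (unmasked) tile store.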
+ vector.transfer_write %zero, %A[%base1, %base2] {in_bounds=[true, true]} :
+ vector<[4]x[4]xf32>, memref<?x?xf32>
+ return
+}
+
+// Masked vector store.
+func.func @transfer_write_2d_mask(%A : memref<?x?xf32>, %base1: index, %base2: index) {
+ %c0 = arith.constant 0.0 : f32
+ %c2 = arith.constant 2 : index
+ %c3 = arith.constant 3 : index
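+  // Mask covering only the first 2 rows and first 3 columns of the tile;
+  // this exercises the masked tile_store lowering added in this patch.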
+ %mask = vector.create_mask %c2, %c3 : vector<[4]x[4]xi1>
+ %zero = vector.splat %c0 : vector<[4]x[4]xf32>
+ vector.transfer_write %zero, %A[%base1, %base2], %mask {in_bounds=[true, true]} :
+ vector<[4]x[4]xf32>, memref<?x?xf32>
+ return
+}
+
+// Vector load + print.
+func.func @load_and_print(%A : memref<?x?xf32>, %base1: index, %base2: index) {
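+  // Load a full [4]x[4] scalable tile's worth of data starting at the given
+  // indices, then print it for FileCheck.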
+ %0 = vector.load %A[%base1, %base2] : memref<?x?xf32>, vector<[4]x[4]xf32>
+
+ vector.print str "TILE BEGIN:"
+  vector.print %0 : vector<[4]x[4]xf32>
+
+ return
+}
+
+// Allocate heap memory of size 'd0' x 'd1' and initialize.
+//
+// Example:
+//
+// initialize_memory(%c4, %c5)
+//
+// 0, 1, 2, 3, 4
+// 10, 11, 12, 13, 14
+// 20, 21, 22, 23, 24
+// 30, 31, 32, 33, 34
+//
+// Returns a dynamic memref. It is the caller's responsibility to free the
+// returned memref.
+func.func @initialize_memory(%d0 : index, %d1 : index) -> memref<?x?xf32> {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c1_f32 = arith.constant 1.0 : f32
+ %c10_f32 = arith.constant 10.0 : f32
+
+ %A = memref.alloc(%d0, %d1) : memref<?x?xf32>
+
+ %init = arith.constant 0.0 : f32
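+  // Fill A with A[i][j] = 10*i + j. Both loops thread the running value
+  // through iter_args; the inner loop's result is left unused, as each row
+  // restarts from the outer value.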
+ scf.for %i = %c0 to %d0 step %c1 iter_args(%val = %init) -> f32 {
+ scf.for %j = %c0 to %d1 step %c1 iter_args(%inner_val = %val) -> f32 {
+ memref.store %inner_val, %A[%i, %j] : memref<?x?xf32>
+ %inner_val_next = arith.addf %inner_val, %c1_f32 : f32
+ scf.yield %inner_val_next : f32
+ }
+ %val_next = arith.addf %val, %c10_f32 : f32
+ scf.yield %val_next : f32
+ }
+
+ return %A : memref<?x?xf32>
+}
+
+func.func @entry() {
+ %c0 = arith.constant 0 : index
+ %c2 = arith.constant 2 : index
+ %c4 = arith.constant 4 : index
+
+ // Allocate enough memory to load a 32-bit tile plus a tiny bit more to test
+  // non-zero offsets while remaining in bounds.
----------------
banach-space wrote:
[nit] The following code does not allocate memory :) (it defines the size of the allocation and the allocation happens in the following block)
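A hypothetical sketch of the split being described, continuing from the
constants shown above (the value names here are invented for illustration
and are not taken from the patch):

    // The next few ops only *define* the size: one 32-bit tile
    // (svl_s x svl_s elements) plus a margin of 2 so that non-zero offsets
    // stay in bounds. The allocation itself is the memref.alloc inside
    // @initialize_memory, called afterwards.
    %vscale = vector.vscale
    %svl_s = arith.muli %c4, %vscale : index
    %size = arith.addi %svl_s, %c2 : index
    %A = func.call @initialize_memory(%size, %size)
      : (index, index) -> memref<?x?xf32>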
https://github.com/llvm/llvm-project/pull/71180