[Mlir-commits] [mlir] [mlir][AMX] Memory effects to amx.tile_zero (PR #155403)
Arun Thangamani
llvmlistbot at llvm.org
Fri Aug 29 03:15:52 PDT 2025
https://github.com/arun-thmn updated https://github.com/llvm/llvm-project/pull/155403
>From 3138ca83c677c3d515e00df92446bb97f7ff2c62 Mon Sep 17 00:00:00 2001
From: Arun Thangamani <arun.thangamani at intel.com>
Date: Tue, 26 Aug 2025 05:29:05 -0700
Subject: [PATCH 1/4] adding memory side-effects to amx.tile_zero
---
mlir/include/mlir/Dialect/AMX/AMX.td | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/mlir/include/mlir/Dialect/AMX/AMX.td b/mlir/include/mlir/Dialect/AMX/AMX.td
index 6bbde43e2d011..91fe1bfb5cd35 100644
--- a/mlir/include/mlir/Dialect/AMX/AMX.td
+++ b/mlir/include/mlir/Dialect/AMX/AMX.td
@@ -142,8 +142,9 @@ class AMX_Op<string mnemonic, list<Trait> traits = []> :
// Tile reset.
//
-def TileZeroOp : AMX_Op<"tile_zero", [Pure,
- AMXIntrinsicOpInterface
+def TileZeroOp : AMX_Op<"tile_zero", [
+ AMXIntrinsicOpInterface,
+ MemoryEffects<[MemWrite]>
]> {
let summary = "tile zero operation";
let description = [{
>From c1d42593852f56143a8a10ad34bd70b5e2557ce5 Mon Sep 17 00:00:00 2001
From: Arun Thangamani <arun.thangamani at intel.com>
Date: Fri, 29 Aug 2025 01:00:18 -0700
Subject: [PATCH 2/4] updating the description + adding a test-case
---
mlir/include/mlir/Dialect/AMX/AMX.td | 10 +++++---
mlir/test/Dialect/AMX/memory-effects.mlir | 30 +++++++++++++++++++++++
2 files changed, 37 insertions(+), 3 deletions(-)
create mode 100644 mlir/test/Dialect/AMX/memory-effects.mlir
diff --git a/mlir/include/mlir/Dialect/AMX/AMX.td b/mlir/include/mlir/Dialect/AMX/AMX.td
index 91fe1bfb5cd35..a8f8a7f3d5b7f 100644
--- a/mlir/include/mlir/Dialect/AMX/AMX.td
+++ b/mlir/include/mlir/Dialect/AMX/AMX.td
@@ -151,6 +151,8 @@ def TileZeroOp : AMX_Op<"tile_zero", [
Zeroes the destination tile, with the shape defined by the 2-dim
vector type of the result. This is eventually lowered into the
"tilezero" instruction with the corresponding tile configuration.
+ It includes memory effects and CSE doesn't eliminate multiple
+ "tilezero" instructions.
Example:
@@ -180,15 +182,17 @@ def TileZeroOp : AMX_Op<"tile_zero", [
// Tile memory operations.
//
-def TileLoadOp : AMX_Op<"tile_load", [Pure,
- AMXIntrinsicOpInterface
+def TileLoadOp : AMX_Op<"tile_load", [
+ AMXIntrinsicOpInterface,
+ MemoryEffects<[MemWrite]>
]> {
let summary = "tile load operation";
let description = [{
Loads a tile from memory defined by a base and indices, with the
shape defined by the 2-dim vector type of the result. This is
eventually lowered into the "tileloadd" instruction with the
- corresponding tile configuration.
+ corresponding tile configuration. It includes memory effects and
+ CSE doesn't eliminate multiple "tileload" instructions.
Example:
diff --git a/mlir/test/Dialect/AMX/memory-effects.mlir b/mlir/test/Dialect/AMX/memory-effects.mlir
new file mode 100644
index 0000000000000..0b9cdc477ad26
--- /dev/null
+++ b/mlir/test/Dialect/AMX/memory-effects.mlir
@@ -0,0 +1,30 @@
+// RUN: mlir-opt %s -cse -convert-vector-to-llvm="enable-amx" | mlir-opt | FileCheck %s
+
+// CHECK-LABEL: mem_effect(
+// CHECK: llvm.call_intrinsic "llvm.x86.tilezero.internal"
+// CHECK: llvm.call_intrinsic "llvm.x86.tilezero.internal"
+// CHECK: llvm.call_intrinsic "llvm.x86.tileloadd64.internal"
+// CHECK: llvm.call_intrinsic "llvm.x86.tileloadd64.internal"
+// CHECK: llvm.call_intrinsic "llvm.x86.tileloadd64.internal"
+// CHECK: llvm.call_intrinsic "llvm.x86.tileloadd64.internal"
+func.func @mem_effect(%arg0: memref<2x32x32xbf16>, %arg1: memref<2x16x32xbf16>) -> memref<16x32xf32> {
+ %c1 = arith.constant 1 : index
+ %c0 = arith.constant 0 : index
+ %c2 = arith.constant 2 : index
+ %c16 = arith.constant 16 : index
+ %alloca = memref.alloca() : memref<16x32xf32>
+ %0 = amx.tile_zero : !amx.tile<16x16xf32>
+ %1 = amx.tile_zero : !amx.tile<16x16xf32>
+ %2:2 = scf.for %arg2 = %c0 to %c2 step %c1 iter_args(%arg3 = %0, %arg4 = %1) -> (!amx.tile<16x16xf32>, !amx.tile<16x16xf32>) {
+ %3 = amx.tile_load %arg0[%arg2, %c0, %c0] : memref<2x32x32xbf16> into !amx.tile<16x32xbf16>
+ %4 = amx.tile_load %arg0[%arg2, %c16, %c0] : memref<2x32x32xbf16> into !amx.tile<16x32xbf16>
+ %5 = amx.tile_load %arg1[%arg2, %c0, %c0] : memref<2x16x32xbf16> into !amx.tile<16x32xbf16>
+ %6 = amx.tile_load %arg1[%arg2, %c0, %c0] : memref<2x16x32xbf16> into !amx.tile<16x32xbf16>
+ %7 = amx.tile_mulf %3, %5, %arg3 : !amx.tile<16x32xbf16>, !amx.tile<16x32xbf16>, !amx.tile<16x16xf32>
+ %8 = amx.tile_mulf %4, %6, %arg4 : !amx.tile<16x32xbf16>, !amx.tile<16x32xbf16>, !amx.tile<16x16xf32>
+ scf.yield %7, %8 : !amx.tile<16x16xf32>, !amx.tile<16x16xf32>
+ }
+ amx.tile_store %alloca[%c0, %c0], %2#0 : memref<16x32xf32>, !amx.tile<16x16xf32>
+ amx.tile_store %alloca[%c0, %c16], %2#1 : memref<16x32xf32>, !amx.tile<16x16xf32>
+ return %alloca : memref<16x32xf32>
+}
>From c2d068dca5be1660a2c297e0343666d721f72acf Mon Sep 17 00:00:00 2001
From: Arun Thangamani <arun.thangamani at intel.com>
Date: Fri, 29 Aug 2025 02:26:05 -0700
Subject: [PATCH 3/4] updating comments + name correction + description updates
---
mlir/include/mlir/Dialect/AMX/AMX.td | 8 ++++----
.../{AMX/memory-effects.mlir => side-effects.mlir} | 6 ++++--
2 files changed, 8 insertions(+), 6 deletions(-)
rename mlir/test/Dialect/{AMX/memory-effects.mlir => side-effects.mlir} (87%)
diff --git a/mlir/include/mlir/Dialect/AMX/AMX.td b/mlir/include/mlir/Dialect/AMX/AMX.td
index a8f8a7f3d5b7f..1236fede4d88b 100644
--- a/mlir/include/mlir/Dialect/AMX/AMX.td
+++ b/mlir/include/mlir/Dialect/AMX/AMX.td
@@ -151,8 +151,8 @@ def TileZeroOp : AMX_Op<"tile_zero", [
Zeroes the destination tile, with the shape defined by the 2-dim
vector type of the result. This is eventually lowered into the
"tilezero" instruction with the corresponding tile configuration.
- It includes memory effects and CSE doesn't eliminate multiple
- "tilezero" instructions.
+ With memory-effects, each "tilezero" operation serves as a compilation
+ hint to use a separate tile register.
Example:
@@ -191,8 +191,8 @@ def TileLoadOp : AMX_Op<"tile_load", [
Loads a tile from memory defined by a base and indices, with the
shape defined by the 2-dim vector type of the result. This is
eventually lowered into the "tileloadd" instruction with the
- corresponding tile configuration. It includes memory effects and
- CSE doesn't eliminate multiple "tileload" instructions.
+ corresponding tile configuration. With memory-effects, each "tileload"
+ operation serves as a compilation hint to use a separate tile register.
Example:
diff --git a/mlir/test/Dialect/AMX/memory-effects.mlir b/mlir/test/Dialect/side-effects.mlir
similarity index 87%
rename from mlir/test/Dialect/AMX/memory-effects.mlir
rename to mlir/test/Dialect/side-effects.mlir
index 0b9cdc477ad26..d3904e50f970a 100644
--- a/mlir/test/Dialect/AMX/memory-effects.mlir
+++ b/mlir/test/Dialect/side-effects.mlir
@@ -1,13 +1,15 @@
// RUN: mlir-opt %s -cse -convert-vector-to-llvm="enable-amx" | mlir-opt | FileCheck %s
-// CHECK-LABEL: mem_effect(
+// With inclusion of memory side-effects, it is expected CSE not to fold multiple
+// "tileload" and "tilezero".
+// CHECK-LABEL: do_not_fold_tiles(
// CHECK: llvm.call_intrinsic "llvm.x86.tilezero.internal"
// CHECK: llvm.call_intrinsic "llvm.x86.tilezero.internal"
// CHECK: llvm.call_intrinsic "llvm.x86.tileloadd64.internal"
// CHECK: llvm.call_intrinsic "llvm.x86.tileloadd64.internal"
// CHECK: llvm.call_intrinsic "llvm.x86.tileloadd64.internal"
// CHECK: llvm.call_intrinsic "llvm.x86.tileloadd64.internal"
-func.func @mem_effect(%arg0: memref<2x32x32xbf16>, %arg1: memref<2x16x32xbf16>) -> memref<16x32xf32> {
+func.func @do_not_fold_tiles(%arg0: memref<2x32x32xbf16>, %arg1: memref<2x16x32xbf16>) -> memref<16x32xf32> {
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%c2 = arith.constant 2 : index
>From fbf578e3158b5608a462b070967459f658146fff Mon Sep 17 00:00:00 2001
From: Arun Thangamani <arun.thangamani at intel.com>
Date: Fri, 29 Aug 2025 03:15:37 -0700
Subject: [PATCH 4/4] moved tests to AMX folder
---
mlir/test/Dialect/{ => AMX}/side-effects.mlir | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
rename mlir/test/Dialect/{ => AMX}/side-effects.mlir (98%)
diff --git a/mlir/test/Dialect/side-effects.mlir b/mlir/test/Dialect/AMX/side-effects.mlir
similarity index 98%
rename from mlir/test/Dialect/side-effects.mlir
rename to mlir/test/Dialect/AMX/side-effects.mlir
index d3904e50f970a..22c76d98c6996 100644
--- a/mlir/test/Dialect/side-effects.mlir
+++ b/mlir/test/Dialect/AMX/side-effects.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -cse -convert-vector-to-llvm="enable-amx" | mlir-opt | FileCheck %s
+// RUN: mlir-opt %s -cse -convert-vector-to-llvm="enable-amx" | FileCheck %s
// With inclusion of memory side-effects, it is expected CSE not to fold multiple
// "tileload" and "tilezero".
More information about the Mlir-commits
mailing list