[Mlir-commits] [mlir] [mlir][AMX] Memory effects to amx.tile_zero (PR #155403)

Arun Thangamani llvmlistbot at llvm.org
Fri Aug 29 03:15:52 PDT 2025


https://github.com/arun-thmn updated https://github.com/llvm/llvm-project/pull/155403

>From 3138ca83c677c3d515e00df92446bb97f7ff2c62 Mon Sep 17 00:00:00 2001
From: Arun Thangamani <arun.thangamani at intel.com>
Date: Tue, 26 Aug 2025 05:29:05 -0700
Subject: [PATCH 1/4] adding memory side-effects to amx.tile_zero

---
 mlir/include/mlir/Dialect/AMX/AMX.td | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/mlir/include/mlir/Dialect/AMX/AMX.td b/mlir/include/mlir/Dialect/AMX/AMX.td
index 6bbde43e2d011..91fe1bfb5cd35 100644
--- a/mlir/include/mlir/Dialect/AMX/AMX.td
+++ b/mlir/include/mlir/Dialect/AMX/AMX.td
@@ -142,8 +142,9 @@ class AMX_Op<string mnemonic, list<Trait> traits = []> :
 // Tile reset.
 //
 
-def TileZeroOp : AMX_Op<"tile_zero", [Pure,
-    AMXIntrinsicOpInterface
+def TileZeroOp : AMX_Op<"tile_zero", [
+    AMXIntrinsicOpInterface,
+    MemoryEffects<[MemWrite]>
   ]> {
   let summary = "tile zero operation";
   let description = [{

>From c1d42593852f56143a8a10ad34bd70b5e2557ce5 Mon Sep 17 00:00:00 2001
From: Arun Thangamani <arun.thangamani at intel.com>
Date: Fri, 29 Aug 2025 01:00:18 -0700
Subject: [PATCH 2/4] updating the description + adding a test-case

---
 mlir/include/mlir/Dialect/AMX/AMX.td      | 10 +++++---
 mlir/test/Dialect/AMX/memory-effects.mlir | 30 +++++++++++++++++++++++
 2 files changed, 37 insertions(+), 3 deletions(-)
 create mode 100644 mlir/test/Dialect/AMX/memory-effects.mlir

diff --git a/mlir/include/mlir/Dialect/AMX/AMX.td b/mlir/include/mlir/Dialect/AMX/AMX.td
index 91fe1bfb5cd35..a8f8a7f3d5b7f 100644
--- a/mlir/include/mlir/Dialect/AMX/AMX.td
+++ b/mlir/include/mlir/Dialect/AMX/AMX.td
@@ -151,6 +151,8 @@ def TileZeroOp : AMX_Op<"tile_zero", [
     Zeroes the destination tile, with the shape defined by the 2-dim
     vector type of the result. This is eventually lowered into the
     "tilezero" instruction with the corresponding tile configuration.
+    It includes memory effects and CSE doesn't eliminate multiple
+    "tilezero" instructions.
 
     Example:
 
@@ -180,15 +182,17 @@ def TileZeroOp : AMX_Op<"tile_zero", [
 // Tile memory operations.
 //
 
-def TileLoadOp : AMX_Op<"tile_load", [Pure,
-    AMXIntrinsicOpInterface
+def TileLoadOp : AMX_Op<"tile_load", [
+    AMXIntrinsicOpInterface,
+    MemoryEffects<[MemWrite]>
   ]> {
   let summary = "tile load operation";
   let description = [{
     Loads a tile from memory defined by a base and indices, with the
     shape defined by the 2-dim vector type of the result. This is
     eventually lowered into the "tileloadd" instruction with the
-    corresponding tile configuration.
+    corresponding tile configuration. It includes memory effects and 
+    CSE doesn't eliminate multiple "tileload" instructions.
 
     Example:
 
diff --git a/mlir/test/Dialect/AMX/memory-effects.mlir b/mlir/test/Dialect/AMX/memory-effects.mlir
new file mode 100644
index 0000000000000..0b9cdc477ad26
--- /dev/null
+++ b/mlir/test/Dialect/AMX/memory-effects.mlir
@@ -0,0 +1,30 @@
+// RUN: mlir-opt %s -cse -convert-vector-to-llvm="enable-amx" | mlir-opt | FileCheck %s
+
+// CHECK-LABEL: mem_effect(
+// CHECK: llvm.call_intrinsic "llvm.x86.tilezero.internal"
+// CHECK: llvm.call_intrinsic "llvm.x86.tilezero.internal"
+// CHECK: llvm.call_intrinsic "llvm.x86.tileloadd64.internal"
+// CHECK: llvm.call_intrinsic "llvm.x86.tileloadd64.internal"
+// CHECK: llvm.call_intrinsic "llvm.x86.tileloadd64.internal"
+// CHECK: llvm.call_intrinsic "llvm.x86.tileloadd64.internal"
+func.func @mem_effect(%arg0: memref<2x32x32xbf16>, %arg1: memref<2x16x32xbf16>) -> memref<16x32xf32> {
+  %c1 = arith.constant 1 : index
+  %c0 = arith.constant 0 : index
+  %c2 = arith.constant 2 : index
+  %c16 = arith.constant 16 : index
+  %alloca = memref.alloca() : memref<16x32xf32>
+  %0 = amx.tile_zero : !amx.tile<16x16xf32>
+  %1 = amx.tile_zero : !amx.tile<16x16xf32>
+  %2:2 = scf.for %arg2 = %c0 to %c2 step %c1 iter_args(%arg3 = %0, %arg4 = %1) -> (!amx.tile<16x16xf32>, !amx.tile<16x16xf32>) {
+    %3 = amx.tile_load %arg0[%arg2, %c0, %c0] : memref<2x32x32xbf16> into !amx.tile<16x32xbf16>
+    %4 = amx.tile_load %arg0[%arg2, %c16, %c0] : memref<2x32x32xbf16> into !amx.tile<16x32xbf16>
+    %5 = amx.tile_load %arg1[%arg2, %c0, %c0] : memref<2x16x32xbf16> into !amx.tile<16x32xbf16>
+    %6 = amx.tile_load %arg1[%arg2, %c0, %c0] : memref<2x16x32xbf16> into !amx.tile<16x32xbf16>
+    %7 = amx.tile_mulf %3, %5, %arg3 : !amx.tile<16x32xbf16>, !amx.tile<16x32xbf16>, !amx.tile<16x16xf32>
+    %8 = amx.tile_mulf %4, %6, %arg4 : !amx.tile<16x32xbf16>, !amx.tile<16x32xbf16>, !amx.tile<16x16xf32>
+    scf.yield %7, %8 : !amx.tile<16x16xf32>, !amx.tile<16x16xf32>
+  }
+  amx.tile_store %alloca[%c0, %c0], %2#0 : memref<16x32xf32>, !amx.tile<16x16xf32>
+  amx.tile_store %alloca[%c0, %c16], %2#1 : memref<16x32xf32>, !amx.tile<16x16xf32>
+  return %alloca : memref<16x32xf32>
+}

>From c2d068dca5be1660a2c297e0343666d721f72acf Mon Sep 17 00:00:00 2001
From: Arun Thangamani <arun.thangamani at intel.com>
Date: Fri, 29 Aug 2025 02:26:05 -0700
Subject: [PATCH 3/4] updating comments + name correction + description updates

---
 mlir/include/mlir/Dialect/AMX/AMX.td                      | 8 ++++----
 .../{AMX/memory-effects.mlir => side-effects.mlir}        | 6 ++++--
 2 files changed, 8 insertions(+), 6 deletions(-)
 rename mlir/test/Dialect/{AMX/memory-effects.mlir => side-effects.mlir} (87%)

diff --git a/mlir/include/mlir/Dialect/AMX/AMX.td b/mlir/include/mlir/Dialect/AMX/AMX.td
index a8f8a7f3d5b7f..1236fede4d88b 100644
--- a/mlir/include/mlir/Dialect/AMX/AMX.td
+++ b/mlir/include/mlir/Dialect/AMX/AMX.td
@@ -151,8 +151,8 @@ def TileZeroOp : AMX_Op<"tile_zero", [
     Zeroes the destination tile, with the shape defined by the 2-dim
     vector type of the result. This is eventually lowered into the
     "tilezero" instruction with the corresponding tile configuration.
-    It includes memory effects and CSE doesn't eliminate multiple
-    "tilezero" instructions.
+    With memory-effects, each "tilezero" operation serves as a compilation 
+    hint to use a separate tile register.
 
     Example:
 
@@ -191,8 +191,8 @@ def TileLoadOp : AMX_Op<"tile_load", [
     Loads a tile from memory defined by a base and indices, with the
     shape defined by the 2-dim vector type of the result. This is
     eventually lowered into the "tileloadd" instruction with the
-    corresponding tile configuration. It includes memory effects and 
-    CSE doesn't eliminate multiple "tileload" instructions.
+    corresponding tile configuration. With memory-effects, each "tileloadd"
+    operation serves as a compilation hint to use a separate tile register.
 
     Example:
 
diff --git a/mlir/test/Dialect/AMX/memory-effects.mlir b/mlir/test/Dialect/side-effects.mlir
similarity index 87%
rename from mlir/test/Dialect/AMX/memory-effects.mlir
rename to mlir/test/Dialect/side-effects.mlir
index 0b9cdc477ad26..d3904e50f970a 100644
--- a/mlir/test/Dialect/AMX/memory-effects.mlir
+++ b/mlir/test/Dialect/side-effects.mlir
@@ -1,13 +1,15 @@
 // RUN: mlir-opt %s -cse -convert-vector-to-llvm="enable-amx" | mlir-opt | FileCheck %s
 
-// CHECK-LABEL: mem_effect(
+// With inclusion of memory side-effects, it is expected CSE not to fold multiple 
+// "tileload" and "tilezero".
+// CHECK-LABEL: do_not_fold_tiles(
 // CHECK: llvm.call_intrinsic "llvm.x86.tilezero.internal"
 // CHECK: llvm.call_intrinsic "llvm.x86.tilezero.internal"
 // CHECK: llvm.call_intrinsic "llvm.x86.tileloadd64.internal"
 // CHECK: llvm.call_intrinsic "llvm.x86.tileloadd64.internal"
 // CHECK: llvm.call_intrinsic "llvm.x86.tileloadd64.internal"
 // CHECK: llvm.call_intrinsic "llvm.x86.tileloadd64.internal"
-func.func @mem_effect(%arg0: memref<2x32x32xbf16>, %arg1: memref<2x16x32xbf16>) -> memref<16x32xf32> {
+func.func @do_not_fold_tiles(%arg0: memref<2x32x32xbf16>, %arg1: memref<2x16x32xbf16>) -> memref<16x32xf32> {
   %c1 = arith.constant 1 : index
   %c0 = arith.constant 0 : index
   %c2 = arith.constant 2 : index

>From fbf578e3158b5608a462b070967459f658146fff Mon Sep 17 00:00:00 2001
From: Arun Thangamani <arun.thangamani at intel.com>
Date: Fri, 29 Aug 2025 03:15:37 -0700
Subject: [PATCH 4/4] moved tests to AMX folder

---
 mlir/test/Dialect/{ => AMX}/side-effects.mlir | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 rename mlir/test/Dialect/{ => AMX}/side-effects.mlir (98%)

diff --git a/mlir/test/Dialect/side-effects.mlir b/mlir/test/Dialect/AMX/side-effects.mlir
similarity index 98%
rename from mlir/test/Dialect/side-effects.mlir
rename to mlir/test/Dialect/AMX/side-effects.mlir
index d3904e50f970a..22c76d98c6996 100644
--- a/mlir/test/Dialect/side-effects.mlir
+++ b/mlir/test/Dialect/AMX/side-effects.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -cse -convert-vector-to-llvm="enable-amx" | mlir-opt | FileCheck %s
+// RUN: mlir-opt %s -cse -convert-vector-to-llvm="enable-amx" | FileCheck %s
 
 // With inclusion of memory side-effects, it is expected CSE not to fold multiple 
 // "tileload" and "tilezero".



More information about the Mlir-commits mailing list