[Mlir-commits] [mlir] [mlir][xegpu] Add definition of SliceAttr (PR #150146)
Chao Chen
llvmlistbot at llvm.org
Fri Jul 25 10:20:59 PDT 2025
https://github.com/chencha3 updated https://github.com/llvm/llvm-project/pull/150146
>From 2bc70b6a8487a8ce0f0e7e0c5ac5bc59035465ab Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Tue, 22 Jul 2025 19:46:04 +0000
Subject: [PATCH 01/15] add definition draft of SliceAttr
---
.../mlir/Dialect/XeGPU/IR/XeGPUAttrs.td | 21 +++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
index 42b5b7a0d4e3f..abbd227b9905f 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
@@ -330,4 +330,25 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout"> {
let genVerifyDecl = 1;
}
+
+def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice"> {
+ let summary = [{Describes the data distribution and sharing among subgroups or work-items.}];
+
+ let description = [{
+ Like LayoutAttr, SliceAttr describes data distribution among subgroups or work-items.
+ However, whereas LayoutAttr requires the data to have the same rank as the attribute,
+ SliceAttr permits the data to have a lower rank. In this case, compute units in the
+ specified dimensions share the data, provided that the remaining ranks match the data
+ rank. SliceAttr is commonly used by operations such as vector.multi_reduction and
+ vector.broadcast.
+ }];
+
+ let parameters = (ins
+ "Attribute": $parent,
+ "DenseI64ArrayAttr": $dims
+ );
+
+ let assemblyFormat = "`<` $parent `,` `dim` `=` $dims `>`";
+}
+
#endif // MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD
>From 3959f9e5027f7c21f420c44a5e34501c115df361 Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Tue, 22 Jul 2025 21:02:22 +0000
Subject: [PATCH 02/15] add layout traits
---
mlir/include/mlir/Dialect/XeGPU/IR/CMakeLists.txt | 6 ++++++
mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h | 1 +
mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td | 11 +++++++++--
mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt | 1 +
mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 1 +
5 files changed, 18 insertions(+), 2 deletions(-)
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/CMakeLists.txt b/mlir/include/mlir/Dialect/XeGPU/IR/CMakeLists.txt
index 3f8cac4dc07c3..bbbeb71410a9b 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/CMakeLists.txt
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/CMakeLists.txt
@@ -12,3 +12,9 @@ mlir_tablegen(XeGPUEnums.h.inc -gen-enum-decls)
mlir_tablegen(XeGPUEnums.cpp.inc -gen-enum-defs)
add_public_tablegen_target(MLIRXeGPUEnumsIncGen)
add_dependencies(mlir-headers MLIRXeGPUEnumsIncGen)
+
+set(LLVM_TARGET_DEFINITIONS XeGPUAttrs.td)
+mlir_tablegen(XeGPUAttrInterface.h.inc -gen-attr-interface-decls)
+mlir_tablegen(XeGPUAttrInterface.cpp.inc -gen-attr-interface-defs)
+add_public_tablegen_target(MLIRXeGPUAttrInterfaceIncGen)
+add_dependencies(mlir-headers MLIRXeGPUAttrInterfaceIncGen)
\ No newline at end of file
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
index 8e2784f40ad39..cc8d58d8975b4 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
@@ -25,6 +25,7 @@ class TensorDescType;
} // namespace xegpu
} // namespace mlir
+#include <mlir/Dialect/XeGPU/IR/XeGPUAttrInterface.h.inc>
#include <mlir/Dialect/XeGPU/IR/XeGPUEnums.h.inc>
#define GET_ATTRDEF_CLASSES
#include <mlir/Dialect/XeGPU/IR/XeGPUAttrs.h.inc>
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
index abbd227b9905f..b15dd4a3177f9 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
@@ -169,7 +169,14 @@ def XeGPU_FenceScopeAttr:
let assemblyFormat = "$value";
}
-def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout"> {
+def LayoutTrait: AttrInterface<"LayoutTrait"> {
+ let cppNamespace = "::mlir::xegpu";
+ let description = [{
+ Common trait for all XeGPU layouts.
+ }];
+}
+
+def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout", [LayoutTrait]> {
let summary = [{
Describes the data distribution to subgroups and work-items for a tensor
specified by the tensor descriptor.
@@ -331,7 +338,7 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout"> {
}
-def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice"> {
+def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [LayoutTrait]> {
let summary = [{Describes the data distribution and sharing among subgroups or work-items.}];
let description = [{
diff --git a/mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt b/mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt
index 242a97ccfdf6d..89d986143e965 100644
--- a/mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt
+++ b/mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt
@@ -7,6 +7,7 @@ add_mlir_dialect_library(MLIRXeGPUDialect
DEPENDS
MLIRXeGPUIncGen
+ MLIRXeGPUAttrInterfaceIncGen
MLIRXeGPUAttrsIncGen
MLIRXeGPUEnumsIncGen
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index 78cbf884a1911..63160c98105c3 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -753,6 +753,7 @@ LogicalResult ConvertLayoutOp::verify() {
} // namespace xegpu
} // namespace mlir
+#include <mlir/Dialect/XeGPU/IR/XeGPUAttrInterface.cpp.inc>
#include <mlir/Dialect/XeGPU/IR/XeGPUEnums.cpp.inc>
#define GET_OP_CLASSES
#include <mlir/Dialect/XeGPU/IR/XeGPU.cpp.inc>
>From 2027cfc98321d8f68a713340cd652ab10625cfee Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Tue, 22 Jul 2025 23:46:10 +0000
Subject: [PATCH 03/15] add verifier and interface
---
.../mlir/Dialect/XeGPU/IR/XeGPUAttrs.td | 54 ++++++++++++++++++-
mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp | 21 ++++++++
2 files changed, 74 insertions(+), 1 deletion(-)
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
index b15dd4a3177f9..e3b06714bdcc2 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
@@ -174,6 +174,17 @@ def LayoutTrait: AttrInterface<"LayoutTrait"> {
let description = [{
Common trait for all XeGPU layouts.
}];
+
+ let methods = [
+ InterfaceMethod<"Get the effective sg layout",
+ "std::optional<llvm::SmallVector<int>>",
+ "getEffectiveSgLayout">,
+ InterfaceMethod<"Get the effective sg data",
+ "std::optional<llvm::SmallVector<int>>",
+ "getEffectiveSgData">,
+ ];
+
+
}
def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout", [LayoutTrait]> {
@@ -331,6 +342,18 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout", [LayoutTrait]> {
return LayoutAttr::get(getContext(), getSgLayout(), getSgData(), nullptr,
getLaneLayout(), getLaneData(), getOrder());
}
+
+ std::optional<llvm::SmallVector<int32_t>> getEffectiveSgLayout() const {
+ if (DenseI32ArrayAttr layout = getSgLayout())
+ return llvm::to_vector(layout.asArrayRef());
+ return std::nullopt;
+ }
+
+ std::optional<llvm::SmallVector<int32_t>> getEffectiveSgData() const {
+ if (DenseI32ArrayAttr data = getSgData())
+ return llvm::to_vector(data.asArrayRef());
+ return std::nullopt;
+ }
}];
let assemblyFormat = "`<` struct(params) `>`";
@@ -351,11 +374,40 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [LayoutTrait]> {
}];
let parameters = (ins
- "Attribute": $parent,
+ "xegpu::LayoutAttr": $parent,
"DenseI64ArrayAttr": $dims
);
+ let extraClassDeclaration = [{
+ std::optional<llvm::SmallVector<int32_t>> getEffectiveSgLayout() const {
+ if (DenseI32ArrayAttr layout = getParent().getSgLayout()) {
+ llvm::ArrayRef<int64_t> dims = getDims().asArrayRef();
+ llvm::SmallVector<int32_t> result;
+ for (auto [i, v]: llvm::enumerate(layout.asArrayRef())) {
+ if (!llvm::is_contained(dims, i))
+ result.push_back(v);
+ }
+ return result;
+ }
+ return std::nullopt;
+ }
+ std::optional<llvm::SmallVector<int32_t>> getEffectiveSgData() const {
+ if (DenseI32ArrayAttr data = getParent().getSgData()) {
+ llvm::ArrayRef<int64_t> dims = getDims().asArrayRef();
+ llvm::SmallVector<int32_t> result;
+ for (auto [i, v]: llvm::enumerate(data.asArrayRef())) {
+ if (!llvm::is_contained(dims, i))
+ result.push_back(v);
+ }
+ return result;
+ }
+ return std::nullopt;
+
+ }
+ }];
+
let assemblyFormat = "`<` $parent `,` `dim` `=` $dims `>`";
+ let genVerifyDecl = 1;
}
#endif // MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
index 642c393cbc2c8..7e293b6f0e1a3 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
@@ -206,6 +206,27 @@ LayoutAttr::verify(llvm::function_ref<mlir::InFlightDiagnostic()> emitError,
return success();
}
+//===----------------------------------------------------------------------===//
+// XeGPU_SliceAttr
+//===----------------------------------------------------------------------===//
+LogicalResult
+SliceAttr::verify(llvm::function_ref<mlir::InFlightDiagnostic()> emitError,
+ xegpu::LayoutAttr parent, DenseI64ArrayAttr dims) {
+ if (!parent || !dims)
+ return emitError() << "expected parent layout and dims attribute";
+
+ int rank = parent.getRank();
+ // check every element in dims is unique and smaller than rank
+ llvm::SmallDenseSet<int64_t> seen;
+ for (int64_t dim : dims.asArrayRef()) {
+ if (dim >= rank)
+ return emitError() << "invalid dim: " << dim;
+ if (!seen.insert(dim).second)
+ return emitError() << "repeated dim: " << dim;
+ }
+ return success();
+}
+
//===----------------------------------------------------------------------===//
// XeGPU_TensorDescType
//===----------------------------------------------------------------------===//
>From 638c0853dc2b76fbc01d8410cd6bb52aa7d20891 Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Wed, 23 Jul 2025 15:52:26 +0000
Subject: [PATCH 04/15] add invalid unit test
---
.../mlir/Dialect/XeGPU/IR/XeGPUAttrs.td | 2 +-
mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp | 4 ++--
mlir/test/Dialect/XeGPU/invalid.mlir | 19 +++++++++++++++++++
3 files changed, 22 insertions(+), 3 deletions(-)
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
index e3b06714bdcc2..d0b2e936d6508 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
@@ -406,7 +406,7 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [LayoutTrait]> {
}
}];
- let assemblyFormat = "`<` $parent `,` `dim` `=` $dims `>`";
+ let assemblyFormat = "`<` $parent `,` `dims` `=` $dims `>`";
let genVerifyDecl = 1;
}
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
index 7e293b6f0e1a3..21007f98643bc 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
@@ -220,9 +220,9 @@ SliceAttr::verify(llvm::function_ref<mlir::InFlightDiagnostic()> emitError,
llvm::SmallDenseSet<int64_t> seen;
for (int64_t dim : dims.asArrayRef()) {
if (dim >= rank)
- return emitError() << "invalid dim: " << dim;
+ return emitError() << "invalid dim (" << dim << ") in slice attribute.";
if (!seen.insert(dim).second)
- return emitError() << "repeated dim: " << dim;
+ return emitError() << "repeated dim (" << dim << ") in slice attribute.";
}
return success();
}
diff --git a/mlir/test/Dialect/XeGPU/invalid.mlir b/mlir/test/Dialect/XeGPU/invalid.mlir
index eb564d55bfd51..c4e72820e9aec 100644
--- a/mlir/test/Dialect/XeGPU/invalid.mlir
+++ b/mlir/test/Dialect/XeGPU/invalid.mlir
@@ -658,3 +658,22 @@ func.func @tensor_desc_invalid_sg_data(%src: ui64, %offsets: vector<16xindex>) {
#xegpu.layout<lane_layout = [8, 1], lane_data = [1, 2], order = [0, 1, 2]>>
return
}
+
+// -----
+#l = #xegpu.layout<sg_layout = [16, 1, 1], sg_data = [1, 8, 2]>
+// expected-error@+1 {{repeated dim (2) in slice attribute}}
+#s = #xegpu.slice<#l, dims = [2, 2]>
+func.func @slice_attr_repeat_dim() {
+ %offsets = arith.constant {layout_result_0 = #s} dense<0.8> : vector<16x8xindex>
+ return
+}
+
+// -----
+#l = #xegpu.layout<sg_layout = [16, 1, 1], sg_data = [1, 8, 2]>
+// expected-error@+1 {{invalid dim (3) in slice attribute}}
+#s = #xegpu.slice<#l, dims = [3]>
+func.func @slice_attr_repeat_dim() {
+ %offsets = arith.constant {layout_result_0 = #s} dense<0.8> : vector<16x8xindex>
+ return
+}
+
>From 91048f06417bd8af3d58d35a516115da044e6451 Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Wed, 23 Jul 2025 16:06:59 +0000
Subject: [PATCH 05/15] add wrappers
---
mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
index d0b2e936d6508..a38878bc6a61f 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
@@ -183,8 +183,6 @@ def LayoutTrait: AttrInterface<"LayoutTrait"> {
"std::optional<llvm::SmallVector<int>>",
"getEffectiveSgData">,
];
-
-
}
def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout", [LayoutTrait]> {
@@ -402,7 +400,18 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [LayoutTrait]> {
return result;
}
return std::nullopt;
+ }
+
+ DenseI32ArrayAttr getOrder() const {
+ return getParent().getOrder();
+ }
+
+ bool isWgLayout() const {
+ return getParent().isWgLayout();
+ }
+ bool isSgLayout() const {
+ return getParent().isSgLayout();
}
}];
>From ddc42c2886ae3c49f10032caea27817dc6d542de Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Wed, 23 Jul 2025 17:51:42 +0000
Subject: [PATCH 06/15] update description
---
mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
index 78a7c48af837e..8644be8e4204c 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
@@ -187,7 +187,7 @@ def LayoutTrait: AttrInterface<"LayoutTrait"> {
"getEffectiveSgLayout">,
InterfaceMethod<"Get the effective sg data",
"std::optional<llvm::SmallVector<int>>",
- "getEffectiveSgData">,
+ "getEffectiveSgData">
];
}
@@ -375,6 +375,16 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [LayoutTrait]> {
specified dimensions share the data, provided that the remaining ranks match the data
rank. SliceAttr is commonly used by operations such as vector.multi_reduction and
vector.broadcast.
+
+ Example:
+ ```
+ #l = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32]>
+ #r = #xegpu.slice<#l, dims = [0]>
+
+ %exp = math.exp %input {layout_result_0 = #l}: vector<256x128xf32>
+ %red = vector.multi_reduction<add>, %exp, %acc [0] {layout_result_0 = #r}: vector<256x128xf32> to vector<128xf32>
+ %bcast = vector.broadcast %red {layout_result_0 = #l} : vector<128xf32> to vector<256x128xf32>
+ ```
}];
let parameters = (ins
>From 36e2c3a118b0167c6e4f3341533f92353ddaebe2 Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Wed, 23 Jul 2025 18:44:08 +0000
Subject: [PATCH 07/15] refactor
---
mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h | 6 +++---
mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td | 15 +++------------
.../include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td | 12 ++++++++++++
3 files changed, 18 insertions(+), 15 deletions(-)
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
index cc8d58d8975b4..c2d546fa08fe0 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
@@ -22,18 +22,18 @@
namespace mlir {
namespace xegpu {
class TensorDescType;
+class LayoutAttr;
} // namespace xegpu
} // namespace mlir
+#include <mlir/Dialect/XeGPU/IR/XeGPUDialect.h.inc>
#include <mlir/Dialect/XeGPU/IR/XeGPUAttrInterface.h.inc>
#include <mlir/Dialect/XeGPU/IR/XeGPUEnums.h.inc>
+
#define GET_ATTRDEF_CLASSES
#include <mlir/Dialect/XeGPU/IR/XeGPUAttrs.h.inc>
#define GET_TYPEDEF_CLASSES
#include <mlir/Dialect/XeGPU/IR/XeGPUTypes.h.inc>
-
-#include <mlir/Dialect/XeGPU/IR/XeGPUDialect.h.inc>
-
#define GET_OP_CLASSES
#include <mlir/Dialect/XeGPU/IR/XeGPU.h.inc>
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
index 8644be8e4204c..36a12a2c2a029 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
@@ -396,24 +396,15 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [LayoutTrait]> {
std::optional<llvm::SmallVector<int32_t>> getEffectiveSgLayout() const {
if (DenseI32ArrayAttr layout = getParent().getSgLayout()) {
llvm::ArrayRef<int64_t> dims = getDims().asArrayRef();
- llvm::SmallVector<int32_t> result;
- for (auto [i, v]: llvm::enumerate(layout.asArrayRef())) {
- if (!llvm::is_contained(dims, i))
- result.push_back(v);
- }
- return result;
+ return XeGPUDialect::dropDims(layout.asArrayRef(), dims);
}
return std::nullopt;
}
+
std::optional<llvm::SmallVector<int32_t>> getEffectiveSgData() const {
if (DenseI32ArrayAttr data = getParent().getSgData()) {
llvm::ArrayRef<int64_t> dims = getDims().asArrayRef();
- llvm::SmallVector<int32_t> result;
- for (auto [i, v]: llvm::enumerate(data.asArrayRef())) {
- if (!llvm::is_contained(dims, i))
- result.push_back(v);
- }
- return result;
+ return XeGPUDialect::dropDims(data.asArrayRef(), dims);
}
return std::nullopt;
}
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td
index 549018b61d6fb..f07a758a59b96 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td
@@ -41,6 +41,18 @@ def XeGPU_Dialect : Dialect {
/// Checks if the given shape can be evenly distributed based on the layout
/// and data factors provided by the LayoutAttr.
static bool isEvenlyDistributable(llvm::ArrayRef<int64_t> shape, xegpu::LayoutAttr attr);
+
+ /// Drops the elements of `data` at the positions listed in `dropPositions`
+ /// and returns the rest, e.g., for data = [32, 64, 8] and dropPositions = [0, 2], it returns [64].
+ template<typename T, typename U>
+ static llvm::SmallVector<T> dropDims(llvm::ArrayRef<T> data, llvm::ArrayRef<U> dropPositions) {
+ llvm::SmallVector<T> result;
+ for (auto [i, v]: llvm::enumerate(data)) {
+ if (!llvm::is_contained(dropPositions, i))
+ result.push_back(v);
+ }
+ return result;
+ }
}];
}
>From 6872e6dbda83d21d960ffb2c5156e89b1381fdfd Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Wed, 23 Jul 2025 20:26:39 +0000
Subject: [PATCH 08/15] add delinearizeSubgroupId interface
---
mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h | 1 +
.../mlir/Dialect/XeGPU/IR/XeGPUAttrs.td | 13 ++++++++++++-
mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp | 19 +++++++++++++++++++
mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 2 ++
.../Transforms/XeGPUWgToSgDistribute.cpp | 2 +-
5 files changed, 35 insertions(+), 2 deletions(-)
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
index c2d546fa08fe0..57919966a90b2 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
@@ -15,6 +15,7 @@
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Dialect.h"
#include "mlir/IR/TypeUtilities.h"
+#include "mlir/IR/Value.h"
#include "mlir/Interfaces/ShapedOpInterfaces.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "mlir/Interfaces/ViewLikeInterface.h"
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
index 36a12a2c2a029..96466550cb703 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
@@ -187,7 +187,11 @@ def LayoutTrait: AttrInterface<"LayoutTrait"> {
"getEffectiveSgLayout">,
InterfaceMethod<"Get the effective sg data",
"std::optional<llvm::SmallVector<int>>",
- "getEffectiveSgData">
+ "getEffectiveSgData">,
+ InterfaceMethod<"Delinearize the Subgroup Id",
+ "FailureOr<SmallVector<Value>>",
+ "delinearizeSubgroupId",
+ (ins "Value":$linearId, "Location":$loc, "OpBuilder &": $builder)>
];
}
@@ -358,6 +362,10 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout", [LayoutTrait]> {
return llvm::to_vector(data.asArrayRef());
return std::nullopt;
}
+
+ FailureOr<SmallVector<Value>>
+ delinearizeSubgroupId(Value linearId, Location loc, OpBuilder &builder);
+
}];
let assemblyFormat = "`<` struct(params) `>`";
@@ -409,6 +417,9 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [LayoutTrait]> {
return std::nullopt;
}
+ FailureOr<llvm::SmallVector<Value>>
+ delinearizeSubgroupId(Value linearId, Location loc, OpBuilder &builder);
+
DenseI32ArrayAttr getOrder() const {
return getParent().getOrder();
}
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
index 836478a807761..974e42140e54e 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
@@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//
+#include "mlir/Dialect/Affine/Utils.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/XeGPU/IR/XeGPU.h"
#include "mlir/Dialect/XeGPU/IR/XeGPUTargetInfo.h"
@@ -211,6 +212,18 @@ LayoutAttr::verify(llvm::function_ref<mlir::InFlightDiagnostic()> emitError,
return success();
}
+FailureOr<SmallVector<Value>>
+LayoutAttr::delinearizeSubgroupId(Value linearId, Location loc,
+ OpBuilder &builder) {
+ assert(isWgLayout() && "delinearizeSubgroupId is only available for "
+ "workgroup-level layout attribute.");
+ auto dims =
+ llvm::map_to_vector(getSgLayout().asArrayRef(), [&](int32_t d) -> Value {
+ return arith::ConstantIndexOp::create(builder, loc, d);
+ });
+ return affine::delinearizeIndex(builder, loc, linearId, dims);
+}
+
//===----------------------------------------------------------------------===//
// XeGPU_SliceAttr
//===----------------------------------------------------------------------===//
@@ -232,6 +245,12 @@ SliceAttr::verify(llvm::function_ref<mlir::InFlightDiagnostic()> emitError,
return success();
}
+FailureOr<SmallVector<Value>>
+SliceAttr::delinearizeSubgroupId(Value linearId, Location loc,
+ OpBuilder &builder) {
+ return getParent().delinearizeSubgroupId(linearId, loc, builder);
+}
+
//===----------------------------------------------------------------------===//
// XeGPU_TensorDescType
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index edc18025136ac..a7013ed470cab 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -838,7 +838,9 @@ void ConvertLayoutOp::getCanonicalizationPatterns(RewritePatternSet &patterns,
} // namespace xegpu
} // namespace mlir
+namespace mlir {
#include <mlir/Dialect/XeGPU/IR/XeGPUAttrInterface.cpp.inc>
+} // namespace mlir
#include <mlir/Dialect/XeGPU/IR/XeGPUEnums.cpp.inc>
#define GET_OP_CLASSES
#include <mlir/Dialect/XeGPU/IR/XeGPU.cpp.inc>
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
index ef52323a9f46b..2168d43eb701b 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
@@ -175,7 +175,7 @@ struct WgToSgCreateNdOp : public OpConversionPattern<xegpu::CreateNdDescOp> {
}
auto deLinearizeSgId =
- affine::delinearizeIndex(rewriter, loc, linearSgId, sgLayoutDim);
+ layout.delinearizeSubgroupId(linearSgId, loc, rewriter);
if (failed(deLinearizeSgId))
return failure();
SmallVector<Value> sgIds = *deLinearizeSgId;
>From 223fab912e9987e7a7ed7440fb6fd42b2d0a4dd8 Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Wed, 23 Jul 2025 21:05:46 +0000
Subject: [PATCH 09/15] fix format
---
mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
index 57919966a90b2..eb74b8142688f 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
@@ -27,8 +27,8 @@ class LayoutAttr;
} // namespace xegpu
} // namespace mlir
-#include <mlir/Dialect/XeGPU/IR/XeGPUDialect.h.inc>
#include <mlir/Dialect/XeGPU/IR/XeGPUAttrInterface.h.inc>
+#include <mlir/Dialect/XeGPU/IR/XeGPUDialect.h.inc>
#include <mlir/Dialect/XeGPU/IR/XeGPUEnums.h.inc>
#define GET_ATTRDEF_CLASSES
>From 60e20a02b991a4276f74937ea69c483d780d2e49 Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Thu, 24 Jul 2025 23:33:27 +0000
Subject: [PATCH 10/15] add impl of getOffsets for LayoutAttr
---
.../mlir/Dialect/XeGPU/IR/XeGPUAttrs.td | 65 +++++++++------
mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp | 80 +++++++++++++++++--
.../Transforms/XeGPUWgToSgDistribute.cpp | 2 +-
3 files changed, 113 insertions(+), 34 deletions(-)
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
index 94a294fdc5705..5794f786dc9b9 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
@@ -183,15 +183,20 @@ def LayoutTrait: AttrInterface<"LayoutTrait"> {
let methods = [
InterfaceMethod<"Get the effective sg layout",
- "std::optional<llvm::SmallVector<int>>",
+ "std::optional<SmallVector<int64_t>>",
"getEffectiveSgLayout">,
InterfaceMethod<"Get the effective sg data",
- "std::optional<llvm::SmallVector<int>>",
+ "std::optional<SmallVector<int64_t>>",
"getEffectiveSgData">,
InterfaceMethod<"Delinearize the Subgroup Id",
"FailureOr<SmallVector<Value>>",
"delinearizeSubgroupId",
- (ins "Value":$linearId, "Location":$loc, "OpBuilder &": $builder)>
+ (ins "OpBuilder &": $builder, "Location":$loc, "Value":$linearId)>,
+
+ InterfaceMethod<"Get the local offset to be accessed by the given subgroup Id",
+ "FailureOr<SmallVector<SmallVector<Value>>>",
+ "getOffsets",
+ (ins "OpBuilder &": $builder, "Location":$loc, "Value":$linearId, "ArrayRef<int64_t>":$shape)>
];
}
@@ -351,20 +356,23 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout", [LayoutTrait]> {
getLaneLayout(), getLaneData(), getOrder());
}
- std::optional<llvm::SmallVector<int32_t>> getEffectiveSgLayout() const {
+ std::optional<SmallVector<int64_t>> getEffectiveSgLayout() const {
if (DenseI32ArrayAttr layout = getSgLayout())
- return llvm::to_vector(layout.asArrayRef());
+ return llvm::to_vector_of<int64_t>(layout.asArrayRef());
return std::nullopt;
}
- std::optional<llvm::SmallVector<int32_t>> getEffectiveSgData() const {
+ std::optional<SmallVector<int64_t>> getEffectiveSgData() const {
if (DenseI32ArrayAttr data = getSgData())
- return llvm::to_vector(data.asArrayRef());
+ return llvm::to_vector_of<int64_t>(data.asArrayRef());
return std::nullopt;
}
FailureOr<SmallVector<Value>>
- delinearizeSubgroupId(Value linearId, Location loc, OpBuilder &builder);
+ delinearizeSubgroupId(OpBuilder &builder, Location loc, Value linearId);
+
+ FailureOr<SmallVector<SmallVector<Value>>>
+ getOffsets(OpBuilder &builder, Location loc, Value linearId, ArrayRef<int64_t> shape);
}];
@@ -401,24 +409,6 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [LayoutTrait]> {
);
let extraClassDeclaration = [{
- std::optional<llvm::SmallVector<int32_t>> getEffectiveSgLayout() const {
- if (DenseI32ArrayAttr layout = getParent().getSgLayout()) {
- llvm::ArrayRef<int64_t> dims = getDims().asArrayRef();
- return XeGPUDialect::dropDims(layout.asArrayRef(), dims);
- }
- return std::nullopt;
- }
-
- std::optional<llvm::SmallVector<int32_t>> getEffectiveSgData() const {
- if (DenseI32ArrayAttr data = getParent().getSgData()) {
- llvm::ArrayRef<int64_t> dims = getDims().asArrayRef();
- return XeGPUDialect::dropDims(data.asArrayRef(), dims);
- }
- return std::nullopt;
- }
-
- FailureOr<llvm::SmallVector<Value>>
- delinearizeSubgroupId(Value linearId, Location loc, OpBuilder &builder);
DenseI32ArrayAttr getOrder() const {
return getParent().getOrder();
@@ -431,6 +421,29 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [LayoutTrait]> {
bool isSgLayout() const {
return getParent().isSgLayout();
}
+
+ std::optional<SmallVector<int64_t>> getEffectiveSgLayout() const {
+ if (auto layout = getParent().getEffectiveSgLayout()) {
+ ArrayRef<int64_t> dims = getDims().asArrayRef();
+ return XeGPUDialect::dropDims(llvm::ArrayRef<int64_t>(*layout), dims);
+ }
+ return std::nullopt;
+ }
+
+ std::optional<SmallVector<int64_t>> getEffectiveSgData() const {
+ if (auto data = getParent().getEffectiveSgData()) {
+ ArrayRef<int64_t> dims = getDims().asArrayRef();
+ return XeGPUDialect::dropDims(llvm::ArrayRef<int64_t>(*data), dims);
+ }
+ return std::nullopt;
+ }
+
+ FailureOr<SmallVector<Value>>
+ delinearizeSubgroupId(OpBuilder &builder, Location loc, Value linearId);
+
+ FailureOr<SmallVector<SmallVector<Value>>>
+ getOffsets(OpBuilder &builder, Location loc, Value linearId, ArrayRef<int64_t> shape);
+
}];
let assemblyFormat = "`<` $parent `,` `dims` `=` $dims `>`";
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
index 91d7b2a137efd..682f0620dbcfb 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
@@ -7,6 +7,8 @@
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/Affine/Utils.h"
+#include "mlir/Dialect/Arith/Utils/Utils.h"
+#include "mlir/Dialect/Index/IR/IndexOps.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/XeGPU/IR/XeGPU.h"
#include "mlir/Dialect/XeGPU/IR/XeGPUTargetInfo.h"
@@ -213,17 +215,75 @@ LayoutAttr::verify(llvm::function_ref<mlir::InFlightDiagnostic()> emitError,
}
FailureOr<SmallVector<Value>>
-LayoutAttr::delinearizeSubgroupId(Value linearId, Location loc,
- OpBuilder &builder) {
- assert(isWgLayout() && "delinearizeSubgroupId is only available for "
- "workgroup-level layout attribute.");
+LayoutAttr::delinearizeSubgroupId(OpBuilder &builder, Location loc,
+ Value linearId) {
+ // delinearizeSubgroupId is only available for workgroup-level layout
+ // attribute
+ if (!isWgLayout())
+ return failure();
+
auto dims =
llvm::map_to_vector(getSgLayout().asArrayRef(), [&](int32_t d) -> Value {
return arith::ConstantIndexOp::create(builder, loc, d);
});
+
return affine::delinearizeIndex(builder, loc, linearId, dims);
}
+FailureOr<SmallVector<SmallVector<Value>>>
+LayoutAttr::getOffsets(OpBuilder &builder, Location loc, Value linearId,
+ ArrayRef<int64_t> shape) {
+ if (!isWgLayout())
+ return failure();
+
+ auto sgLayout = getEffectiveSgLayout().value();
+ SmallVector<int64_t> sgShape;
+ if (auto maybeSgShape = getEffectiveSgData())
+ sgShape = maybeSgShape.value();
+ else if (auto ratio = computeShapeRatio(shape, sgLayout))
+ sgShape = ratio.value();
+ else
+ return failure();
+
+ // distUnit[i] is the minimum value between shape[i] and
+ // sgLayout[i] * sgShape[i]
+ SmallVector<int64_t> distUnit = llvm::map_to_vector(
+ llvm::zip_equal(shape, computeElementwiseMul(sgLayout, sgShape)),
+ [](const auto &t) { return std::min(std::get<0>(t), std::get<1>(t)); });
+
+ // delinearize Ids
+ auto maybeIds = delinearizeSubgroupId(builder, loc, linearId);
+ if (failed(maybeIds))
+ return failure();
+ SmallVector<Value> sgIds = *maybeIds;
+
+ // nd local offset, localOffset[i] = sgId[i] * sgShape[i]
+ SmallVector<Value> localOffsets = llvm::map_to_vector(
+ llvm::zip(sgIds, sgShape), [&](const auto &t) -> Value {
+ auto &[id, s] = t;
+ Value d = arith::ConstantIndexOp::create(builder, loc, s);
+ return index::MulOp::create(builder, loc, id, d);
+ });
+
+ SmallVector<SmallVector<Value>> offsets;
+ for (SmallVector<int64_t> unitOffs : StaticTileOffsetRange(shape, distUnit)) {
+ SmallVector<Value> base =
+ llvm::map_to_vector(unitOffs, [&](int64_t d) -> Value {
+ return arith::ConstantIndexOp::create(builder, loc, d);
+ });
+
+ SmallVector<Value> adds = llvm::map_to_vector(
+ llvm::zip_equal(base, localOffsets), [&](const auto &t) -> Value {
+ return arith::AddIOp::create(builder, loc, std::get<0>(t),
+ std::get<1>(t));
+ });
+
+ offsets.push_back(adds);
+ }
+
+ return offsets;
+}
+
//===----------------------------------------------------------------------===//
// XeGPU_SliceAttr
//===----------------------------------------------------------------------===//
@@ -246,9 +306,15 @@ SliceAttr::verify(llvm::function_ref<mlir::InFlightDiagnostic()> emitError,
}
FailureOr<SmallVector<Value>>
-SliceAttr::delinearizeSubgroupId(Value linearId, Location loc,
- OpBuilder &builder) {
- return getParent().delinearizeSubgroupId(linearId, loc, builder);
+SliceAttr::delinearizeSubgroupId(OpBuilder &builder, Location loc,
+ Value linearId) {
+ return getParent().delinearizeSubgroupId(builder, loc, linearId);
+}
+
+FailureOr<SmallVector<SmallVector<Value>>>
+SliceAttr::getOffsets(OpBuilder &builder, Location loc, Value linearId,
+ ArrayRef<int64_t> shape) {
+ return failure();
}
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
index f914914dc6b9f..e3cf5473076e7 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
@@ -213,7 +213,7 @@ struct WgToSgCreateNdOp : public OpConversionPattern<xegpu::CreateNdDescOp> {
}
auto deLinearizeSgId =
- layout.delinearizeSubgroupId(adjustedSgId, loc, rewriter);
+ layout.delinearizeSubgroupId(rewriter, loc, adjustedSgId);
if (failed(deLinearizeSgId))
return failure();
SmallVector<Value> sgIds = *deLinearizeSgId;
>From 3630966307810ff8ee47aa7d95328ebba225724e Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Fri, 25 Jul 2025 01:25:52 +0000
Subject: [PATCH 11/15] apply getOffsets in CreateNdDescOp
---
mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp | 29 +++++++-----
.../Transforms/XeGPUWgToSgDistribute.cpp | 44 +++++++++----------
mlir/test/Dialect/XeGPU/xegpu-wg-to-sg.mlir | 42 +++++++++---------
3 files changed, 60 insertions(+), 55 deletions(-)
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
index 682f0620dbcfb..0b5ecfc210281 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
@@ -217,14 +217,14 @@ LayoutAttr::verify(llvm::function_ref<mlir::InFlightDiagnostic()> emitError,
FailureOr<SmallVector<Value>>
LayoutAttr::delinearizeSubgroupId(OpBuilder &builder, Location loc,
Value linearId) {
- // delinearizeSubgroupId is only available for workgroup-level layout
- // attribute
+ // delinearizeSubgroupId is only available for
+ // workgroup-level layout attribute
if (!isWgLayout())
return failure();
auto dims =
- llvm::map_to_vector(getSgLayout().asArrayRef(), [&](int32_t d) -> Value {
- return arith::ConstantIndexOp::create(builder, loc, d);
+ llvm::map_to_vector(*getEffectiveSgLayout(), [&](int64_t d) -> Value {
+ return builder.createOrFold<arith::ConstantIndexOp>(loc, d);
});
return affine::delinearizeIndex(builder, loc, linearId, dims);
@@ -260,25 +260,32 @@ LayoutAttr::getOffsets(OpBuilder &builder, Location loc, Value linearId,
// nd local offset, localOffset[i] = sgId[i] * sgShape[i]
SmallVector<Value> localOffsets = llvm::map_to_vector(
llvm::zip(sgIds, sgShape), [&](const auto &t) -> Value {
- auto &[id, s] = t;
- Value d = arith::ConstantIndexOp::create(builder, loc, s);
- return index::MulOp::create(builder, loc, id, d);
+ return builder.createOrFold<index::MulOp>(
+ loc, std::get<0>(t),
+ builder.createOrFold<arith::ConstantIndexOp>(loc, std::get<1>(t)));
});
SmallVector<SmallVector<Value>> offsets;
for (SmallVector<int64_t> unitOffs : StaticTileOffsetRange(shape, distUnit)) {
SmallVector<Value> base =
llvm::map_to_vector(unitOffs, [&](int64_t d) -> Value {
- return arith::ConstantIndexOp::create(builder, loc, d);
+ return builder.create<arith::ConstantIndexOp>(loc, d);
});
SmallVector<Value> adds = llvm::map_to_vector(
llvm::zip_equal(base, localOffsets), [&](const auto &t) -> Value {
- return arith::AddIOp::create(builder, loc, std::get<0>(t),
- std::get<1>(t));
+ return builder.createOrFold<arith::AddIOp>(loc, std::get<0>(t),
+ std::get<1>(t));
});
- offsets.push_back(adds);
+ SmallVector<Value> mods = llvm::map_to_vector(
+ llvm::zip_equal(adds, distUnit), [&](const auto &t) -> Value {
+ return builder.createOrFold<index::RemUOp>(
+ loc, std::get<0>(t),
+ builder.create<arith::ConstantIndexOp>(loc, std::get<1>(t)));
+ });
+
+ offsets.push_back(mods);
}
return offsets;
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
index e3cf5473076e7..af55f176cb84f 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
@@ -212,39 +212,39 @@ struct WgToSgCreateNdOp : public OpConversionPattern<xegpu::CreateNdDescOp> {
rewriter.createOrFold<index::SubOp>(loc, linearSgId, startOfRangeVal);
}
- auto deLinearizeSgId =
- layout.delinearizeSubgroupId(rewriter, loc, adjustedSgId);
- if (failed(deLinearizeSgId))
+ auto tdescOffsets = layout.getOffsets(rewriter, loc, adjustedSgId, wgShape);
+ if (failed(tdescOffsets))
return failure();
- SmallVector<Value> sgIds = *deLinearizeSgId;
-
- // Calculate distribution unit shape and local offsets for subgroup
- SmallVector<int64_t> distUnitShape(sgLayout.size());
- SmallVector<Value> localOffset(sgLayout.size());
- for (size_t i = 0; i < sgLayout.size(); i++) {
- distUnitShape[i] = std::min(sgLayout[i] * sgShape[i], wgShape[i]);
- localOffset[i] =
- rewriter.createOrFold<index::MulOp>(loc, sgIds[i], sgDataDim[i]);
- }
-
- SmallVector<OpFoldResult> originalOffsets = op.getMixedOffsets();
xegpu::TensorDescType newTdescTy =
xegpu::TensorDescType::get(ctx, sgShape, elemTy, tdescTy.getEncoding(),
layout.dropSgLayoutAndData());
+
SmallVector<Value> newCreateNdOps;
- for (SmallVector<int64_t> distUnitBaseAddr :
- StaticTileOffsetRange(wgShape, distUnitShape)) {
- SmallVector<OpFoldResult> globalOffsets =
- calculateGlobalOffsets(rewriter, loc, originalOffsets, localOffset,
- distUnitBaseAddr, distUnitShape);
+ SmallVector<OpFoldResult> offset = op.getMixedOffsets();
+
+ for (auto tdescOffset : *tdescOffsets) {
+ SmallVector<OpFoldResult> newOffsets = llvm::map_to_vector(
+ llvm::zip_longest(tdescOffset, offset),
+ [&](const auto &t) -> OpFoldResult {
+ std::optional<Value> off = std::get<0>(t);
+ std::optional<OpFoldResult> old = std::get<1>(t);
+ if (!off.has_value())
+ return *old;
+
+ if (!old.has_value() || isZeroInteger(*old))
+ return *off;
+
+ return rewriter.createOrFold<index::AddOp>(
+ loc, *off,
+ getValueOrCreateConstantIndexOp(rewriter, loc, *old));
+ });
auto newCreateNdOp = xegpu::CreateNdDescOp::create(
- rewriter, loc, newTdescTy, op.getSource(), globalOffsets,
+ rewriter, loc, newTdescTy, op.getSource(), newOffsets,
op.getMixedSizes(), op.getMixedStrides());
newCreateNdOps.push_back(newCreateNdOp);
}
-
rewriter.replaceOpWithMultiple(op, {newCreateNdOps});
return success();
}
diff --git a/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg.mlir b/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg.mlir
index d51122417fb61..5e6a227e92320 100644
--- a/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg.mlir
+++ b/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg.mlir
@@ -4,27 +4,25 @@
//CHECK: #map1 = affine_map<()[s0] -> (s0 mod 4)>
gpu.module @test_1_1_assignment {
// CHECK-LABEL: create_nd_tdesc
- // CHECK-SAME: %[[ARG_0:.*]]: memref<24x32xf32>
+ // CHECK-SAME: [[ARG_0:%.*]]: memref<24x32xf32>
gpu.func @create_nd_tdesc(%src: memref<24x32xf32>) {
- // CHECK: %[[SGID:.*]] = gpu.subgroup_id
- // CHECK: %[[C12:.*]] = arith.constant 12 : index
- // CHECK: %[[C4:.*]] = arith.constant 4 : index
- // CHECK: %[[C8:.*]] = arith.constant 8 : index
- // CHECK: %[[DIV:.*]] = affine.apply #map()[%[[SGID]]]
- // CHECK: %[[REM:.*]] = affine.apply #map1()[%[[SGID]]]
- // CHECK: %[[MUL1:.*]] = index.mul %[[DIV]], %[[C12]]
- // CHECK: %[[MUL2:.*]] = index.mul %[[REM]], %[[C8]]
- // CHECK: %[[C24:.*]] = arith.constant 24 : index
- // CHECK: %[[MOD:.*]] = index.remu %[[MUL1]], %[[C24]]
- // CHECK: %[[C0:.*]] = arith.constant 0 : index
- // CHECK: %[[ADD1:.*]] = index.add %[[MOD]], %[[C0]]
- // CHECK: %[[C32:.*]] = arith.constant 32 : index
- // CHECK: %[[MOD1:.*]] = index.remu %[[MUL2]], %[[C32]]
- // CHECK: %[[C0_1:.*]] = arith.constant 0 : index
- // CHECK: %[[ADD2:.*]] = index.add %[[MOD1]], %[[C0_1]]
- // CHECK: %[[TDESC:.*]] = xegpu.create_nd_tdesc %[[ARG_0]][%[[ADD1]], %[[ADD2]]] : memref<24x32xf32>
- // CHECK-SAME: -> !xegpu.tensor_desc<12x8xf32, #xegpu.layout<lane_layout = [2, 8], lane_data = [1, 1]>>
- // CHECK: gpu.return
+ //CHECK: [[SGID:%.+]] = gpu.subgroup_id : index
+ //CHECK: [[SGIDY:%.+]] = affine.apply #map()[[[SGID]]]
+ //CHECK: [[SGIDX:%.+]] = affine.apply #map1()[[[SGID]]]
+ //CHECK: [[C12:%.+]] = arith.constant 12 : index
+ //CHECK: [[LY:%.+]] = index.mul [[SGIDY]], [[C12]]
+ //CHECK: [[C8:%.+]] = arith.constant 8 : index
+ //CHECK: [[LX:%.+]] = index.mul [[SGIDX]], [[C8]]
+ //CHECK: [[C0:%.+]] = arith.constant 0 : index
+ //CHECK: [[C0_1:%.+]] = arith.constant 0 : index
+ //CHECK: [[UY:%.+]] = arith.addi [[LY]], [[C0]] : index
+ //CHECK: [[UX:%.+]] = arith.addi [[LX]], [[C0_1]] : index
+ //CHECK: [[C24:%.+]] = arith.constant 24 : index
+ //CHECK: [[Y:%.+]] = index.remu [[UY]], [[C24]]
+ //CHECK: [[C32:%.+]] = arith.constant 32 : index
+ //CHECK: [[X:%.+]] = index.remu [[UX]], [[C32]]
+ //CHECK: [[TDESC:%.+]] = xegpu.create_nd_tdesc [[ARG_0]][[[Y]], [[X]]] : memref<24x32xf32> -> !xegpu.tensor_desc<12x8xf32, #xegpu.layout<lane_layout = [2, 8], lane_data = [1, 1]>>
+
%tdesc = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32>
-> !xegpu.tensor_desc<24x32xf32, #xegpu.layout<sg_layout = [2, 4], sg_data = [12, 8], lane_layout = [2, 8], lane_data = [1, 1]>>
gpu.return
@@ -180,7 +178,7 @@ gpu.func @dpas_no_sg_data(%a: memref<24x32xf32>, %b: memref<32x24xf32>) {
-> vector<24x1xf32>
// CHECK: vector.broadcast {{.*}} {layout_result_0 = #xegpu.layout<lane_layout = [2, 1], lane_data = [1, 1]>}
// CHECK-SAME: : vector<12x1xf32> to vector<12x8xf32>
- %broadcast = vector.broadcast %load
+ %broadcast = vector.broadcast %load
{layout_result_0 = #xegpu.layout<sg_layout = [2, 1], sg_data = [12, 8], lane_layout = [2, 1], lane_data = [1, 1]>}
: vector<24x1xf32> to vector<24x8xf32>
gpu.return
@@ -367,7 +365,7 @@ gpu.func @dpas_no_sg_data(%a: memref<24x32xf32>, %b: memref<32x24xf32>) {
// CHECK-LABEL: @subgroup_id_range_nested_if
gpu.func @subgroup_id_range_nested_if(%src: memref<256x128xf32>, %src1: memref<128x64xf32>) {
%sg_id = gpu.subgroup_id : index
- %c1 = arith.constant 1 : i1
+ %c1 = arith.constant 1 : i1
%c3 = arith.constant 3 : index
%c32 = arith.constant 32 : index
%tdesc = xegpu.create_nd_tdesc %src[0, 0] : memref<256x128xf32>
>From 398d69beac1e69ef72f23dea5b5649e4dc9a0ffd Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Fri, 25 Jul 2025 01:32:43 +0000
Subject: [PATCH 12/15] cleanup
---
mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp | 1 +
.../Transforms/XeGPUWgToSgDistribute.cpp | 59 +++----------------
2 files changed, 8 insertions(+), 52 deletions(-)
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
index 0b5ecfc210281..ef336ce800385 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
@@ -222,6 +222,7 @@ LayoutAttr::delinearizeSubgroupId(OpBuilder &builder, Location loc,
if (!isWgLayout())
return failure();
+ // TODO: handle order attribute
auto dims =
llvm::map_to_vector(*getEffectiveSgLayout(), [&](int64_t d) -> Value {
return builder.createOrFold<arith::ConstantIndexOp>(loc, d);
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
index af55f176cb84f..640d74d3e3715 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
@@ -125,39 +125,6 @@ getSgShapeAndCount(ArrayRef<int64_t> shape, xegpu::LayoutAttr layout) {
struct WgToSgCreateNdOp : public OpConversionPattern<xegpu::CreateNdDescOp> {
using OpConversionPattern<xegpu::CreateNdDescOp>::OpConversionPattern;
- // Calculate offset for each subgroup
- static SmallVector<OpFoldResult>
- calculateGlobalOffsets(ConversionPatternRewriter &rewriter, Location loc,
- const SmallVector<OpFoldResult> &originalOffsets,
- const SmallVector<Value> &localOffset,
- const SmallVector<int64_t> &distUnitBaseAddr,
- const SmallVector<int64_t> &distUnitShape) {
- assert(localOffset.size() == distUnitBaseAddr.size() &&
- "localOffset and distUnitBaseAddr must have the same rank");
-
- SmallVector<OpFoldResult> globalOffsets(originalOffsets.begin(),
- originalOffsets.end());
- size_t rank = localOffset.size();
- for (size_t i = 0; i < rank; ++i) {
- size_t dimIdx = originalOffsets.size() - rank + i;
- Value constOffset =
- arith::ConstantIndexOp::create(rewriter, loc, distUnitBaseAddr[i]);
- Value offset =
- rewriter.createOrFold<index::AddOp>(loc, localOffset[i], constOffset);
- Value modValue =
- arith::ConstantIndexOp::create(rewriter, loc, distUnitShape[i]);
- Value offsetMod =
- rewriter.createOrFold<index::RemUOp>(loc, offset, modValue);
- Value origOffset = getValueOrCreateConstantIndexOp(
- rewriter, loc, originalOffsets[dimIdx]);
- Value globalOffset =
- rewriter.createOrFold<index::AddOp>(loc, origOffset, offsetMod);
- globalOffsets[dimIdx] = globalOffset;
- }
-
- return globalOffsets;
- }
-
LogicalResult
matchAndRewrite(xegpu::CreateNdDescOp op, OneToNOpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
@@ -177,28 +144,14 @@ struct WgToSgCreateNdOp : public OpConversionPattern<xegpu::CreateNdDescOp> {
return rewriter.notifyMatchFailure(
op, "sgLayout attribute is required in layout");
- SmallVector<int64_t> sgShape = getSgShapeAndCount(wgShape, layout).first;
-
- // TODO : Handle order attribute
// Get the subgroup ID
- auto linearSgId =
+ Value linearSgId =
gpu::SubgroupIdOp::create(rewriter, loc, /*upper_bound=*/nullptr);
- // Create constants for layout dimensions
- SmallVector<Value> sgLayoutDim(sgLayout.size());
- SmallVector<Value> sgDataDim(sgShape.size());
-
- for (size_t i = 0; i < sgLayout.size(); i++) {
- sgLayoutDim[i] =
- arith::ConstantIndexOp::create(rewriter, loc, sgLayout[i]);
- sgDataDim[i] = arith::ConstantIndexOp::create(rewriter, loc, sgShape[i]);
- }
-
int64_t startOfRange = -1, endOfRange = -1;
bool sgIdRangeSpecified =
isSgIdRangeSpecified(op, startOfRange, endOfRange);
- Value adjustedSgId = linearSgId;
if (sgIdRangeSpecified) {
int64_t sgCount = endOfRange - startOfRange;
if (computeProduct(sgLayout) != sgCount)
@@ -208,14 +161,16 @@ struct WgToSgCreateNdOp : public OpConversionPattern<xegpu::CreateNdDescOp> {
// sg id
Value startOfRangeVal =
rewriter.create<arith::ConstantIndexOp>(loc, startOfRange);
- adjustedSgId =
+ linearSgId =
rewriter.createOrFold<index::SubOp>(loc, linearSgId, startOfRangeVal);
}
- auto tdescOffsets = layout.getOffsets(rewriter, loc, adjustedSgId, wgShape);
- if (failed(tdescOffsets))
+ auto maybeTdescOffsets =
+ layout.getOffsets(rewriter, loc, linearSgId, wgShape);
+ if (failed(maybeTdescOffsets))
return failure();
+ SmallVector<int64_t> sgShape = getSgShapeAndCount(wgShape, layout).first;
xegpu::TensorDescType newTdescTy =
xegpu::TensorDescType::get(ctx, sgShape, elemTy, tdescTy.getEncoding(),
layout.dropSgLayoutAndData());
@@ -223,7 +178,7 @@ struct WgToSgCreateNdOp : public OpConversionPattern<xegpu::CreateNdDescOp> {
SmallVector<Value> newCreateNdOps;
SmallVector<OpFoldResult> offset = op.getMixedOffsets();
- for (auto tdescOffset : *tdescOffsets) {
+ for (auto tdescOffset : *maybeTdescOffsets) {
SmallVector<OpFoldResult> newOffsets = llvm::map_to_vector(
llvm::zip_longest(tdescOffset, offset),
[&](const auto &t) -> OpFoldResult {
>From 08e4aa9c6df06e5d7eec54c63c96877dcc1631ac Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Fri, 25 Jul 2025 02:28:40 +0000
Subject: [PATCH 13/15] fix a bug
---
.../Transforms/XeGPUWgToSgDistribute.cpp | 30 ++++++++-----------
1 file changed, 12 insertions(+), 18 deletions(-)
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
index 640d74d3e3715..688e2b25867b3 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
@@ -179,26 +179,20 @@ struct WgToSgCreateNdOp : public OpConversionPattern<xegpu::CreateNdDescOp> {
SmallVector<OpFoldResult> offset = op.getMixedOffsets();
for (auto tdescOffset : *maybeTdescOffsets) {
- SmallVector<OpFoldResult> newOffsets = llvm::map_to_vector(
- llvm::zip_longest(tdescOffset, offset),
- [&](const auto &t) -> OpFoldResult {
- std::optional<Value> off = std::get<0>(t);
- std::optional<OpFoldResult> old = std::get<1>(t);
- if (!off.has_value())
- return *old;
-
- if (!old.has_value() || isZeroInteger(*old))
- return *off;
-
- return rewriter.createOrFold<index::AddOp>(
- loc, *off,
- getValueOrCreateConstantIndexOp(rewriter, loc, *old));
- });
-
- auto newCreateNdOp = xegpu::CreateNdDescOp::create(
+ SmallVector<OpFoldResult> newOffsets;
+ size_t rank = tdescOffset.size();
+ for (size_t i = 0; i < rank; i++) {
+ size_t idx = offset.size() - rank + i;
+ Value newOff = rewriter.createOrFold<index::AddOp>(
+ loc, tdescOffset[i],
+ getValueOrCreateConstantIndexOp(rewriter, loc, offset[idx]));
+ newOffsets.push_back(newOff);
+ }
+
+ auto newOp = xegpu::CreateNdDescOp::create(
rewriter, loc, newTdescTy, op.getSource(), newOffsets,
op.getMixedSizes(), op.getMixedStrides());
- newCreateNdOps.push_back(newCreateNdOp);
+ newCreateNdOps.push_back(newOp);
}
rewriter.replaceOpWithMultiple(op, {newCreateNdOps});
return success();
>From 62aa1dde2f1c47bf3d9b45582c668c33ef64a987 Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Fri, 25 Jul 2025 02:36:15 +0000
Subject: [PATCH 14/15] cleanup
---
.../Transforms/XeGPUWgToSgDistribute.cpp | 20 +++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
index 688e2b25867b3..dae1f06a8fbad 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
@@ -157,8 +157,8 @@ struct WgToSgCreateNdOp : public OpConversionPattern<xegpu::CreateNdDescOp> {
if (computeProduct(sgLayout) != sgCount)
return rewriter.notifyMatchFailure(
op, "sg_layout size must match the sg_id_range");
- // Subtract startOfRange from the original subgroup id to get the adjusted
- // sg id
+ // Subtract startOfRange from the original subgroup id to get
+ // the adjusted sg id
Value startOfRangeVal =
rewriter.create<arith::ConstantIndexOp>(loc, startOfRange);
linearSgId =
@@ -176,17 +176,17 @@ struct WgToSgCreateNdOp : public OpConversionPattern<xegpu::CreateNdDescOp> {
layout.dropSgLayoutAndData());
SmallVector<Value> newCreateNdOps;
- SmallVector<OpFoldResult> offset = op.getMixedOffsets();
+ SmallVector<OpFoldResult> oldOffsets = op.getMixedOffsets();
- for (auto tdescOffset : *maybeTdescOffsets) {
+ for (auto tdescOffsets : *maybeTdescOffsets) {
SmallVector<OpFoldResult> newOffsets;
- size_t rank = tdescOffset.size();
+ size_t rank = tdescOffsets.size();
for (size_t i = 0; i < rank; i++) {
- size_t idx = offset.size() - rank + i;
- Value newOff = rewriter.createOrFold<index::AddOp>(
- loc, tdescOffset[i],
- getValueOrCreateConstantIndexOp(rewriter, loc, offset[idx]));
- newOffsets.push_back(newOff);
+ size_t idx = oldOffsets.size() - rank + i;
+ Value add = rewriter.createOrFold<index::AddOp>(
+ loc, tdescOffsets[i],
+ getValueOrCreateConstantIndexOp(rewriter, loc, oldOffsets[idx]));
+ newOffsets.push_back(add);
}
auto newOp = xegpu::CreateNdDescOp::create(
>From de0a1bbc63ac3eb04ae1e900a892dba8d03005f0 Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Fri, 25 Jul 2025 17:18:09 +0000
Subject: [PATCH 15/15] add unit test
---
.../mlir/Dialect/XeGPU/IR/XeGPUAttrs.td | 4 +
mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp | 65 ++++++++++-
mlir/test/Dialect/XeGPU/layout.mlir | 6 +
.../Dialect/XeGPU/xegpu-attr-interface.mlir | 23 ++++
.../lib/Dialect/XeGPU/TestXeGPUTransforms.cpp | 107 ++++++++++++++++++
5 files changed, 203 insertions(+), 2 deletions(-)
create mode 100644 mlir/test/Dialect/XeGPU/xegpu-attr-interface.mlir
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
index 5794f786dc9b9..4f35e3ff061a4 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
@@ -410,6 +410,10 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [LayoutTrait]> {
let extraClassDeclaration = [{
+ int64_t getRank() const {
+ return getParent().getRank() - getDims().size();
+ }
+
DenseI32ArrayAttr getOrder() const {
return getParent().getOrder();
}
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
index ef336ce800385..fad3c6280fbbe 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
@@ -296,7 +296,7 @@ LayoutAttr::getOffsets(OpBuilder &builder, Location loc, Value linearId,
// XeGPU_SliceAttr
//===----------------------------------------------------------------------===//
LogicalResult
-SliceAttr::verify(llvm::function_ref<mlir::InFlightDiagnostic()> emitError,
+SliceAttr::verify(llvm::function_ref<InFlightDiagnostic()> emitError,
xegpu::LayoutAttr parent, DenseI64ArrayAttr dims) {
if (!parent || !dims)
return emitError() << "expected parent layout and dims attribute";
@@ -322,7 +322,68 @@ SliceAttr::delinearizeSubgroupId(OpBuilder &builder, Location loc,
FailureOr<SmallVector<SmallVector<Value>>>
SliceAttr::getOffsets(OpBuilder &builder, Location loc, Value linearId,
ArrayRef<int64_t> shape) {
- return failure();
+ assert(getRank() == static_cast<int64_t>(shape.size()) && "invalid shape.");
+ if (!isWgLayout())
+ return failure();
+
+ auto sgLayout = getEffectiveSgLayout().value();
+
+ SmallVector<int64_t> sgShape;
+ if (auto maybeSgShape = getEffectiveSgData())
+ sgShape = maybeSgShape.value();
+ else if (auto ratio = computeShapeRatio(shape, sgLayout))
+ sgShape = ratio.value();
+ else
+ return failure();
+
+ // distUnit[i] is the minimum value between shape[i] and
+ // sgLayout[i] * sgShape[i]
+ SmallVector<int64_t> distUnit = llvm::map_to_vector(
+ llvm::zip_equal(shape, computeElementwiseMul(sgLayout, sgShape)),
+ [](const auto &t) { return std::min(std::get<0>(t), std::get<1>(t)); });
+
+ // delinearize Ids
+ auto maybeIds = delinearizeSubgroupId(builder, loc, linearId);
+ if (failed(maybeIds))
+ return failure();
+ // The effective sgIds for offsets computing correspond
+ // to the dims that are not sliced.
+ ArrayRef<int64_t> dims = getDims().asArrayRef();
+ SmallVector<Value> sgIds =
+ XeGPUDialect::dropDims(ArrayRef<Value>(*maybeIds), dims);
+
+ // nd local offset, localOffset[i] = sgId[i] * sgShape[i]
+ SmallVector<Value> localOffsets = llvm::map_to_vector(
+ llvm::zip(sgIds, sgShape), [&](const auto &t) -> Value {
+ return builder.createOrFold<index::MulOp>(
+ loc, std::get<0>(t),
+ builder.createOrFold<arith::ConstantIndexOp>(loc, std::get<1>(t)));
+ });
+
+ SmallVector<SmallVector<Value>> offsets;
+ for (SmallVector<int64_t> unitOffs : StaticTileOffsetRange(shape, distUnit)) {
+ SmallVector<Value> base =
+ llvm::map_to_vector(unitOffs, [&](int64_t d) -> Value {
+ return builder.create<arith::ConstantIndexOp>(loc, d);
+ });
+
+ SmallVector<Value> adds = llvm::map_to_vector(
+ llvm::zip_equal(base, localOffsets), [&](const auto &t) -> Value {
+ return builder.createOrFold<arith::AddIOp>(loc, std::get<0>(t),
+ std::get<1>(t));
+ });
+
+ SmallVector<Value> mods = llvm::map_to_vector(
+ llvm::zip_equal(adds, distUnit), [&](const auto &t) -> Value {
+ return builder.createOrFold<index::RemUOp>(
+ loc, std::get<0>(t),
+ builder.create<arith::ConstantIndexOp>(loc, std::get<1>(t)));
+ });
+
+ offsets.push_back(mods);
+ }
+
+ return offsets;
}
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/XeGPU/layout.mlir b/mlir/test/Dialect/XeGPU/layout.mlir
index 017dacc8d629a..e5330951b065a 100644
--- a/mlir/test/Dialect/XeGPU/layout.mlir
+++ b/mlir/test/Dialect/XeGPU/layout.mlir
@@ -50,4 +50,10 @@ gpu.func @convert_layout_wg(%a: vector<32x64xf16>) {
gpu.return
}
+gpu.func @slice_attr_repeat_dim() {
+ //CHECK: arith.constant {layout_result_0 = #xegpu.slice<<sg_layout = [16, 1, 1], sg_data = [1, 8, 2]>, dims = [2]>} dense<8> : vector<16x8xindex>
+ %cst = arith.constant {layout_result_0 = #xegpu.slice<<sg_layout = [16, 1, 1], sg_data = [1, 8, 2]>, dims = [2]>} dense<8> : vector<16x8xindex>
+ gpu.return
+}
+
}
diff --git a/mlir/test/Dialect/XeGPU/xegpu-attr-interface.mlir b/mlir/test/Dialect/XeGPU/xegpu-attr-interface.mlir
new file mode 100644
index 0000000000000..6397b7fe525b8
--- /dev/null
+++ b/mlir/test/Dialect/XeGPU/xegpu-attr-interface.mlir
@@ -0,0 +1,23 @@
+// RUN: mlir-opt --test-xegpu-layout-interface --cse -split-input-file %s | FileCheck %s
+
+#block = #xegpu.layout<sg_layout = [4, 8], sg_data = [32, 32]>
+#slice = #xegpu.slice<#block, dims=[1]>
+
+//CHECK: #map = affine_map<()[s0] -> (s0 floordiv 8)>
+gpu.module @test_1_1_assignment {
+ gpu.func @create_nd_tdesc() -> vector<128xindex> {
+ //CHECK: [[sgId:%.+]] = gpu.subgroup_id : index
+ //CHECK: [[IDY:%.+]] = affine.apply #map()[[[sgId]]]
+ //CHECK: [[c32:%.+]] = arith.constant 32 : index
+ //CHECK: [[LOCALY:%.+]] = index.mul [[IDY]], [[c32]]
+ //CHECK: [[c0:%.+]] = arith.constant 0 : index
+ //CHECK: [[Y:%.+]] = arith.addi [[LOCALY]], [[c0]] : index
+ //CHECK: [[c128:%.+]] = arith.constant 128 : index
+ //CHECK: [[MODY:%.+]] = index.remu [[Y]], [[c128]]
+ //CHECK: [[BASE:%.+]] = vector.step : vector<32xindex>
+ //CHECK: [[CAST:%.+]] = vector.broadcast [[MODY]] : index to vector<32xindex>
+ //CHECK: [[ADD:%.+]] = arith.addi [[BASE]], [[CAST]] : vector<32xindex>
+ %step = vector.step {layout_result_0 = #slice}: vector<128xindex>
+ gpu.return %step : vector<128xindex>
+ }
+}
\ No newline at end of file
diff --git a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
index f71fcf7ca297b..1e96280769060 100644
--- a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
+++ b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
@@ -7,11 +7,14 @@
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/Dialect/Index/IR/IndexDialect.h"
#include "mlir/Dialect/Vector/Transforms/VectorTransforms.h"
#include "mlir/Dialect/XeGPU/IR/XeGPU.h"
#include "mlir/Dialect/XeGPU/Transforms/Transforms.h"
+#include "mlir/Dialect/XeGPU/Utils/XeGPUUtils.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassManager.h"
+#include "mlir/Transforms/DialectConversion.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
using namespace mlir;
@@ -149,12 +152,116 @@ struct TestXeGPUUnrollingPatterns
}
};
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "test-xegpu-layout-interface"
+#define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE "]: ")
+#define LDBG(X) LLVM_DEBUG(DBGS() << X << "\n")
+
+class TestStepOpPattern : public OpConversionPattern<vector::StepOp> {
+ using OpConversionPattern<vector::StepOp>::OpConversionPattern;
+
+ LogicalResult
+ matchAndRewrite(vector::StepOp op, OneToNOpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+
+ auto layoutName = xegpu::getLayoutName(op->getResult(0));
+ auto sliceAttr = op->getAttrOfType<xegpu::SliceAttr>(layoutName);
+ if (!sliceAttr || sliceAttr.getRank() != 1)
+ return failure();
+
+ std::optional<SmallVector<int64_t>> sgShape =
+ sliceAttr.getEffectiveSgData();
+ if (!sgShape)
+ return failure();
+
+ Location loc = op.getLoc();
+ VectorType type = op.getResult().getType();
+ auto wgShape = type.getShape();
+
+ Value sgId =
+ gpu::SubgroupIdOp::create(rewriter, loc, /*upper_bound=*/nullptr);
+ auto maybeOffsets = sliceAttr.getOffsets(rewriter, loc, sgId, wgShape);
+ if (failed(maybeOffsets))
+ return failure();
+
+ VectorType newTy = type.cloneWith(*sgShape, type.getElementType());
+ Value base = vector::StepOp::create(rewriter, loc, newTy);
+ SmallVector<Value> newOps;
+ for (auto offsets : *maybeOffsets) {
+ Value bcast =
+ vector::BroadcastOp::create(rewriter, loc, newTy, offsets[0]);
+ Value add = arith::AddIOp::create(rewriter, loc, base, bcast);
+ newOps.push_back(add);
+ }
+ rewriter.replaceOpWithMultiple(op, {newOps});
+ return success();
+ }
+};
+
+struct TestXeGPULayoutInterface
+ : public PassWrapper<TestXeGPULayoutInterface,
+ OperationPass<gpu::GPUModuleOp>> {
+ MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestXeGPULayoutInterface)
+
+ StringRef getArgument() const final { return "test-xegpu-layout-interface"; }
+
+ StringRef getDescription() const final {
+ return "Test the implementation of XeGPU Layout interfaces";
+ }
+
+ void getDependentDialects(::mlir::DialectRegistry &registry) const override {
+ registry.insert<arith::ArithDialect>();
+ registry.insert<memref::MemRefDialect>();
+ registry.insert<xegpu::XeGPUDialect>();
+ registry.insert<vector::VectorDialect>();
+ registry.insert<index::IndexDialect>();
+ }
+
+ TestXeGPULayoutInterface() = default;
+ TestXeGPULayoutInterface(const TestXeGPULayoutInterface &pass)
+ : PassWrapper(pass) {}
+
+ void runOnOperation() override {
+ MLIRContext *ctx = &getContext();
+
+ TypeConverter typeConverter;
+ auto materializeCast = [&](mlir::OpBuilder &builder, mlir::Type type,
+ mlir::ValueRange inputs,
+ mlir::Location loc) -> mlir::Value {
+ return builder.create<UnrealizedConversionCastOp>(loc, type, inputs)
+ .getResult(0);
+ };
+ typeConverter.addSourceMaterialization(materializeCast);
+ typeConverter.addTargetMaterialization(materializeCast);
+
+ RewritePatternSet patterns(ctx);
+ patterns.add<TestStepOpPattern>(typeConverter, ctx);
+
+ ConversionTarget target(*ctx);
+ auto isLegal = [&](xegpu::SliceAttr layout) -> bool {
+ return !layout || !layout.isWgLayout();
+ };
+
+ target.addDynamicallyLegalOp<vector::StepOp>(
+ [&](vector::StepOp op) -> bool {
+ auto layoutName = xegpu::getLayoutName(op->getResult(0));
+ auto sliceAttr = op->getAttrOfType<xegpu::SliceAttr>(layoutName);
+ return isLegal(sliceAttr);
+ });
+
+ target.markUnknownOpDynamicallyLegal([](Operation *op) { return true; });
+
+ (void)applyPartialConversion(getOperation(), target, std::move(patterns));
+ }
+};
+
} // namespace
namespace mlir {
namespace test {
void registerTestXeGPULowerings() {
PassRegistration<TestXeGPUUnrollingPatterns>();
+ PassRegistration<TestXeGPULayoutInterface>();
}
} // namespace test
} // namespace mlir
More information about the Mlir-commits
mailing list