[Mlir-commits] [mlir] [mlir][xegpu] Add initial skeleton implementation for lowering ConvertLayoutOp (PR #146176)

Mon Jul 21 08:48:33 PDT 2025

================
@@ -392,6 +392,72 @@ struct WgToSgElementwiseOp : public ConversionPattern {
   }
 };
 
+// clang-format off
+// Pattern for lowering ConvertLayoutOp based on sg_layout and sg_data.
+// If input_layout and target_layout have identical sg_layout and sg_data,
+// the op is rewritten to a subgroup-level ConvertLayoutOp with these fields
+// dropped. For example:
+//   #a = #xegpu.layout<sg_layout = [2, 2], sg_data = [16, 16], inst_data = [16, 16]>
+//   #b = #xegpu.layout<sg_layout = [2, 2], sg_data = [16, 16], inst_data = [8, 16]>
+//   xegpu.convert_layout %1 <{input_layout = #a, target_layout = #b}> : vector<32x64xf32>
+// becomes:
+//   #a = #xegpu.layout<inst_data = [16, 16]>
+//   #b = #xegpu.layout<inst_data = [8, 16]>
+//   xegpu.convert_layout %1 <{input_layout = #a, target_layout = #b}> : vector<16x16xf32>
+// (vector<16x16xf32> is determined by sg_data = [16, 16])
+//
+// If sg_layout or sg_data differ, SLM is used to redistribute data across subgroups.
+// For example:
+//   #a = #xegpu.layout<sg_layout = [1, 4], sg_data = [32, 16], inst_data = [16, 16]>
+//   #b = #xegpu.layout<sg_layout = [2, 2], sg_data = [16, 32], inst_data = [8, 16]>
+//   xegpu.convert_layout %1 <{input_layout = #a, target_layout = #b}> : vector<32x64xf32>
+// is lowered to:
+//   #a = #xegpu.layout<inst_data = [16, 16]>
+//   #b = #xegpu.layout<inst_data = [8, 16]>
+//   store_matrix %1, %slm <{layout_input_0 = #a}> : vector<32x16>, metrix_desc<32x64xf32>
+//   %d = load_matrix %slm <{layout_result_0 = #a}> : metrix_desc<32x64xf32> -> vector<16x32xf32>
+//   xegpu.convert_layout %d <{input_layout = #a, target_layout = #b}> : vector<16x32xf32>
+// clang-format on
+struct WgToSgConvertLayoutOp
+    : public OpConversionPattern<xegpu::ConvertLayoutOp> {
+  using OpConversionPattern<xegpu::ConvertLayoutOp>::OpConversionPattern;
+  LogicalResult
+  matchAndRewrite(xegpu::ConvertLayoutOp op, OneToNOpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    xegpu::LayoutAttr input = op.getInputLayout();
+    xegpu::LayoutAttr target = op.getTargetLayout();
+
+    if (!input || !target || !input.isWgLayout() || !target.isWgLayout())
+      return rewriter.notifyMatchFailure(
+          op, "Input and target layouts must have subgroup layout");
+
+    DenseI32ArrayAttr inputSgLayout = input.getSgLayout();
+    DenseI32ArrayAttr inputSgData = input.getSgData();
+    DenseI32ArrayAttr targetSgLayout = target.getSgLayout();
+    DenseI32ArrayAttr targetSgData = target.getSgData();
+
+    // TODO: currently we only support for optimal case, where input and
+    // output has the same sg_layout and sg_data, so SLM is not involved.
+    if (inputSgLayout != targetSgLayout || inputSgData != targetSgData)
----------------
Jianhui-Li wrote:

need to check order attribute also?

https://github.com/llvm/llvm-project/pull/146176