[Mlir-commits] [mlir] 8774da8 - [MLIR][XeGPU] Preserve anchor layouts in recoverTemporaryLayout (#182186)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Sun Mar 1 15:43:06 PST 2026
Author: Nishant Patel
Date: 2026-03-01T15:43:01-08:00
New Revision: 8774da8f2f4d28e1b806da38af8732603dc63530
URL: https://github.com/llvm/llvm-project/commit/8774da8f2f4d28e1b806da38af8732603dc63530
DIFF: https://github.com/llvm/llvm-project/commit/8774da8f2f4d28e1b806da38af8732603dc63530.diff
LOG: [MLIR][XeGPU] Preserve anchor layouts in recoverTemporaryLayout (#182186)
Added:
Modified:
mlir/lib/Dialect/XeGPU/Transforms/XeGPULayoutImpl.cpp
mlir/test/Dialect/XeGPU/subgroup-distribute.mlir
mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPULayoutImpl.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPULayoutImpl.cpp
index a3f0c4285da25..7aa186bb22224 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPULayoutImpl.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPULayoutImpl.cpp
@@ -99,7 +99,7 @@ bool xegpu::recoverTemporaryLayouts(Operation *rootOp) {
<< operand.getOperandNumber() << " of operation " << op->getName();
continue;
}
- xegpu::setDistributeLayoutAttr(operand, layout);
+ xegpu::setTemporaryLayout(operand, layout);
}
return WalkResult::advance();
});
diff --git a/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir b/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir
index dae00838fdcb6..5cf4ae64a0fd4 100644
--- a/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir
+++ b/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir
@@ -42,7 +42,7 @@ gpu.module @xevm_module{
%6 = xegpu.create_nd_tdesc %arg2 : memref<8x16xf32> ->
!xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- xegpu.store_nd %5, %6[%c0, %c0] : vector<8x16xf32>,
+ xegpu.store_nd %5, %6[%c0, %c0] {layout = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : vector<8x16xf32>,
!xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
gpu.return
}
@@ -112,7 +112,7 @@ gpu.func @gemm(%arg0: memref<1024x1024xbf16>, %arg1: memref<1024x1024xbf16>, %ar
scf.yield %9 : vector<8x16xf32>
} {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>}
- xegpu.store_nd %4, %2[%0, %1] : vector<8x16xf32>,
+ xegpu.store_nd %4, %2[%0, %1] {layout = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : vector<8x16xf32>,
!xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
gpu.return
}
@@ -150,7 +150,7 @@ gpu.module @xevm_module{
} dense<12.> : vector<16x8xf16>
scf.yield %3 : vector<16x8xf16>
} { layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 2]> }
- xegpu.store %loaded, %src[%offset], %1 <{chunk_size=8}> : vector<16x8xf16>, memref<256xf16>, vector<16xindex>, vector<16xi1>
+ xegpu.store %loaded, %src[%offset], %1 <{chunk_size=8}> {layout = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 2]>} : vector<16x8xf16>, memref<256xf16>, vector<16xindex>, vector<16xi1>
gpu.return
}
}
@@ -175,7 +175,7 @@ gpu.module @xevm_module{
%3 = xegpu.load %src[%offset], %1 <{chunk_size=8}> {
layout = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 2]>
} : memref<256xf16>, vector<16xindex>, vector<16xi1> -> vector<16x8xf16>
- xegpu.store %3, %src[%offset], %1 <{chunk_size=8}> : vector<16x8xf16>, memref<256xf16>, vector<16xindex>, vector<16xi1>
+ xegpu.store %3, %src[%offset], %1 <{chunk_size=8}> {layout = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 2]>} : vector<16x8xf16>, memref<256xf16>, vector<16xindex>, vector<16xi1>
}
gpu.return
}
@@ -215,7 +215,7 @@ gpu.module @xevm_module{
: vector<8x16xf16>, vector<16x16xf16> -> vector<8x16xf32>
%7 = xegpu.create_nd_tdesc %arg2 : memref<8x16xf32>
-> !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- xegpu.store_nd %6, %7[%c0, %c0] : vector<8x16xf32>,
+ xegpu.store_nd %6, %7[%c0, %c0] {layout = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : vector<8x16xf32>,
!xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
gpu.return
@@ -267,7 +267,7 @@ gpu.module @xevm_module{
{
layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>
}
- xegpu.store_nd %3#0, %arg1[%c0, %c0]
+ xegpu.store_nd %3#0, %arg1[%c0, %c0] {layout = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>}
: vector<16x16xf32>, !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
gpu.return
}
diff --git a/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir b/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir
index 9407f7f2357a2..fe9e3683edf7c 100644
--- a/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir
+++ b/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir
@@ -64,7 +64,7 @@ gpu.module @test_distribution {
%load = xegpu.load_nd %tdesc[0, 0] {layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], lane_layout = [1, 16], lane_data = [1, 1]>}
: !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], lane_layout = [1, 16], lane_data = [1, 1]>>
-> vector<256x128xf32>
- xegpu.store_nd %load, %tdesc[0, 0]
+ xegpu.store_nd %load, %tdesc[0, 0] {layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], lane_layout = [1, 16], lane_data = [1, 1]>}
: vector<256x128xf32>, !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], lane_layout = [1, 16], lane_data = [1, 1]>>
gpu.return
}
@@ -966,4 +966,16 @@ gpu.module @test_distribution {
gpu.return
}
+ // CHECK-LABEL: @preserve_anchor_layout
+ // CHECK: arith.constant dense<1.000000e+00> : vector<16x128xf32>
+ // CHECK: xegpu.store_nd %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}] <{layout = #xegpu.layout<inst_data = [8, 16]>}>
+ gpu.func @preserve_anchor_layout(%dst: memref<256x128xf32>) {
+ %val = arith.constant {layout_result_0 = #xegpu.layout<sg_layout = [16, 1], sg_data = [16, 128]>} dense<1.0> : vector<256x128xf32>
+ %tdesc = xegpu.create_nd_tdesc %dst : memref<256x128xf32>
+ -> !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [16, 1], sg_data = [16, 128], inst_data = [8, 16]>>
+ xegpu.store_nd %val, %tdesc[0, 0] <{layout = #xegpu.layout<sg_layout = [16, 1], sg_data = [16, 128], inst_data = [8, 16]>}>
+ : vector<256x128xf32>, !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [16, 1], sg_data = [16, 128], inst_data = [8, 16]>>
+ gpu.return
+ }
+
}
More information about the Mlir-commits mailing list