[Mlir-commits] [mlir] [mlir][XeGPU][VectorToXeGPU] Propagate vector layouts to xegpu ops (PR #163071)
Jianhui Li
llvmlistbot at llvm.org
Tue Oct 28 21:43:38 PDT 2025
================
@@ -249,3 +249,188 @@ gpu.func @non_unit_inner_stride_3D(
// CHECK: %[[RES:.+]] = arith.select %[[MASK]], %[[V]], %[[PASS]] : vector<8xi1>, vector<8xf32>
// CHECK: gpu.return %[[RES]] : vector<8xf32>
}
+
+// -----
+
+gpu.module @xevm_module {
+// Layouts are only specified for the gather op itself.
+gpu.func @load_dynamic_layout_operands(%source: memref<?x?xf32>,
+ %off0: index, %off1: index,
+ %indices: vector<8x16xindex>, %mask: vector<8x16xi1>,
+ %pass_thru: vector<8x16xf32>) -> vector<8x16xf32> {
+ %res = vector.gather %source[%off0, %off1][%indices], %mask,
+ %pass_thru {
+ layout_result_0 = #xegpu.layout<sg_layout = [0]>,
+ layout_operand_3 = #xegpu.layout<sg_layout = [1]>,
+ layout_operand_4 = #xegpu.layout<sg_layout = [2]>,
+ layout_operand_5 = #xegpu.layout<sg_layout = [3]>
+ } : memref<?x?xf32>, vector<8x16xindex>, vector<8x16xi1>, vector<8x16xf32> into vector<8x16xf32>
+ gpu.return %res : vector<8x16xf32>
+}
+// CHECK-LABEL: @load_dynamic_layout_operands(
+// CHECK-SAME: %[[SRC:.+]]: memref<?x?xf32>,
+// CHECK-SAME: %[[OFF1:.+]]: index, %[[OFF2:.+]]: index,
+// CHECK-SAME: %[[INDICES:.+]]: vector<8x16xindex>, %[[MASK:.+]]: vector<8x16xi1>, %[[PASS:.+]]: vector<8x16xf32>) -> vector<8x16xf32> {
+// %indices producer doesn't have a layout, so as 'broadcast/add' ops computing linear index.
+// CHECK: %[[SPLAT:.+]] = vector.broadcast {{.*}} : index to vector<8x16xindex>
+// CHECK: %[[LIN_IDX:.+]] = arith.addi %[[SPLAT]], {{.*}} : vector<8x16xindex>
+// CHECK: %[[VEC:.+]] = xegpu.load %[[BASE_I64:.+]]{{\[}}%[[LIN_IDX]]{{\]}}, %[[MASK]]
+// CHECK-SAME: {layout_operand_1 = #xegpu.layout<sg_layout = [1]>, layout_operand_2 = #xegpu.layout<sg_layout = [2]>,
+// CHECK-SAME: layout_result_0 = #xegpu.layout<sg_layout = [0]>}
+// CHECK: %[[RES:.+]] = arith.select {{[^{]*}}
+// CHECK-SAME: {{{[^}]*}}layout_operand_0 = #xegpu.layout<sg_layout = [2]>,
+// CHECK-SAME: {{[^}]*}}layout_operand_2 = #xegpu.layout<sg_layout = [3]>,
+// CHECK-SAME: {{[^}]*}}layout_result_0 = #xegpu.layout<sg_layout = [0]>} : vector<8x16xi1>, vector<8x16xf32>
+}
+
+// -----
+
+gpu.module @xevm_module {
+gpu.func @load_dynamic_layout_mixed(%source: memref<?x?x?xf32>,
+ %off0: index, %off1: index, %off2: index,
+ %mask: vector<8x16xi1>) -> vector<8x16xf32> {
+ %pass_thru = arith.constant {layout_result_0 = #xegpu.layout<sg_layout = [0]>} dense<0.000000e+00> : vector<8x16xf32>
+ %cst_1 = arith.constant {layout_result_0 = #xegpu.layout<sg_layout = [1]>} dense<[[0], [32], [64], [96], [128], [160], [192], [224]]> : vector<8x1xindex>
+ %cst_2 = arith.constant {layout_result_0 = #xegpu.layout<sg_layout = [2]>} dense<[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]> : vector<1x16xindex>
+ %0 = vector.broadcast %cst_1 {layout_result_0 = #xegpu.layout<sg_layout = [3]>} : vector<8x1xindex> to vector<8x16xindex>
+ %1 = vector.broadcast %cst_2 {layout_result_0 = #xegpu.layout<sg_layout = [4]>} : vector<1x16xindex> to vector<8x16xindex>
+ %2 = arith.addi %0, %1 {layout_result_0 = #xegpu.layout<sg_layout = [5]>} : vector<8x16xindex>
+
+ %res = vector.gather %source[%off0, %off1, %off2][%2], %mask,
+ %pass_thru {
+ layout_result_0 = #xegpu.layout<sg_layout = [6]>,
+ layout_operand_5 = #xegpu.layout<sg_layout = [7]>
+ } : memref<?x?x?xf32>, vector<8x16xindex>, vector<8x16xi1>, vector<8x16xf32> into vector<8x16xf32>
+ %res2 = arith.addf %res, %pass_thru : vector<8x16xf32>
+ gpu.return %res2 : vector<8x16xf32>
+}
+// CHECK-LABEL: @load_dynamic_layout_mixed(
+// CHECK-SAME: %[[SRC:.+]]: memref<?x?x?xf32>,
+// CHECK-SAME: %[[OFF1:.+]]: index, %[[OFF2:.+]]: index, %[[OFF3:.+]]: index,
+// CHECK-SAME: %[[MASK:.+]]: vector<8x16xi1>) -> vector<8x16xf32> {
+// CHECK: %[[PASS_THRU:.+]] = arith.constant {layout_result_0 = #xegpu.layout<sg_layout = [0]>} dense<0.000000e+00> : vector<8x16xf32>
+// Verify that linear-indices computation uses layout from the 'indices' producer op (%2).
+// CHECK: %[[SPLAT:.+]] = vector.broadcast {{.*}} {layout_result_0 = #xegpu.layout<sg_layout = [5]>} : index to vector<8x16xindex>
+// CHECK: %[[LIN_IDX:.+]] = arith.addi %[[SPLAT]], {{.*}} {layout_result_0 = #xegpu.layout<sg_layout = [5]>} : vector<8x16xindex>
+// CHECK: %[[VEC:.+]] = xegpu.load %[[BASE_I64:.+]]{{\[}}%[[LIN_IDX]]{{\]}}, %[[MASK]]
+// CHECK-SAME: {{{[^}]*}}layout_operand_2 = #xegpu.layout<sg_layout = [7]>
----------------
Jianhui-Li wrote:
+1
https://github.com/llvm/llvm-project/pull/163071
More information about the Mlir-commits
mailing list