[Mlir-commits] [mlir] [MLIR][XeGPU] Pass inst_data for blocking create/constant Mask and Step op (PR #175456)

Sun Jan 11 12:34:58 PST 2026

https://github.com/nbpatel created https://github.com/llvm/llvm-project/pull/175456

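(No description was provided with this pull request.) Summary of the patch below: `XeGPUBlockingPass::getTileShape` now also handles `vector.step`, `vector.constant_mask`, and `vector.create_mask`, taking their tile shape from the op result in the same way as the existing `vector.transpose` and `vector.broadcast` case. Three tests are added to `xegpu-blocking.mlir`, one per op, each using a layout with `inst_data = [16]` on a 32-element vector.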

>From e9d9252c4e76a6f7adb0dac33bb4ce828e35709a Mon Sep 17 00:00:00 2001
From: nbpatel <nishant.b.patel at intel.com>
Date: Fri, 9 Jan 2026 18:31:10 +0000
Subject: [PATCH] Enable blocking for vector ops

---
 .../XeGPU/Transforms/XeGPUBlocking.cpp        |  3 +-
 mlir/test/Dialect/XeGPU/xegpu-blocking.mlir   | 53 +++++++++++++++++++
 2 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
index ba2753f517ce6..f0581208c8a2b 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
@@ -228,7 +228,8 @@ XeGPUBlockingPass::getTileShape(Operation *op) const {
   if (isa<vector::MultiDimReductionOp>(op))
     return getTileShape(op->getOpOperand(0));
 
-  if (isa<vector::TransposeOp, vector::BroadcastOp>(op))
+  if (isa<vector::TransposeOp, vector::BroadcastOp, vector::StepOp,
+          vector::ConstantMaskOp, vector::CreateMaskOp>(op))
     return getTileShape(op->getOpResult(0));
 
   return std::nullopt;
diff --git a/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir b/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir
index 9f9edcd416ddf..2b35b75b8d4ea 100644
--- a/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir
+++ b/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir
@@ -352,6 +352,59 @@ gpu.module @test_kernel   {
   }
 }
 
+// -----
+#l = #xegpu.layout<inst_data = [16]>
+gpu.module @test_kernel {
+  gpu.func @test_vector_constant_mask(%src: ui64, %dst: ui64) {
+    //CHECK: arith.constant dense<true> : vector<16xi1>
+    %mask = vector.constant_mask [32] {layout_result_0 = #l} : vector<32xi1>
+    %cst = arith.constant dense<[
+      0,   8,  16,  24,  32,  40,  48,  56,
+      64,  72,  80,  88,  96, 104, 112, 120,
+      128, 136, 144, 152, 160, 168, 176, 184,
+      192, 200, 208, 216, 224, 232, 240, 248
+    ]> : vector<32xindex>
+    %ld = xegpu.load %src[%cst], %mask {chunk_size = 1, layout = #l, l1_hint = #xegpu.cache_hint<cached>} : ui64, vector<32xindex>, vector<32xi1> -> vector<32xf32>
+    xegpu.store %ld, %dst[%cst], %mask {chunk_size = 1, layout = #l, l1_hint = #xegpu.cache_hint<cached>} : vector<32xf32>, ui64, vector<32xindex>, vector<32xi1>
+    gpu.return
+  }
+}
+
+// -----
+#l = #xegpu.layout<inst_data = [16]>
+gpu.module @test_kernel {
+  gpu.func @test_vector_create_mask(%src: ui64, %dst: ui64) {
+    %c16 = arith.constant 16 : index
+    //CHECK-COUNT-2: vector.create_mask {{.*}} : vector<16xi1>
+    %mask = vector.create_mask %c16 {layout_result_0 = #l} : vector<32xi1>
+    %cst = arith.constant dense<[
+      0,   8,  16,  24,  32,  40,  48,  56,
+      64,  72,  80,  88,  96, 104, 112, 120,
+      128, 136, 144, 152, 160, 168, 176, 184,
+      192, 200, 208, 216, 224, 232, 240, 248
+    ]> : vector<32xindex>
+    %ld = xegpu.load %src[%cst], %mask {chunk_size = 1, layout = #l, l1_hint = #xegpu.cache_hint<cached>} : ui64, vector<32xindex>, vector<32xi1> -> vector<32xf32>
+    xegpu.store %ld, %dst[%cst], %mask {chunk_size = 1, layout = #l, l1_hint = #xegpu.cache_hint<cached>} : vector<32xf32>, ui64, vector<32xindex>, vector<32xi1>
+    gpu.return
+  }
+}
+
+// -----
+#l = #xegpu.layout<inst_data = [16]>
+gpu.module @test_kernel {
+  gpu.func @test_vector_step(%src: ui64, %dst: ui64) {
+    %c16 = arith.constant 16 : index
+    //CHECK: [[cst:%.+]] = arith.constant dense<16> : vector<16xindex>
+    //CHECK: [[step:%.+]] = vector.step : vector<16xindex>
+    //CHECK: arith.addi [[step]], [[cst]] : vector<16xindex>
+    %step = vector.step {layout_result_0 = #l} : vector<32xindex>
+    %mask = vector.create_mask %c16 {layout_result_0 = #l} : vector<32xi1>
+    %ld = xegpu.load %src[%step], %mask {chunk_size = 1, layout = #l, l1_hint = #xegpu.cache_hint<cached>} : ui64, vector<32xindex>, vector<32xi1> -> vector<32xf32>
+    xegpu.store %ld, %dst[%step], %mask {chunk_size = 1, layout = #l, l1_hint = #xegpu.cache_hint<cached>} : vector<32xf32>, ui64, vector<32xindex>, vector<32xi1>
+    gpu.return
+  }
+}
+
 // -----
 
 gpu.module @test_kernel {