[Mlir-commits] [mlir] [mlir][xegpu] Support boundary checks only for block instructions (PR #119380)
Adam Siemieniuk
llvmlistbot at llvm.org
Tue Dec 10 05:46:24 PST 2024
https://github.com/adam-smnk created https://github.com/llvm/llvm-project/pull/119380
Constrains the Vector lowering to apply boundary checks only to data transfers operating on block shapes, i.e., vectors of rank 2 or higher.
This further aligns the lowering with the restrictions of the current Xe instructions.
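As a minimal sketch of the effect (mirroring the updated lit tests in the patch below; the descriptor printing is abbreviated and approximate), a rank-1 load now produces a tensor descriptor without boundary checks, whereas rank-2 and higher loads keep them:

    // Rank-1 (non-block) load: boundary_check = false in the created descriptor.
    func.func @load_1D_vector(%source: memref<8x16x32xf32>, %offset: index) -> vector<8xf32> {
      %0 = vector.load %source[%offset, %offset, %offset]
        : memref<8x16x32xf32>, vector<8xf32>
      return %0 : vector<8xf32>
    }
    // After conversion (output abbreviated):
    //   %desc = xegpu.create_nd_tdesc %source[...] : memref<8x16x32xf32>
    //           -> !xegpu.tensor_desc<8xf32, ..., boundary_check = false, ...>
    //   %vec  = xegpu.load_nd %desc {{...}} -> vector<8xf32>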
From 701afb3b14bf5f55337956cf94a8f973d92a4c3d Mon Sep 17 00:00:00 2001
From: Adam Siemieniuk <adam.siemieniuk at intel.com>
Date: Tue, 10 Dec 2024 14:32:15 +0100
Subject: [PATCH] [mlir][xegpu] Support boundary checks only for block
instructions
Constrains Vector lowering to apply boundary checks only to data
transfers operating on block shapes.
This further aligns lowering with the current Xe instructions'
restrictions.
---
.../VectorToXeGPU/VectorToXeGPU.cpp | 19 ++++++++++++++-----
.../VectorToXeGPU/load-to-xegpu.mlir | 2 +-
.../VectorToXeGPU/store-to-xegpu.mlir | 2 +-
.../VectorToXeGPU/transfer-read-to-xegpu.mlir | 13 +++++++++++++
.../transfer-write-to-xegpu.mlir | 13 +++++++++++++
5 files changed, 42 insertions(+), 7 deletions(-)
diff --git a/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp b/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp
index 215e1b1b874520..1232d8795d4dcf 100644
--- a/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp
+++ b/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp
@@ -82,6 +82,10 @@ static LogicalResult transferPreconditions(PatternRewriter &rewriter,
xferOp, "Buffer must be contiguous in the innermost dimension");
unsigned vecRank = vecTy.getRank();
+ if (xferOp.hasOutOfBoundsDim() && vecRank < 2)
+ return rewriter.notifyMatchFailure(
+ xferOp, "Boundary check is available only for block instructions.");
+
AffineMap map = xferOp.getPermutationMap();
if (!map.isProjectedPermutation(/*allowZeroInResults=*/false))
return rewriter.notifyMatchFailure(xferOp, "Unsupported permutation map");
@@ -255,9 +259,12 @@ struct LoadLowering : public OpRewritePattern<vector::LoadOp> {
if (failed(storeLoadPreconditions(rewriter, loadOp, vecTy)))
return failure();
+ // Boundary check is available only for block instructions.
+ bool boundaryCheck = vecTy.getRank() > 1;
+
auto descType = xegpu::TensorDescType::get(
vecTy.getShape(), vecTy.getElementType(), /*array_length=*/1,
- /*boundary_check=*/true, xegpu::MemorySpace::Global);
+ boundaryCheck, xegpu::MemorySpace::Global);
xegpu::CreateNdDescOp ndDesc = createNdDescriptor(
rewriter, loc, descType, loadOp.getBase(), loadOp.getIndices());
@@ -285,10 +292,12 @@ struct StoreLowering : public OpRewritePattern<vector::StoreOp> {
if (failed(storeLoadPreconditions(rewriter, storeOp, vecTy)))
return failure();
- auto descType =
- xegpu::TensorDescType::get(vecTy.getShape(), vecTy.getElementType(),
- /*array_length=*/1, /*boundary_check=*/true,
- xegpu::MemorySpace::Global);
+ // Boundary check is available only for block instructions.
+ bool boundaryCheck = vecTy.getRank() > 1;
+
+ auto descType = xegpu::TensorDescType::get(
+ vecTy.getShape(), vecTy.getElementType(),
+ /*array_length=*/1, boundaryCheck, xegpu::MemorySpace::Global);
xegpu::CreateNdDescOp ndDesc = createNdDescriptor(
rewriter, loc, descType, storeOp.getBase(), storeOp.getIndices());
diff --git a/mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir
index e2a506f8ad5abd..7cef17df79dd2a 100644
--- a/mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir
@@ -12,7 +12,7 @@ func.func @load_1D_vector(%source: memref<8x16x32xf32>, %offset: index) -> vecto
// CHECK: %[[DESC:.+]] = xegpu.create_nd_tdesc
// CHECK-SAME: %[[SRC]][%[[OFFSET]], %[[OFFSET]], %[[OFFSET]]]
// CHECK-SAME: memref<8x16x32xf32> -> !xegpu.tensor_desc<8xf32,
-// CHECK-SAME: boundary_check = true
+// CHECK-SAME: boundary_check = false
// CHECK: %[[VEC:.+]] = xegpu.load_nd %[[DESC]]{{.*}}-> vector<8xf32>
// CHECK: return %[[VEC]]
diff --git a/mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir
index 3d45407c2486d6..4f069ebc39db37 100644
--- a/mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir
@@ -14,7 +14,7 @@ func.func @store_1D_vector(%vec: vector<8xf32>,
// CHECK: %[[DESC:.+]] = xegpu.create_nd_tdesc
// CHECK-SAME: %[[SRC]][%[[OFFSET]], %[[OFFSET]], %[[OFFSET]]]
// CHECK-SAME: memref<8x16x32xf32> -> !xegpu.tensor_desc<8xf32,
-// CHECK-SAME: boundary_check = true
+// CHECK-SAME: boundary_check = false
// CHECK: xegpu.store_nd %[[VEC]], %[[DESC]] : vector<8xf32>
// -----
diff --git a/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir
index 4841ecbb62e807..497eb86cea8353 100644
--- a/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir
@@ -119,6 +119,19 @@ func.func @no_load_out_of_bounds_non_zero_pad(%source: memref<32x64xf32>,
// -----
+func.func @no_load_out_of_bounds_1D_vector(%source: memref<8x16x32xf32>,
+ %offset: index) -> vector<8xf32> {
+ %c0 = arith.constant 0.0 : f32
+ %0 = vector.transfer_read %source[%offset, %offset, %offset], %c0
+ {in_bounds = [false]} : memref<8x16x32xf32>, vector<8xf32>
+ return %0 : vector<8xf32>
+}
+
+// CHECK-LABEL: @no_load_out_of_bounds_1D_vector(
+// CHECK: vector.transfer_read
+
+// -----
+
func.func @no_load_masked(%source : memref<4xf32>,
%offset : index) -> vector<4xf32> {
%c0 = arith.constant 0.0 : f32
diff --git a/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir
index 076760fe21dc86..91e3fb3841f6eb 100644
--- a/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir
@@ -164,3 +164,16 @@ func.func @no_store_unsupported_map(%vec: vector<8x16xf32>,
// CHECK-LABEL: @no_store_unsupported_map(
// CHECK: vector.transfer_write
+
+// -----
+
+func.func @no_store_out_of_bounds_1D_vector(%vec: vector<8xf32>,
+ %source: memref<8x16x32xf32>, %offset: index) {
+ vector.transfer_write %vec, %source[%offset, %offset, %offset]
+ {in_bounds = [false]}
+ : vector<8xf32>, memref<8x16x32xf32>
+ return
+}
+
+// CHECK-LABEL: @no_store_out_of_bounds_1D_vector(
+// CHECK: vector.transfer_write
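For local verification, the lowering can be exercised with mlir-opt (pass name per the VectorToXeGPU conversion registration; the test path assumes a standard llvm-project checkout):

    mlir-opt --convert-vector-to-xegpu mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir

With this patch, the rank-1 out-of-bounds cases above are left as plain vector.transfer_read/vector.transfer_write ops rather than being lowered to XeGPU.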