[Mlir-commits] [mlir] [mlir][xegpu] Support boundary checks only for block instructions (PR #119380)
llvmlistbot at llvm.org
Tue Dec 10 05:47:01 PST 2024
llvmbot wrote:
@llvm/pr-subscribers-mlir
Author: Adam Siemieniuk (adam-smnk)
Changes:
Constrains the Vector-to-XeGPU lowering to apply boundary checks only to data transfers operating on block shapes, i.e., vectors of rank 2 or higher.
This further aligns the lowering with the restrictions of the current Xe instructions.
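For illustration, a minimal sketch of the resulting behavior (the function names and the rank-2 shape below are hypothetical; the rank-1 case mirrors the new negative tests in this diff): a rank-1 transfer with an out-of-bounds dimension is no longer lowered to XeGPU, while rank-2 block transfers remain eligible, with the boundary check preserved.

```mlir
// Rank-1 transfer with an out-of-bounds dim: rejected by the new
// precondition and left as a plain vector.transfer_read.
func.func @reject_1d(%source: memref<8x16x32xf32>,
    %offset: index) -> vector<8xf32> {
  %c0 = arith.constant 0.0 : f32
  %0 = vector.transfer_read %source[%offset, %offset, %offset], %c0
    {in_bounds = [false]} : memref<8x16x32xf32>, vector<8xf32>
  return %0 : vector<8xf32>
}

// Rank-2 (block) transfer: still eligible for lowering to
// xegpu.create_nd_tdesc + xegpu.load_nd with a boundary check.
func.func @block_2d(%source: memref<32x64xf32>,
    %offset: index) -> vector<8x16xf32> {
  %c0 = arith.constant 0.0 : f32
  %0 = vector.transfer_read %source[%offset, %offset], %c0
    {in_bounds = [false, false]} : memref<32x64xf32>, vector<8x16xf32>
  return %0 : vector<8x16xf32>
}
```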
---
Full diff: https://github.com/llvm/llvm-project/pull/119380.diff
5 Files Affected:
- (modified) mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp (+14-5)
- (modified) mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir (+1-1)
- (modified) mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir (+1-1)
- (modified) mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir (+13)
- (modified) mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir (+13)
``````````diff
diff --git a/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp b/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp
index 215e1b1b874520..1232d8795d4dcf 100644
--- a/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp
+++ b/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp
@@ -82,6 +82,10 @@ static LogicalResult transferPreconditions(PatternRewriter &rewriter,
xferOp, "Buffer must be contiguous in the innermost dimension");
unsigned vecRank = vecTy.getRank();
+ if (xferOp.hasOutOfBoundsDim() && vecRank < 2)
+ return rewriter.notifyMatchFailure(
+ xferOp, "Boundary check is available only for block instructions.");
+
AffineMap map = xferOp.getPermutationMap();
if (!map.isProjectedPermutation(/*allowZeroInResults=*/false))
return rewriter.notifyMatchFailure(xferOp, "Unsupported permutation map");
@@ -255,9 +259,12 @@ struct LoadLowering : public OpRewritePattern<vector::LoadOp> {
if (failed(storeLoadPreconditions(rewriter, loadOp, vecTy)))
return failure();
+ // Boundary check is available only for block instructions.
+ bool boundaryCheck = vecTy.getRank() > 1;
+
auto descType = xegpu::TensorDescType::get(
vecTy.getShape(), vecTy.getElementType(), /*array_length=*/1,
- /*boundary_check=*/true, xegpu::MemorySpace::Global);
+ boundaryCheck, xegpu::MemorySpace::Global);
xegpu::CreateNdDescOp ndDesc = createNdDescriptor(
rewriter, loc, descType, loadOp.getBase(), loadOp.getIndices());
@@ -285,10 +292,12 @@ struct StoreLowering : public OpRewritePattern<vector::StoreOp> {
if (failed(storeLoadPreconditions(rewriter, storeOp, vecTy)))
return failure();
- auto descType =
- xegpu::TensorDescType::get(vecTy.getShape(), vecTy.getElementType(),
- /*array_length=*/1, /*boundary_check=*/true,
- xegpu::MemorySpace::Global);
+ // Boundary check is available only for block instructions.
+ bool boundaryCheck = vecTy.getRank() > 1;
+
+ auto descType = xegpu::TensorDescType::get(
+ vecTy.getShape(), vecTy.getElementType(),
+ /*array_length=*/1, boundaryCheck, xegpu::MemorySpace::Global);
xegpu::CreateNdDescOp ndDesc = createNdDescriptor(
rewriter, loc, descType, storeOp.getBase(), storeOp.getIndices());
diff --git a/mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir
index e2a506f8ad5abd..7cef17df79dd2a 100644
--- a/mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir
@@ -12,7 +12,7 @@ func.func @load_1D_vector(%source: memref<8x16x32xf32>, %offset: index) -> vecto
// CHECK: %[[DESC:.+]] = xegpu.create_nd_tdesc
// CHECK-SAME: %[[SRC]][%[[OFFSET]], %[[OFFSET]], %[[OFFSET]]]
// CHECK-SAME: memref<8x16x32xf32> -> !xegpu.tensor_desc<8xf32,
-// CHECK-SAME: boundary_check = true
+// CHECK-SAME: boundary_check = false
// CHECK: %[[VEC:.+]] = xegpu.load_nd %[[DESC]]{{.*}}-> vector<8xf32>
// CHECK: return %[[VEC]]
diff --git a/mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir
index 3d45407c2486d6..4f069ebc39db37 100644
--- a/mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir
@@ -14,7 +14,7 @@ func.func @store_1D_vector(%vec: vector<8xf32>,
// CHECK: %[[DESC:.+]] = xegpu.create_nd_tdesc
// CHECK-SAME: %[[SRC]][%[[OFFSET]], %[[OFFSET]], %[[OFFSET]]]
// CHECK-SAME: memref<8x16x32xf32> -> !xegpu.tensor_desc<8xf32,
-// CHECK-SAME: boundary_check = true
+// CHECK-SAME: boundary_check = false
// CHECK: xegpu.store_nd %[[VEC]], %[[DESC]] : vector<8xf32>
// -----
diff --git a/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir
index 4841ecbb62e807..497eb86cea8353 100644
--- a/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir
@@ -119,6 +119,19 @@ func.func @no_load_out_of_bounds_non_zero_pad(%source: memref<32x64xf32>,
// -----
+func.func @no_load_out_of_bounds_1D_vector(%source: memref<8x16x32xf32>,
+ %offset: index) -> vector<8xf32> {
+ %c0 = arith.constant 0.0 : f32
+ %0 = vector.transfer_read %source[%offset, %offset, %offset], %c0
+ {in_bounds = [false]} : memref<8x16x32xf32>, vector<8xf32>
+ return %0 : vector<8xf32>
+}
+
+// CHECK-LABEL: @no_load_out_of_bounds_1D_vector(
+// CHECK: vector.transfer_read
+
+// -----
+
func.func @no_load_masked(%source : memref<4xf32>,
%offset : index) -> vector<4xf32> {
%c0 = arith.constant 0.0 : f32
diff --git a/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir
index 076760fe21dc86..91e3fb3841f6eb 100644
--- a/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir
@@ -164,3 +164,16 @@ func.func @no_store_unsupported_map(%vec: vector<8x16xf32>,
// CHECK-LABEL: @no_store_unsupported_map(
// CHECK: vector.transfer_write
+
+// -----
+
+func.func @no_store_out_of_bounds_1D_vector(%vec: vector<8xf32>,
+ %source: memref<8x16x32xf32>, %offset: index) {
+ vector.transfer_write %vec, %source[%offset, %offset, %offset]
+ {in_bounds = [false]}
+ : vector<8xf32>, memref<8x16x32xf32>
+ return
+}
+
+// CHECK-LABEL: @no_store_out_of_bounds_1D_vector(
+// CHECK: vector.transfer_write
``````````
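As a usage sketch, assuming the conversion is exercised via the `convert-vector-to-xegpu` pass flag (the flag name comes from the existing VectorToXeGPU conversion, not from this diff), the change for a plain 1-D `vector.load` can be observed with `mlir-opt`:

```mlir
// Assumed invocation: mlir-opt -convert-vector-to-xegpu input.mlir
func.func @load_1D_vector(%source: memref<8x16x32xf32>,
    %offset: index) -> vector<8xf32> {
  %0 = vector.load %source[%offset, %offset, %offset]
      : memref<8x16x32xf32>, vector<8xf32>
  return %0 : vector<8xf32>
}
// Per the updated tests, the generated xegpu.create_nd_tdesc now carries
// boundary_check = false, since a 1-D load is not a block instruction.
```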
https://github.com/llvm/llvm-project/pull/119380