[Mlir-commits] [mlir] [MLIR][XeVM] Add XeVM 1D block operations to OpenCL calls conversion. (PR #161702)

Sang Ik Lee llvmlistbot at llvm.org
Tue Oct 7 11:10:42 PDT 2025


https://github.com/silee2 updated https://github.com/llvm/llvm-project/pull/161702

From 50ac772a9561462a74ea537df0ef4d2d2a657c29 Mon Sep 17 00:00:00 2001
From: "Lee, Sang Ik" <sang.ik.lee at intel.com>
Date: Tue, 30 Sep 2025 21:06:03 +0000
Subject: [PATCH 1/2] [MLIR][XeVM] Add XeVM 1D block operations to OpenCL calls
 conversion.

---
 mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp | 85 ++++++++++++++++++-
 .../Conversion/XeVMToLLVM/xevm-to-llvm.mlir   | 56 ++++++++++++
 2 files changed, 140 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp b/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp
index 0f90acf0d9c39..f10ca5a80fa04 100644
--- a/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp
+++ b/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp
@@ -216,6 +216,10 @@ static std::optional<LoadCacheControl> getCacheControl(BlockLoad2dOp op) {
   return op.getCacheControl();
 }
 
+static std::optional<LoadCacheControl> getCacheControl(BlockLoadOp op) {
+  return op.getCacheControl();
+}
+
 static std::optional<LoadCacheControl> getCacheControl(BlockPrefetch2dOp op) {
   return op.getCacheControl();
 }
@@ -224,6 +228,10 @@ static std::optional<StoreCacheControl> getCacheControl(BlockStore2dOp op) {
   return op.getCacheControl();
 }
 
+static std::optional<StoreCacheControl> getCacheControl(BlockStoreOp op) {
+  return op.getCacheControl();
+}
+
 static std::optional<LoadCacheControl> getCacheControl(LLVM::LoadOp op) {
   if (op->hasAttr("cache_control")) {
     auto attr = op->getAttrOfType<xevm::LoadCacheControlAttr>("cache_control");
@@ -265,6 +273,7 @@ getCacheControlMetadata(ConversionPatternRewriter &rewriter, OpType op) {
   constexpr bool isLoad = std::is_same_v<OpType, BlockLoad2dOp> ||
                           std::is_same_v<OpType, BlockPrefetch2dOp> ||
                           std::is_same_v<OpType, LLVM::LoadOp> ||
+                          std::is_same_v<OpType, BlockLoadOp> ||
                           std::is_same_v<OpType, PrefetchOp>;
   const int32_t controlKey{isLoad ? loadCacheControlKey : storeCacheControlKey};
   SmallVector<int32_t, decorationCacheControlArity> decorationsL1{
@@ -620,6 +629,77 @@ class LoadStorePrefetchToOCLPattern : public OpConversionPattern<OpType> {
     return success();
   }
 };
+
+template <typename OpType>
+class BlockLoadStore1DToOCLPattern : public OpConversionPattern<OpType> {
+  using OpConversionPattern<OpType>::OpConversionPattern;
+  LogicalResult
+  matchAndRewrite(OpType op, typename OpType::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    constexpr bool isStore = std::is_same_v<OpType, xevm::BlockStoreOp>;
+    // Get OpenCL function name
+    // https://registry.khronos.org/OpenCL/extensions/
+    //         intel/cl_intel_subgroup_local_block_io.html
+    std::string funcName{"intel_sub_group_block_"};
+    funcName += isStore ? "write_u" : "read_u";
+    VectorType vecType;
+    if constexpr (isStore)
+      vecType = op.getVal().getType();
+    else
+      vecType = op.getType();
+    Type elemType = vecType.getElementType();
+    funcName += getTypeMangling(elemType);
+    if (vecType.getNumElements() > 1)
+      funcName += std::to_string(vecType.getNumElements());
+    SmallVector<Type, 2> argTypes{};
+    // XeVM BlockLoad/StoreOp always use signless integer types
+    // but OpenCL builtins expect unsigned types
+    // use unsigned types for mangling
+    SmallVector<bool, 2> isUnsigned{};
+    // arg0: pointer to the src/dst address
+    // arg1 - only if store : vector to store
+    // Prepare arguments
+    SmallVector<Value, 2> args{};
+    args.push_back(op.getPtr());
+    argTypes.push_back(op.getPtr().getType());
+    isUnsigned.push_back(true);
+    Type retType;
+    if constexpr (isStore) {
+      args.push_back(op.getVal());
+      argTypes.push_back(op.getVal().getType());
+      isUnsigned.push_back(true);
+      retType = LLVM::LLVMVoidType::get(rewriter.getContext());
+    } else {
+      /*
+      retType = VectorType::get(vecType.getShape(),
+                                rewriter.getIntegerType(elemType.getIntOrFloatBitWidth(),
+                                                        false));
+                                                        */
+      retType = vecType;
+    }
+    funcName = std::string("_Z") + std::to_string(funcName.size()) + funcName +
+               "PU3AS" +
+               std::to_string(op.getPtr().getType().getAddressSpace());
+    funcName += getTypeMangling(elemType, /*isUnsigned=*/true);
+    if constexpr (isStore)
+      funcName += getTypeMangling(vecType, /*isUnsigned=*/true);
+    LLVMFuncAttributeOptions funcAttr{noUnwindWillReturnAttrs};
+
+    LLVM::CallOp call =
+        createDeviceFunctionCall(rewriter, funcName, retType, argTypes, args,
+                                 {}, funcAttr, op.getOperation());
+    if (std::optional<ArrayAttr> optCacheControls =
+            getCacheControlMetadata(rewriter, op)) {
+      call->setAttr(XeVMDialect::getCacheControlsAttrName(), *optCacheControls);
+    }
+    if constexpr (isStore)
+      rewriter.eraseOp(op);
+    else
+      rewriter.replaceOp(op, call->getResult(0));
+    return success();
+  }
+};
+
 template <typename OpType>
 class LLVMLoadStoreToOCLPattern : public OpConversionPattern<OpType> {
   using OpConversionPattern<OpType>::OpConversionPattern;
@@ -695,7 +775,10 @@ void ::mlir::populateXeVMToLLVMConversionPatterns(ConversionTarget &target,
                LoadStorePrefetchToOCLPattern<BlockPrefetch2dOp>,
                MMAToOCLPattern, MemfenceToOCLPattern, PrefetchToOCLPattern,
                LLVMLoadStoreToOCLPattern<LLVM::LoadOp>,
-               LLVMLoadStoreToOCLPattern<LLVM::StoreOp>>(patterns.getContext());
+               LLVMLoadStoreToOCLPattern<LLVM::StoreOp>,
+               BlockLoadStore1DToOCLPattern<BlockLoadOp>,
+               BlockLoadStore1DToOCLPattern<BlockStoreOp>>(
+      patterns.getContext());
 }
 
 void ::mlir::registerConvertXeVMToLLVMInterface(DialectRegistry &registry) {
diff --git a/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir b/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir
index 8f60a0797652b..c70d9f4032ae2 100644
--- a/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir
+++ b/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir
@@ -261,3 +261,59 @@ llvm.func @llvm.store(%a: !llvm.ptr<1>, %val: i32) {
   llvm.store %val, %a {cache_control=#xevm.store_cache_control<L1wt_L2uc_L3wb>} : i32, !llvm.ptr<1>
   llvm.return
 }
+
+// -----
+// CHECK-LABEL: llvm.func spir_funccc @_Z30intel_sub_group_block_read_us8PU3AS1t
+// CHECK: llvm.func @blockload_as1(%[[ARG0:.*]]: !llvm.ptr<1>)
+llvm.func @blockload_as1(%ptr: !llvm.ptr<1>) -> vector<8xi16> {
+  // CHECK: %[[VAR0:.*]] = llvm.call spir_funccc @_Z30intel_sub_group_block_read_us8PU3AS1t(%[[ARG0]])
+  // CHECK-SAME: {function_type = !llvm.func<vector<8xi16> (ptr<1>)>, linkage = #llvm.linkage<external>,
+  // CHECK-SAME:  no_unwind, sym_name = "_Z30intel_sub_group_block_read_us8PU3AS1t",
+  // CHECK-SAME:  visibility_ = 0 : i64, will_return, xevm.DecorationCacheControl =
+  // CHECK-SAME:    [6442 : i32, 0 : i32, 1 : i32, 0 : i32],
+  // CHECK-SAME:    [6442 : i32, 1 : i32, 1 : i32, 0 : i32]
+  %loaded_a = xevm.blockload %ptr <{cache_control=#xevm.load_cache_control<L1uc_L2uc_L3uc>}> : (!llvm.ptr<1>) -> vector<8xi16>
+  llvm.return %loaded_a : vector<8xi16>
+}
+
+// -----
+// CHECK-LABEL: llvm.func spir_funccc @_Z31intel_sub_group_block_read_uc16PU3AS3h(!llvm.ptr<3>)
+// CHECK: llvm.func @blockload_as3(%[[ARG0:.*]]: !llvm.ptr<3>)
+llvm.func @blockload_as3(%ptr: !llvm.ptr<3>) -> vector<16xi8> {
+  // CHECK: %[[VAR0:.*]] = llvm.call spir_funccc @_Z31intel_sub_group_block_read_uc16PU3AS3h(%[[ARG0]])
+  // CHECK-SAME: {function_type = !llvm.func<vector<16xi8> (ptr<3>)>, linkage = #llvm.linkage<external>,
+  // CHECK-SAME:   no_unwind, sym_name = "_Z31intel_sub_group_block_read_uc16PU3AS3h", visibility_ = 0 : i64,
+  // CHECK-SAME:   will_return, xevm.DecorationCacheControl =
+  // CHECK-SAME:    [6442 : i32, 0 : i32, 1 : i32, 0 : i32],
+  // CHECK-SAME:    [6442 : i32, 1 : i32, 1 : i32, 0 : i32]
+  %loaded_a = xevm.blockload %ptr <{cache_control=#xevm.load_cache_control<L1uc_L2uc_L3uc>}> : (!llvm.ptr<3>) -> vector<16xi8>
+  llvm.return %loaded_a : vector<16xi8>
+}
+
+// -----
+// CHECK-LABEL: llvm.func spir_funccc @_Z31intel_sub_group_block_write_ui8PU3AS1jDv8_j
+// CHECK: llvm.func @blockstore_as1(%[[ARG0:.*]]: !llvm.ptr<1>, %[[ARG1:.*]]: vector<8xi32>) {
+llvm.func @blockstore_as1(%ptr: !llvm.ptr<1>, %data: vector<8xi32>) {
+  // CHECK: llvm.call spir_funccc @_Z31intel_sub_group_block_write_ui8PU3AS1jDv8_j(%[[ARG0]], %[[ARG1]])
+  // CHECK-SAME: {function_type = !llvm.func<void (ptr<1>, vector<8xi32>)>, linkage = #llvm.linkage<external>,
+  // CHECK-SAME:   no_unwind, sym_name = "_Z31intel_sub_group_block_write_ui8PU3AS1jDv8_j", visibility_ = 0 : i64,
+  // CHECK-SAME:   will_return, xevm.DecorationCacheControl =
+  // CHECK-SAME:    [6443 : i32, 0 : i32, 2 : i32, 0 : i32],
+  // CHECK-SAME:    [6443 : i32, 1 : i32, 2 : i32, 0 : i32]
+  xevm.blockstore %ptr, %data <{cache_control=#xevm.store_cache_control<L1wt_L2uc_L3wb>}> : (!llvm.ptr<1>, vector<8xi32>)
+  llvm.return
+}
+
+// -----
+// CHECK-LABEL: llvm.func spir_funccc @_Z31intel_sub_group_block_write_ul2PU3AS3mDv2_m
+// CHECK: llvm.func @blockstore_as3(%[[ARG0:.*]]: !llvm.ptr<3>, %[[ARG1:.*]]: vector<2xi64>) {
+llvm.func @blockstore_as3(%ptr: !llvm.ptr<3>, %data: vector<2xi64>) {
+  // CHECK: llvm.call spir_funccc @_Z31intel_sub_group_block_write_ul2PU3AS3mDv2_m(%[[ARG0]], %[[ARG1]])
+  // CHECK-SAME: {function_type = !llvm.func<void (ptr<3>, vector<2xi64>)>, linkage = #llvm.linkage<external>,
+  // CHECK-SAME:   no_unwind, sym_name = "_Z31intel_sub_group_block_write_ul2PU3AS3mDv2_m", visibility_ = 0 : i64,
+  // CHECK-SAME:   will_return, xevm.DecorationCacheControl =
+  // CHECK-SAME:    [6443 : i32, 0 : i32, 2 : i32, 0 : i32],
+  // CHECK-SAME:    [6443 : i32, 1 : i32, 2 : i32, 0 : i32]
+  xevm.blockstore %ptr, %data <{cache_control=#xevm.store_cache_control<L1wt_L2uc_L3wb>}> : (!llvm.ptr<3>, vector<2xi64>)
+  llvm.return
+}

From 986761f736adbd79e25257cd4e5a1394a7d9c5a1 Mon Sep 17 00:00:00 2001
From: "Lee, Sang Ik" <sang.ik.lee at intel.com>
Date: Tue, 7 Oct 2025 18:09:49 +0000
Subject: [PATCH 2/2] Update code in anticipation of vector or scalar value or
 result type.

---
 mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp | 32 +++++++++----------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp b/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp
index f10ca5a80fa04..a703cc8015c8c 100644
--- a/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp
+++ b/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp
@@ -641,16 +641,21 @@ class BlockLoadStore1DToOCLPattern : public OpConversionPattern<OpType> {
     // https://registry.khronos.org/OpenCL/extensions/
     //         intel/cl_intel_subgroup_local_block_io.html
     std::string funcName{"intel_sub_group_block_"};
-    funcName += isStore ? "write_u" : "read_u";
-    VectorType vecType;
-    if constexpr (isStore)
-      vecType = op.getVal().getType();
-    else
-      vecType = op.getType();
-    Type elemType = vecType.getElementType();
+    // Value or Result type can be vector or scalar
+    Type valOrResTy;
+    if constexpr (isStore) {
+      funcName += "write_u";
+      valOrResTy = op.getVal().getType();
+    } else {
+      funcName += "read_u";
+      valOrResTy = op.getType();
+    }
+    // Get element type of the vector/scalar
+    VectorType vecTy = dyn_cast<VectorType>(valOrResTy);
+    Type elemType = vecTy ? vecTy.getElementType() : valOrResTy;
     funcName += getTypeMangling(elemType);
-    if (vecType.getNumElements() > 1)
-      funcName += std::to_string(vecType.getNumElements());
+    if (vecTy)
+      funcName += std::to_string(vecTy.getNumElements());
     SmallVector<Type, 2> argTypes{};
     // XeVM BlockLoad/StoreOp always use signless integer types
     // but OpenCL builtins expect unsigned types
@@ -670,19 +675,14 @@ class BlockLoadStore1DToOCLPattern : public OpConversionPattern<OpType> {
       isUnsigned.push_back(true);
       retType = LLVM::LLVMVoidType::get(rewriter.getContext());
     } else {
-      /*
-      retType = VectorType::get(vecType.getShape(),
-                                rewriter.getIntegerType(elemType.getIntOrFloatBitWidth(),
-                                                        false));
-                                                        */
-      retType = vecType;
+      retType = valOrResTy;
     }
     funcName = std::string("_Z") + std::to_string(funcName.size()) + funcName +
                "PU3AS" +
                std::to_string(op.getPtr().getType().getAddressSpace());
     funcName += getTypeMangling(elemType, /*isUnsigned=*/true);
     if constexpr (isStore)
-      funcName += getTypeMangling(vecType, /*isUnsigned=*/true);
+      funcName += getTypeMangling(valOrResTy, /*isUnsigned=*/true);
     LLVMFuncAttributeOptions funcAttr{noUnwindWillReturnAttrs};
 
     LLVM::CallOp call =



More information about the Mlir-commits mailing list