[Mlir-commits] [mlir] [MLIR][XeVM] Add XeVM 1D block operations to OpenCL calls conversion. (PR #161702)
Sang Ik Lee
llvmlistbot at llvm.org
Tue Oct 7 11:10:42 PDT 2025
https://github.com/silee2 updated https://github.com/llvm/llvm-project/pull/161702
>From 50ac772a9561462a74ea537df0ef4d2d2a657c29 Mon Sep 17 00:00:00 2001
From: "Lee, Sang Ik" <sang.ik.lee at intel.com>
Date: Tue, 30 Sep 2025 21:06:03 +0000
Subject: [PATCH 1/2] [MLIR][XeVM] Add XeVM 1D block operations to OpenCL calls
conversion.
---
mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp | 85 ++++++++++++++++++-
.../Conversion/XeVMToLLVM/xevm-to-llvm.mlir | 56 ++++++++++++
2 files changed, 140 insertions(+), 1 deletion(-)
diff --git a/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp b/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp
index 0f90acf0d9c39..f10ca5a80fa04 100644
--- a/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp
+++ b/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp
@@ -216,6 +216,10 @@ static std::optional<LoadCacheControl> getCacheControl(BlockLoad2dOp op) {
return op.getCacheControl();
}
+static std::optional<LoadCacheControl> getCacheControl(BlockLoadOp op) {
+ return op.getCacheControl();
+}
+
static std::optional<LoadCacheControl> getCacheControl(BlockPrefetch2dOp op) {
return op.getCacheControl();
}
@@ -224,6 +228,10 @@ static std::optional<StoreCacheControl> getCacheControl(BlockStore2dOp op) {
return op.getCacheControl();
}
+static std::optional<StoreCacheControl> getCacheControl(BlockStoreOp op) {
+ return op.getCacheControl();
+}
+
static std::optional<LoadCacheControl> getCacheControl(LLVM::LoadOp op) {
if (op->hasAttr("cache_control")) {
auto attr = op->getAttrOfType<xevm::LoadCacheControlAttr>("cache_control");
@@ -265,6 +273,7 @@ getCacheControlMetadata(ConversionPatternRewriter &rewriter, OpType op) {
constexpr bool isLoad = std::is_same_v<OpType, BlockLoad2dOp> ||
std::is_same_v<OpType, BlockPrefetch2dOp> ||
std::is_same_v<OpType, LLVM::LoadOp> ||
+ std::is_same_v<OpType, BlockLoadOp> ||
std::is_same_v<OpType, PrefetchOp>;
const int32_t controlKey{isLoad ? loadCacheControlKey : storeCacheControlKey};
SmallVector<int32_t, decorationCacheControlArity> decorationsL1{
@@ -620,6 +629,77 @@ class LoadStorePrefetchToOCLPattern : public OpConversionPattern<OpType> {
return success();
}
};
+
+template <typename OpType>
+class BlockLoadStore1DToOCLPattern : public OpConversionPattern<OpType> {
+ using OpConversionPattern<OpType>::OpConversionPattern;
+ LogicalResult
+ matchAndRewrite(OpType op, typename OpType::Adaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+ constexpr bool isStore = std::is_same_v<OpType, xevm::BlockStoreOp>;
+ // Get OpenCL function name
+ // https://registry.khronos.org/OpenCL/extensions/
+ // intel/cl_intel_subgroup_local_block_io.html
+ std::string funcName{"intel_sub_group_block_"};
+ funcName += isStore ? "write_u" : "read_u";
+ VectorType vecType;
+ if constexpr (isStore)
+ vecType = op.getVal().getType();
+ else
+ vecType = op.getType();
+ Type elemType = vecType.getElementType();
+ funcName += getTypeMangling(elemType);
+ if (vecType.getNumElements() > 1)
+ funcName += std::to_string(vecType.getNumElements());
+ SmallVector<Type, 2> argTypes{};
+ // XeVM BlockLoad/StoreOp always use signless integer types
+ // but OpenCL builtins expect unsigned types
+ // use unsigned types for mangling
+ SmallVector<bool, 2> isUnsigned{};
+ // arg0: pointer to the src/dst address
+ // arg1 - only if store : vector to store
+ // Prepare arguments
+ SmallVector<Value, 2> args{};
+ args.push_back(op.getPtr());
+ argTypes.push_back(op.getPtr().getType());
+ isUnsigned.push_back(true);
+ Type retType;
+ if constexpr (isStore) {
+ args.push_back(op.getVal());
+ argTypes.push_back(op.getVal().getType());
+ isUnsigned.push_back(true);
+ retType = LLVM::LLVMVoidType::get(rewriter.getContext());
+ } else {
+ /*
+ retType = VectorType::get(vecType.getShape(),
+ rewriter.getIntegerType(elemType.getIntOrFloatBitWidth(),
+ false));
+ */
+ retType = vecType;
+ }
+ funcName = std::string("_Z") + std::to_string(funcName.size()) + funcName +
+ "PU3AS" +
+ std::to_string(op.getPtr().getType().getAddressSpace());
+ funcName += getTypeMangling(elemType, /*isUnsigned=*/true);
+ if constexpr (isStore)
+ funcName += getTypeMangling(vecType, /*isUnsigned=*/true);
+ LLVMFuncAttributeOptions funcAttr{noUnwindWillReturnAttrs};
+
+ LLVM::CallOp call =
+ createDeviceFunctionCall(rewriter, funcName, retType, argTypes, args,
+ {}, funcAttr, op.getOperation());
+ if (std::optional<ArrayAttr> optCacheControls =
+ getCacheControlMetadata(rewriter, op)) {
+ call->setAttr(XeVMDialect::getCacheControlsAttrName(), *optCacheControls);
+ }
+ if constexpr (isStore)
+ rewriter.eraseOp(op);
+ else
+ rewriter.replaceOp(op, call->getResult(0));
+ return success();
+ }
+};
+
template <typename OpType>
class LLVMLoadStoreToOCLPattern : public OpConversionPattern<OpType> {
using OpConversionPattern<OpType>::OpConversionPattern;
@@ -695,7 +775,10 @@ void ::mlir::populateXeVMToLLVMConversionPatterns(ConversionTarget &target,
LoadStorePrefetchToOCLPattern<BlockPrefetch2dOp>,
MMAToOCLPattern, MemfenceToOCLPattern, PrefetchToOCLPattern,
LLVMLoadStoreToOCLPattern<LLVM::LoadOp>,
- LLVMLoadStoreToOCLPattern<LLVM::StoreOp>>(patterns.getContext());
+ LLVMLoadStoreToOCLPattern<LLVM::StoreOp>,
+ BlockLoadStore1DToOCLPattern<BlockLoadOp>,
+ BlockLoadStore1DToOCLPattern<BlockStoreOp>>(
+ patterns.getContext());
}
void ::mlir::registerConvertXeVMToLLVMInterface(DialectRegistry &registry) {
diff --git a/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir b/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir
index 8f60a0797652b..c70d9f4032ae2 100644
--- a/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir
+++ b/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir
@@ -261,3 +261,59 @@ llvm.func @llvm.store(%a: !llvm.ptr<1>, %val: i32) {
llvm.store %val, %a {cache_control=#xevm.store_cache_control<L1wt_L2uc_L3wb>} : i32, !llvm.ptr<1>
llvm.return
}
+
+// -----
+// CHECK-LABEL: llvm.func spir_funccc @_Z30intel_sub_group_block_read_us8PU3AS1t
+// CHECK: llvm.func @blockload_as1(%[[ARG0:.*]]: !llvm.ptr<1>)
+llvm.func @blockload_as1(%ptr: !llvm.ptr<1>) -> vector<8xi16> {
+ // CHECK: %[[VAR0:.*]] = llvm.call spir_funccc @_Z30intel_sub_group_block_read_us8PU3AS1t(%[[ARG0]])
+ // CHECK-SAME: {function_type = !llvm.func<vector<8xi16> (ptr<1>)>, linkage = #llvm.linkage<external>,
+ // CHECK-SAME: no_unwind, sym_name = "_Z30intel_sub_group_block_read_us8PU3AS1t",
+ // CHECK-SAME: visibility_ = 0 : i64, will_return, xevm.DecorationCacheControl =
+ // CHECK-SAME: [6442 : i32, 0 : i32, 1 : i32, 0 : i32],
+ // CHECK-SAME: [6442 : i32, 1 : i32, 1 : i32, 0 : i32]
+ %loaded_a = xevm.blockload %ptr <{cache_control=#xevm.load_cache_control<L1uc_L2uc_L3uc>}> : (!llvm.ptr<1>) -> vector<8xi16>
+ llvm.return %loaded_a : vector<8xi16>
+}
+
+// -----
+// CHECK-LABEL: llvm.func spir_funccc @_Z31intel_sub_group_block_read_uc16PU3AS3h(!llvm.ptr<3>)
+// CHECK: llvm.func @blockload_as3(%[[ARG0:.*]]: !llvm.ptr<3>)
+llvm.func @blockload_as3(%ptr: !llvm.ptr<3>) -> vector<16xi8> {
+ // CHECK: %[[VAR0:.*]] = llvm.call spir_funccc @_Z31intel_sub_group_block_read_uc16PU3AS3h(%[[ARG0]])
+ // CHECK-SAME: {function_type = !llvm.func<vector<16xi8> (ptr<3>)>, linkage = #llvm.linkage<external>,
+ // CHECK-SAME: no_unwind, sym_name = "_Z31intel_sub_group_block_read_uc16PU3AS3h", visibility_ = 0 : i64,
+ // CHECK-SAME: will_return, xevm.DecorationCacheControl =
+ // CHECK-SAME: [6442 : i32, 0 : i32, 1 : i32, 0 : i32],
+ // CHECK-SAME: [6442 : i32, 1 : i32, 1 : i32, 0 : i32]
+ %loaded_a = xevm.blockload %ptr <{cache_control=#xevm.load_cache_control<L1uc_L2uc_L3uc>}> : (!llvm.ptr<3>) -> vector<16xi8>
+ llvm.return %loaded_a : vector<16xi8>
+}
+
+// -----
+// CHECK-LABEL: llvm.func spir_funccc @_Z31intel_sub_group_block_write_ui8PU3AS1jDv8_j
+// CHECK: llvm.func @blockstore_as1(%[[ARG0:.*]]: !llvm.ptr<1>, %[[ARG1:.*]]: vector<8xi32>) {
+llvm.func @blockstore_as1(%ptr: !llvm.ptr<1>, %data: vector<8xi32>) {
+ // CHECK: llvm.call spir_funccc @_Z31intel_sub_group_block_write_ui8PU3AS1jDv8_j(%[[ARG0]], %[[ARG1]])
+ // CHECK-SAME: {function_type = !llvm.func<void (ptr<1>, vector<8xi32>)>, linkage = #llvm.linkage<external>,
+ // CHECK-SAME: no_unwind, sym_name = "_Z31intel_sub_group_block_write_ui8PU3AS1jDv8_j", visibility_ = 0 : i64,
+ // CHECK-SAME: will_return, xevm.DecorationCacheControl =
+ // CHECK-SAME: [6443 : i32, 0 : i32, 2 : i32, 0 : i32],
+ // CHECK-SAME: [6443 : i32, 1 : i32, 2 : i32, 0 : i32]
+ xevm.blockstore %ptr, %data <{cache_control=#xevm.store_cache_control<L1wt_L2uc_L3wb>}> : (!llvm.ptr<1>, vector<8xi32>)
+ llvm.return
+}
+
+// -----
+// CHECK-LABEL: llvm.func spir_funccc @_Z31intel_sub_group_block_write_ul2PU3AS3mDv2_m
+// CHECK: llvm.func @blockstore_as3(%[[ARG0:.*]]: !llvm.ptr<3>, %[[ARG1:.*]]: vector<2xi64>) {
+llvm.func @blockstore_as3(%ptr: !llvm.ptr<3>, %data: vector<2xi64>) {
+ // CHECK: llvm.call spir_funccc @_Z31intel_sub_group_block_write_ul2PU3AS3mDv2_m(%[[ARG0]], %[[ARG1]])
+ // CHECK-SAME: {function_type = !llvm.func<void (ptr<3>, vector<2xi64>)>, linkage = #llvm.linkage<external>,
+ // CHECK-SAME: no_unwind, sym_name = "_Z31intel_sub_group_block_write_ul2PU3AS3mDv2_m", visibility_ = 0 : i64,
+ // CHECK-SAME: will_return, xevm.DecorationCacheControl =
+ // CHECK-SAME: [6443 : i32, 0 : i32, 2 : i32, 0 : i32],
+ // CHECK-SAME: [6443 : i32, 1 : i32, 2 : i32, 0 : i32]
+ xevm.blockstore %ptr, %data <{cache_control=#xevm.store_cache_control<L1wt_L2uc_L3wb>}> : (!llvm.ptr<3>, vector<2xi64>)
+ llvm.return
+}
>From 986761f736adbd79e25257cd4e5a1394a7d9c5a1 Mon Sep 17 00:00:00 2001
From: "Lee, Sang Ik" <sang.ik.lee at intel.com>
Date: Tue, 7 Oct 2025 18:09:49 +0000
Subject: [PATCH 2/2] Update code in anticipation of vector or scalar value or
result type.
---
mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp | 32 +++++++++----------
1 file changed, 16 insertions(+), 16 deletions(-)
diff --git a/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp b/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp
index f10ca5a80fa04..a703cc8015c8c 100644
--- a/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp
+++ b/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp
@@ -641,16 +641,21 @@ class BlockLoadStore1DToOCLPattern : public OpConversionPattern<OpType> {
// https://registry.khronos.org/OpenCL/extensions/
// intel/cl_intel_subgroup_local_block_io.html
std::string funcName{"intel_sub_group_block_"};
- funcName += isStore ? "write_u" : "read_u";
- VectorType vecType;
- if constexpr (isStore)
- vecType = op.getVal().getType();
- else
- vecType = op.getType();
- Type elemType = vecType.getElementType();
+ // Value or Result type can be vector or scalar
+ Type valOrResTy;
+ if constexpr (isStore) {
+ funcName += "write_u";
+ valOrResTy = op.getVal().getType();
+ } else {
+ funcName += "read_u";
+ valOrResTy = op.getType();
+ }
+ // Get element type of the vector/scalar
+ VectorType vecTy = dyn_cast<VectorType>(valOrResTy);
+ Type elemType = vecTy ? vecTy.getElementType() : valOrResTy;
funcName += getTypeMangling(elemType);
- if (vecType.getNumElements() > 1)
- funcName += std::to_string(vecType.getNumElements());
+ if (vecTy)
+ funcName += std::to_string(vecTy.getNumElements());
SmallVector<Type, 2> argTypes{};
// XeVM BlockLoad/StoreOp always use signless integer types
// but OpenCL builtins expect unsigned types
@@ -670,19 +675,14 @@ class BlockLoadStore1DToOCLPattern : public OpConversionPattern<OpType> {
isUnsigned.push_back(true);
retType = LLVM::LLVMVoidType::get(rewriter.getContext());
} else {
- /*
- retType = VectorType::get(vecType.getShape(),
- rewriter.getIntegerType(elemType.getIntOrFloatBitWidth(),
- false));
- */
- retType = vecType;
+ retType = valOrResTy;
}
funcName = std::string("_Z") + std::to_string(funcName.size()) + funcName +
"PU3AS" +
std::to_string(op.getPtr().getType().getAddressSpace());
funcName += getTypeMangling(elemType, /*isUnsigned=*/true);
if constexpr (isStore)
- funcName += getTypeMangling(vecType, /*isUnsigned=*/true);
+ funcName += getTypeMangling(valOrResTy, /*isUnsigned=*/true);
LLVMFuncAttributeOptions funcAttr{noUnwindWillReturnAttrs};
LLVM::CallOp call =
More information about the Mlir-commits
mailing list