[Mlir-commits] [mlir] [MLIR][XeVM] Update cache control values and metadata format. (PR #175274)

Sang Ik Lee llvmlistbot at llvm.org
Mon Feb 9 09:52:41 PST 2026


https://github.com/silee2 updated https://github.com/llvm/llvm-project/pull/175274

>From 0cfca03586eda526976cfd2483fa298a2e3be9ec Mon Sep 17 00:00:00 2001
From: "Lee, Sang Ik" <sang.ik.lee at intel.com>
Date: Fri, 9 Jan 2026 23:42:11 +0000
Subject: [PATCH 1/4] [MLIR][XeVM] Update cache control values and metadata
 format. Cache control metadata is now always attached to getelementptr op.

---
 mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp | 132 +++++++++---------
 .../Dialect/XeVM/XeVMToLLVMIRTranslation.cpp  |   2 +-
 .../Conversion/XeVMToLLVM/xevm-to-llvm.mlir   | 121 +++++++++-------
 mlir/test/Target/LLVMIR/xevm.mlir             |  40 ++----
 4 files changed, 147 insertions(+), 148 deletions(-)

diff --git a/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp b/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp
index 20a420dfda65c..13ea9ba26d07c 100644
--- a/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp
+++ b/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp
@@ -99,26 +99,22 @@ std::string mangle(StringRef baseName, ArrayRef<Type> types,
 static int32_t getL1CacheControl(LoadCacheControl cc) {
   int32_t control = 0;
   switch (cc) {
-  case LoadCacheControl::L1UC_L2UC_L3UC:
-  case LoadCacheControl::L1UC_L2UC_L3C:
-  case LoadCacheControl::L1UC_L2C_L3UC:
-  case LoadCacheControl::L1UC_L2C_L3C:
-    control = 1;
-    break;
   case LoadCacheControl::L1C_L2UC_L3UC:
   case LoadCacheControl::L1C_L2UC_L3C:
   case LoadCacheControl::L1C_L2C_L3UC:
   case LoadCacheControl::L1C_L2C_L3C:
-    control = 2;
+    control = 1;
     break;
   case LoadCacheControl::L1S_L2UC_L3UC:
   case LoadCacheControl::L1S_L2UC_L3C:
   case LoadCacheControl::L1S_L2C_L3UC:
   case LoadCacheControl::L1S_L2C_L3C:
-    control = 3;
+    control = 2;
     break;
   case LoadCacheControl::INVALIDATE_READ:
-    control = 4;
+    control = 3;
+    break;
+  default:
     break;
   }
   return control;
@@ -127,16 +123,15 @@ static int32_t getL1CacheControl(LoadCacheControl cc) {
 static int32_t getL1CacheControl(StoreCacheControl cc) {
   int32_t control = 0;
   switch (cc) {
-  case StoreCacheControl::L1UC_L2UC_L3UC:
-  case StoreCacheControl::L1UC_L2UC_L3WB:
-  case StoreCacheControl::L1UC_L2WB_L3UC:
-  case StoreCacheControl::L1UC_L2WB_L3WB:
-    control = 1;
-    break;
   case StoreCacheControl::L1WT_L2UC_L3UC:
   case StoreCacheControl::L1WT_L2UC_L3WB:
   case StoreCacheControl::L1WT_L2WB_L3UC:
   case StoreCacheControl::L1WT_L2WB_L3WB:
+    control = 1;
+    break;
+  case StoreCacheControl::L1WB_L2UC_L3UC:
+  case StoreCacheControl::L1WB_L2WB_L3UC:
+  case StoreCacheControl::L1WB_L2UC_L3WB:
     control = 2;
     break;
   case StoreCacheControl::L1S_L2UC_L3UC:
@@ -145,10 +140,7 @@ static int32_t getL1CacheControl(StoreCacheControl cc) {
   case StoreCacheControl::L1S_L2WB_L3WB:
     control = 3;
     break;
-  case StoreCacheControl::L1WB_L2UC_L3UC:
-  case StoreCacheControl::L1WB_L2WB_L3UC:
-  case StoreCacheControl::L1WB_L2UC_L3WB:
-    control = 4;
+  default:
     break;
   }
   return control;
@@ -157,24 +149,18 @@ static int32_t getL1CacheControl(StoreCacheControl cc) {
 static int32_t getL3CacheControl(LoadCacheControl cc) {
   int32_t control = 0;
   switch (cc) {
-  case LoadCacheControl::L1UC_L2UC_L3UC:
-  case LoadCacheControl::L1UC_L2C_L3UC:
-  case LoadCacheControl::L1C_L2UC_L3UC:
-  case LoadCacheControl::L1C_L2C_L3UC:
-  case LoadCacheControl::L1S_L2UC_L3UC:
-  case LoadCacheControl::L1S_L2C_L3UC:
-    control = 1;
-    break;
   case LoadCacheControl::L1UC_L2UC_L3C:
   case LoadCacheControl::L1UC_L2C_L3C:
   case LoadCacheControl::L1C_L2UC_L3C:
   case LoadCacheControl::L1C_L2C_L3C:
   case LoadCacheControl::L1S_L2UC_L3C:
   case LoadCacheControl::L1S_L2C_L3C:
-    control = 2;
+    control = 1;
     break;
   case LoadCacheControl::INVALIDATE_READ:
-    control = 4;
+    control = 3;
+    break;
+  default:
     break;
   }
   return control;
@@ -183,16 +169,6 @@ static int32_t getL3CacheControl(LoadCacheControl cc) {
 static int32_t getL3CacheControl(StoreCacheControl cc) {
   int32_t control = 0;
   switch (cc) {
-  case StoreCacheControl::L1UC_L2UC_L3UC:
-  case StoreCacheControl::L1UC_L2WB_L3UC:
-  case StoreCacheControl::L1WT_L2UC_L3UC:
-  case StoreCacheControl::L1WT_L2WB_L3UC:
-  case StoreCacheControl::L1S_L2UC_L3UC:
-  case StoreCacheControl::L1S_L2WB_L3UC:
-  case StoreCacheControl::L1WB_L2UC_L3UC:
-  case StoreCacheControl::L1WB_L2WB_L3UC:
-    control = 1;
-    break;
   case StoreCacheControl::L1UC_L2UC_L3WB:
   case StoreCacheControl::L1UC_L2WB_L3WB:
   case StoreCacheControl::L1WT_L2UC_L3WB:
@@ -202,6 +178,8 @@ static int32_t getL3CacheControl(StoreCacheControl cc) {
   case StoreCacheControl::L1WB_L2UC_L3WB:
     control = 2;
     break;
+  default:
+    break;
   }
   return control;
 }
@@ -265,7 +243,7 @@ static std::optional<ArrayAttr>
 getCacheControlMetadata(ConversionPatternRewriter &rewriter, OpType op) {
   if (!getCacheControl(op))
     return {};
-  constexpr int32_t decorationCacheControlArity{4};
+  constexpr int32_t decorationCacheControlArity{3};
   constexpr int32_t loadCacheControlKey{6442};
   constexpr int32_t storeCacheControlKey{6443};
   constexpr bool isLoad = std::is_same_v<OpType, BlockLoad2dOp> ||
@@ -275,9 +253,9 @@ getCacheControlMetadata(ConversionPatternRewriter &rewriter, OpType op) {
                           std::is_same_v<OpType, PrefetchOp>;
   const int32_t controlKey{isLoad ? loadCacheControlKey : storeCacheControlKey};
   SmallVector<int32_t, decorationCacheControlArity> decorationsL1{
-      controlKey, 0, getL1CacheControl<OpType>(op), 0};
+      controlKey, 0, getL1CacheControl<OpType>(op)};
   SmallVector<int32_t, decorationCacheControlArity> decorationsL3{
-      controlKey, 1, getL3CacheControl<OpType>(op), 0};
+      controlKey, 1, getL3CacheControl<OpType>(op)};
   auto arrayAttrL1 = rewriter.getI32ArrayAttr(decorationsL1);
   auto arrayAttrL3 = rewriter.getI32ArrayAttr(decorationsL3);
 
@@ -445,7 +423,16 @@ class PrefetchToOCLPattern : public OpConversionPattern<PrefetchOp> {
     const std::string fnName{"_Z8prefetchPU3AS1Kcm"};
     Value one =
         LLVM::ConstantOp::create(rewriter, loc, rewriter.getI64Type(), 1);
-    SmallVector<Value> args{op.getPtr(), one};
+    Value ptrOp = op.getPtr();
+    // Create a getelementptr op to carry the cache control metadata.
+    // The element type is irrelevant because the only index is zero; use i32.
+    LLVM::GEPOp gep = LLVM::GEPOp::create(rewriter, loc, ptrOp.getType(),
+                                          rewriter.getI32Type(), ptrOp,
+                                          ArrayRef<LLVM::GEPArg>{0});
+    if (std::optional<ArrayAttr> optCacheControls =
+            getCacheControlMetadata(rewriter, op))
+      gep->setAttr(XeVMDialect::getCacheControlsAttrName(), *optCacheControls);
+    SmallVector<Value> args{gep, one};
     SmallVector<Type> argTypes;
     for (auto arg : args)
       argTypes.push_back(arg.getType());
@@ -459,12 +446,9 @@ class PrefetchToOCLPattern : public OpConversionPattern<PrefetchOp> {
         /*targetMem1=*/LLVM::ModRefInfo::NoModRef);
     funcAttr.memEffectsAttr = memAttr;
 
-    LLVM::CallOp call = createDeviceFunctionCall(
-        rewriter, fnName, LLVM::LLVMVoidType::get(rewriter.getContext()),
-        argTypes, args, {}, funcAttr, op.getOperation());
-    if (std::optional<ArrayAttr> optCacheControls =
-            getCacheControlMetadata(rewriter, op))
-      call->setAttr(XeVMDialect::getCacheControlsAttrName(), *optCacheControls);
+    createDeviceFunctionCall(rewriter, fnName,
+                             LLVM::LLVMVoidType::get(rewriter.getContext()),
+                             argTypes, args, {}, funcAttr, op.getOperation());
     rewriter.eraseOp(op);
     return success();
   }
@@ -548,7 +532,16 @@ class LoadStorePrefetchToOCLPattern : public OpConversionPattern<OpType> {
         rewriter, loc, VectorType::get(2, i32Type), byteCoord, op.getX(), zero);
     byteCoord = LLVM::InsertElementOp::create(
         rewriter, loc, VectorType::get(2, i32Type), byteCoord, op.getY(), one);
-    SmallVector<Value> args{op.getPtr(), op.getBaseWidth(), op.getBaseHeight(),
+    Value ptrOp = op.getPtr();
+    // Create a getelementptr op to carry the cache control metadata.
+    // The element type is irrelevant because the only index is zero; use i32.
+    LLVM::GEPOp gep =
+        LLVM::GEPOp::create(rewriter, loc, ptrOp.getType(), i32Type, ptrOp,
+                            ArrayRef<LLVM::GEPArg>{0});
+    if (std::optional<ArrayAttr> optCacheControls =
+            getCacheControlMetadata(rewriter, op))
+      gep->setAttr(XeVMDialect::getCacheControlsAttrName(), *optCacheControls);
+    SmallVector<Value> args{gep, op.getBaseWidth(), op.getBaseHeight(),
                             op.getBasePitch(), byteCoord};
     SmallVector<Type> retTypes;
     Value spvLoadDstPtr;
@@ -624,10 +617,6 @@ class LoadStorePrefetchToOCLPattern : public OpConversionPattern<OpType> {
     LLVM::CallOp call = createDeviceFunctionCall(
         rewriter, funcName, LLVM::LLVMVoidType::get(rewriter.getContext()),
         argTypes, args, paramAttrs, funcAttr, op.getOperation());
-    if (std::optional<ArrayAttr> optCacheControls =
-            getCacheControlMetadata(rewriter, op)) {
-      call->setAttr(XeVMDialect::getCacheControlsAttrName(), *optCacheControls);
-    }
     if constexpr (isLoad)
       rewriter.replaceOp(
           op, LLVM::LoadOp::create(rewriter, loc, vecType, spvLoadDstPtr));
@@ -672,8 +661,17 @@ class BlockLoadStore1DToOCLPattern : public OpConversionPattern<OpType> {
     // arg1 - only if store : vector to store
     // Prepare arguments
     SmallVector<Value, 2> args{};
-    args.push_back(op.getPtr());
-    argTypes.push_back(op.getPtr().getType());
+    Value ptrOp = op.getPtr();
+    // Create a getelementptr op to carry the cache control metadata.
+    // The element type is irrelevant because the only index is zero; use i32.
+    LLVM::GEPOp gep = LLVM::GEPOp::create(
+        rewriter, op.getLoc(), ptrOp.getType(), rewriter.getI32Type(), ptrOp,
+        ArrayRef<LLVM::GEPArg>{0});
+    if (std::optional<ArrayAttr> optCacheControls =
+            getCacheControlMetadata(rewriter, op))
+      gep->setAttr(XeVMDialect::getCacheControlsAttrName(), *optCacheControls);
+    args.push_back(gep);
+    argTypes.push_back(gep.getType());
     isUnsigned.push_back(true);
     Type retType;
     if constexpr (isStore) {
@@ -695,10 +693,6 @@ class BlockLoadStore1DToOCLPattern : public OpConversionPattern<OpType> {
     LLVM::CallOp call =
         createDeviceFunctionCall(rewriter, funcName, retType, argTypes, args,
                                  {}, funcAttr, op.getOperation());
-    if (std::optional<ArrayAttr> optCacheControls =
-            getCacheControlMetadata(rewriter, op)) {
-      call->setAttr(XeVMDialect::getCacheControlsAttrName(), *optCacheControls);
-    }
     if constexpr (isStore)
       rewriter.eraseOp(op);
     else
@@ -715,10 +709,20 @@ class LLVMLoadStoreToOCLPattern : public OpConversionPattern<OpType> {
                   ConversionPatternRewriter &rewriter) const override {
     if (!op->hasAttr("cache_control"))
       return failure();
-    std::optional<ArrayAttr> optCacheControls =
-        getCacheControlMetadata(rewriter, op);
-    op->setAttr(XeVMDialect::getCacheControlsAttrName(), *optCacheControls);
-    op->removeAttr("cache_control");
+    constexpr bool isStore = std::is_same_v<OpType, LLVM::StoreOp>;
+    Value ptrOp = op.getAddr();
+    // Create a getelementptr op to carry the cache control metadata.
+    // The element type is irrelevant because the only index is zero; use i32.
+    LLVM::GEPOp gep = LLVM::GEPOp::create(
+        rewriter, op.getLoc(), ptrOp.getType(), rewriter.getI32Type(), ptrOp,
+        ArrayRef<LLVM::GEPArg>{0});
+    if (std::optional<ArrayAttr> optCacheControls =
+            getCacheControlMetadata(rewriter, op))
+      gep->setAttr(XeVMDialect::getCacheControlsAttrName(), *optCacheControls);
+    if constexpr (isStore)
+      rewriter.replaceOpWithNewOp<LLVM::StoreOp>(op, op.getValue(), gep);
+    else
+      rewriter.replaceOpWithNewOp<LLVM::LoadOp>(op, op.getType(), gep);
     return success();
   }
 };
diff --git a/mlir/lib/Target/LLVMIR/Dialect/XeVM/XeVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/XeVM/XeVMToLLVMIRTranslation.cpp
index 7e9318ad3c019..ba098aa5fde50 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/XeVM/XeVMToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/XeVM/XeVMToLLVMIRTranslation.cpp
@@ -68,7 +68,7 @@ class XeVMDialectLLVMIRTranslationInterface
         attrs, std::back_inserter(decorations),
         [&ctx, i32Ty](Attribute attr) -> llvm::Metadata * {
           auto valuesArray = dyn_cast<ArrayAttr>(attr).getValue();
-          std::array<llvm::Metadata *, 4> metadata;
+          std::array<llvm::Metadata *, 3> metadata;
           llvm::transform(
               valuesArray, metadata.begin(), [i32Ty](Attribute valueAttr) {
                 return llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
diff --git a/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir b/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir
index 7f01526cb0a06..dab735c7df31f 100644
--- a/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir
+++ b/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir
@@ -15,17 +15,18 @@ llvm.func @blockload2d(%a: !llvm.ptr<1>, %base_width_a: i32, %base_height_a: i32
   // CHECK: %[[VAR2:.*]] = llvm.mlir.constant(1 : i32) : i32
   // CHECK: %[[VAR3:.*]] = llvm.insertelement %[[ARG4]], %[[VAR0]][%[[VAR1]] : i32] : vector<2xi32>
   // CHECK: %[[VAR4:.*]] = llvm.insertelement %[[ARG5]], %[[VAR3]][%[[VAR2]] : i32] : vector<2xi32>
-  // CHECK: %[[VAR5:.*]] = llvm.mlir.constant(8 : i32) : i32
-  // CHECK: %[[VAR6:.*]] = llvm.alloca %[[VAR5]] x i16 : (i32) -> !llvm.ptr
+  // CHECK: %[[VAR5:.*]] = llvm.getelementptr %[[ARG0]][0] : (!llvm.ptr<1>) -> !llvm.ptr<1>, i32
+  // CHECK: %[[VAR6:.*]] = llvm.mlir.constant(8 : i32) : i32
+  // CHECK: %[[VAR7:.*]] = llvm.alloca %[[VAR6]] x i16 : (i32) -> !llvm.ptr
   // CHECK: llvm.call spir_funccc @_Z41intel_sub_group_2d_block_read_16b_8r16x1cPU3AS1viiiDv2_iPt(
-  // CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[VAR4]], %[[VAR6]])
+  // CHECK-SAME: %[[VAR5]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[VAR4]], %[[VAR7]])
   // CHECK-SAME: {function_type = !llvm.func<void (ptr<1>, i32, i32, i32, vector<2xi32>, ptr)>,
   // CHECK-SAME:   linkage = #llvm.linkage<external>, no_unwind, sym_name =
   // CHECK-SAME:   "_Z41intel_sub_group_2d_block_read_16b_8r16x1cPU3AS1viiiDv2_iPt", visibility_ = 0 : i64,
   // CHECK-SAME:   will_return} :
   // CHECK-SAME: (!llvm.ptr<1> {llvm.nonnull, llvm.readonly}, i32, i32, i32, vector<2xi32>,
   // CHECK-SAME:  !llvm.ptr {llvm.nonnull, llvm.writeonly}) -> ()
-  // CHECK: %[[VAR7:.*]] = llvm.load %[[VAR6]] : !llvm.ptr -> vector<8xi16>
+  // CHECK: %[[VAR8:.*]] = llvm.load %[[VAR7]] : !llvm.ptr -> vector<8xi16>
   %loaded_a = xevm.blockload2d %a, %base_width_a, %base_height_a, %base_pitch_a, %x, %y
     <{elem_size_in_bits=16 : i32, tile_width=16 : i32, tile_height=8 : i32, v_blocks=1 : i32, transpose=false,
       pack_register=false}> : (!llvm.ptr<1>, i32, i32, i32, i32, i32) -> vector<8xi16>
@@ -36,8 +37,8 @@ llvm.func @blockload2d(%a: !llvm.ptr<1>, %base_width_a: i32, %base_height_a: i32
 // CHECK-LABEL: llvm.func spir_funccc @_Z41intel_sub_group_2d_block_read_16b_8r16x1cPU3AS1viiiDv2_iPt(
 llvm.func @blockload2d_cache_control(%a: !llvm.ptr<1>, %base_width_a: i32, %base_height_a: i32, %base_pitch_a: i32, %x: i32, %y: i32) -> vector<8xi16> {
   // CHECK: xevm.DecorationCacheControl =
-  // CHECK-SAME: 6442 : i32, 0 : i32, 1 : i32, 0 : i32
-  // CHECK-SAME: 6442 : i32, 1 : i32, 1 : i32, 0 : i32
+  // CHECK-SAME: 6442 : i32, 0 : i32, 0 : i32
+  // CHECK-SAME: 6442 : i32, 1 : i32, 0 : i32
   %loaded_a = xevm.blockload2d %a, %base_width_a, %base_height_a, %base_pitch_a, %x, %y
     <{elem_size_in_bits=16 : i32, tile_width=16 : i32, tile_height=8 : i32, v_blocks=1 : i32, transpose=false,
       pack_register=false, cache_control=#xevm.load_cache_control<L1uc_L2uc_L3uc>}> : (!llvm.ptr<1>, i32, i32, i32, i32, i32) -> vector<8xi16>
@@ -56,17 +57,18 @@ llvm.func @blockload2d_v_blocks(%a: !llvm.ptr<1>, %base_width_a: i32, %base_heig
   // CHECK: %[[VAR2:.*]] = llvm.mlir.constant(1 : i32) : i32
   // CHECK: %[[VAR3:.*]] = llvm.insertelement %[[ARG4]], %[[VAR0]][%[[VAR1]] : i32] : vector<2xi32>
   // CHECK: %[[VAR4:.*]] = llvm.insertelement %[[ARG5]], %[[VAR3]][%[[VAR2]] : i32] : vector<2xi32>
-  // CHECK: %[[VAR5:.*]] = llvm.mlir.constant(16 : i32) : i32
-  // CHECK: %[[VAR6:.*]] = llvm.alloca %[[VAR5]] x i16 : (i32) -> !llvm.ptr
+  // CHECK: %[[VAR5:.*]] = llvm.getelementptr %[[ARG0]][0] : (!llvm.ptr<1>) -> !llvm.ptr<1>, i32
+  // CHECK: %[[VAR6:.*]] = llvm.mlir.constant(16 : i32) : i32
+  // CHECK: %[[VAR7:.*]] = llvm.alloca %[[VAR6]] x i16 : (i32) -> !llvm.ptr
   // CHECK: llvm.call spir_funccc @_Z41intel_sub_group_2d_block_read_16b_8r16x2cPU3AS1viiiDv2_iPt(
-  // CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[VAR4]], %[[VAR6]])
+  // CHECK-SAME: %[[VAR5]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[VAR4]], %[[VAR7]])
   // CHECK-SAME: {function_type = !llvm.func<void (ptr<1>, i32, i32, i32, vector<2xi32>, ptr)>,
   // CHECK-SAME:   linkage = #llvm.linkage<external>, no_unwind, sym_name =
   // CHECK-SAME:   "_Z41intel_sub_group_2d_block_read_16b_8r16x2cPU3AS1viiiDv2_iPt", visibility_ = 0 : i64,
   // CHECK-SAME:   will_return}
   // CHECK-SAME: (!llvm.ptr<1> {llvm.nonnull, llvm.readonly}, i32, i32, i32, vector<2xi32>,
   // CHECK-SAME:  !llvm.ptr {llvm.nonnull, llvm.writeonly}) -> ()
-  // CHECK: %[[VAR7:.*]] = llvm.load %[[VAR6]] : !llvm.ptr -> vector<16xi16>
+  // CHECK: %[[VAR8:.*]] = llvm.load %[[VAR7]] : !llvm.ptr -> vector<16xi16>
   %loaded_a = xevm.blockload2d %a, %base_width_a, %base_height_a, %base_pitch_a, %x, %y
     <{elem_size_in_bits=16 : i32, tile_width=16 : i32, tile_height=8 : i32, v_blocks=2 : i32, transpose=false,
       pack_register=false}> : (!llvm.ptr<1>, i32, i32, i32, i32, i32) -> vector<16xi16>
@@ -85,17 +87,18 @@ llvm.func @blockload2d_pack_register(%a: !llvm.ptr<1>, %base_width_a: i32, %base
   // CHECK: %[[VAR2:.*]] = llvm.mlir.constant(1 : i32) : i32
   // CHECK: %[[VAR3:.*]] = llvm.insertelement %[[ARG4]], %[[VAR0]][%[[VAR1]] : i32] : vector<2xi32>
   // CHECK: %[[VAR4:.*]] = llvm.insertelement %[[ARG5]], %[[VAR3]][%[[VAR2]] : i32] : vector<2xi32>
-  // CHECK: %[[VAR5:.*]] = llvm.mlir.constant(8 : i32) : i32
-  // CHECK: %[[VAR6:.*]] = llvm.alloca %[[VAR5]] x i32 : (i32) -> !llvm.ptr
+  // CHECK: %[[VAR5:.*]] = llvm.getelementptr %[[ARG0]][0] : (!llvm.ptr<1>) -> !llvm.ptr<1>, i32
+  // CHECK: %[[VAR6:.*]] = llvm.mlir.constant(8 : i32) : i32
+  // CHECK: %[[VAR7:.*]] = llvm.alloca %[[VAR6]] x i32 : (i32) -> !llvm.ptr
   // CHECK: llvm.call spir_funccc @_Z52intel_sub_group_2d_block_read_transform_16b_16r16x1cPU3AS1viiiDv2_iPj(
-  // CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[VAR4]], %[[VAR6]])
+  // CHECK-SAME: %[[VAR5]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[VAR4]], %[[VAR7]])
   // CHECK-SAME: {function_type = !llvm.func<void (ptr<1>, i32, i32, i32, vector<2xi32>, ptr)>,
   // CHECK-SAME:   linkage = #llvm.linkage<external>, no_unwind, sym_name =
   // CHECK-SAME:   "_Z52intel_sub_group_2d_block_read_transform_16b_16r16x1cPU3AS1viiiDv2_iPj", visibility_ = 0 : i64,
   // CHECK-SAME:   will_return} :
   // CHECK-SAME: (!llvm.ptr<1> {llvm.nonnull, llvm.readonly}, i32, i32, i32, vector<2xi32>,
   // CHECK-SAME:  !llvm.ptr {llvm.nonnull, llvm.writeonly}) -> ()
-  // CHECK: %[[VAR7:.*]] = llvm.load %[[VAR6]] : !llvm.ptr -> vector<8xi32>
+  // CHECK: %[[VAR8:.*]] = llvm.load %[[VAR7]] : !llvm.ptr -> vector<8xi32>
   %loaded_a = xevm.blockload2d %a, %base_width_a, %base_height_a, %base_pitch_a, %x, %y
     <{elem_size_in_bits=16 : i32, tile_width=16 : i32, tile_height=16 : i32, v_blocks=1 : i32, transpose=false,
       pack_register=true}> : (!llvm.ptr<1>, i32, i32, i32, i32, i32) -> vector<8xi32>
@@ -114,17 +117,18 @@ llvm.func @blockload2d_transpose(%a: !llvm.ptr<1>, %base_width_a: i32, %base_hei
   // CHECK: %[[VAR2:.*]] = llvm.mlir.constant(1 : i32) : i32
   // CHECK: %[[VAR3:.*]] = llvm.insertelement %[[ARG4]], %[[VAR0]][%[[VAR1]] : i32] : vector<2xi32>
   // CHECK: %[[VAR4:.*]] = llvm.insertelement %[[ARG5]], %[[VAR3]][%[[VAR2]] : i32] : vector<2xi32>
-  // CHECK: %[[VAR5:.*]] = llvm.mlir.constant(8 : i32) : i32
-  // CHECK: %[[VAR6:.*]] = llvm.alloca %[[VAR5]] x i32 : (i32) -> !llvm.ptr
+  // CHECK: %[[VAR5:.*]] = llvm.getelementptr %[[ARG0]][0] : (!llvm.ptr<1>) -> !llvm.ptr<1>, i32
+  // CHECK: %[[VAR6:.*]] = llvm.mlir.constant(8 : i32) : i32
+  // CHECK: %[[VAR7:.*]] = llvm.alloca %[[VAR6]] x i32 : (i32) -> !llvm.ptr
   // CHECK: llvm.call spir_funccc @_Z51intel_sub_group_2d_block_read_transpose_32b_16r8x1cPU3AS1viiiDv2_iPj(
-  // CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[VAR4]], %[[VAR6]])
+  // CHECK-SAME: %[[VAR5]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[VAR4]], %[[VAR7]])
   // CHECK-SAME: {function_type = !llvm.func<void (ptr<1>, i32, i32, i32, vector<2xi32>, ptr)>,
   // CHECK-SAME:   linkage = #llvm.linkage<external>, no_unwind, sym_name =
   // CHECK-SAME:   "_Z51intel_sub_group_2d_block_read_transpose_32b_16r8x1cPU3AS1viiiDv2_iPj", visibility_ = 0 : i64,
   // CHECK-SAME:   will_return}
   // CHECK-SAME: (!llvm.ptr<1> {llvm.nonnull, llvm.readonly}, i32, i32, i32, vector<2xi32>,
   // CHECK-SAME:  !llvm.ptr {llvm.nonnull, llvm.writeonly}) -> ()
-  // CHECK: %[[VAR7:.*]] = llvm.load %[[VAR6]] : !llvm.ptr -> vector<8xi32>
+  // CHECK: %[[VAR8:.*]] = llvm.load %[[VAR7]] : !llvm.ptr -> vector<8xi32>
   %loaded_a = xevm.blockload2d %a, %base_width_a, %base_height_a, %base_pitch_a, %x, %y
     <{elem_size_in_bits=32 : i32, tile_width=8 : i32, tile_height=16 : i32, v_blocks=1 : i32, transpose=true,
       pack_register=false}> : (!llvm.ptr<1>, i32, i32, i32, i32, i32) -> vector<8xi32>
@@ -143,11 +147,12 @@ llvm.func @blockstore2d(%c: !llvm.ptr<1>, %base_width_c: i32, %base_height_c: i3
   // CHECK: %[[VAR2:.*]] = llvm.mlir.constant(1 : i32) : i32
   // CHECK: %[[VAR3:.*]] = llvm.insertelement %[[ARG4]], %[[VAR0]][%[[VAR1]] : i32] : vector<2xi32>
   // CHECK: %[[VAR4:.*]] = llvm.insertelement %[[ARG5]], %[[VAR3]][%[[VAR2]] : i32] : vector<2xi32>
-  // CHECK: %[[VAR5:.*]] = llvm.mlir.constant(8 : i32) : i32
-  // CHECK: %[[VAR6:.*]] = llvm.alloca %[[VAR5]] x i32 : (i32) -> !llvm.ptr
-  // CHECK: llvm.store %[[ARG6]], %[[VAR6]] : vector<8xi32>, !llvm.ptr
+  // CHECK: %[[VAR5:.*]] = llvm.getelementptr %[[ARG0]][0] : (!llvm.ptr<1>) -> !llvm.ptr<1>, i32
+  // CHECK: %[[VAR6:.*]] = llvm.mlir.constant(8 : i32) : i32
+  // CHECK: %[[VAR7:.*]] = llvm.alloca %[[VAR6]] x i32 : (i32) -> !llvm.ptr
+  // CHECK: llvm.store %[[ARG6]], %[[VAR7]] : vector<8xi32>, !llvm.ptr
   // CHECK: llvm.call spir_funccc @_Z42intel_sub_group_2d_block_write_32b_8r16x1cPU3AS1viiiDv2_iPj(
-  // CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[VAR4]], %[[VAR6]])
+  // CHECK-SAME: %[[VAR5]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[VAR4]], %[[VAR7]])
   // CHECK-SAME: {function_type = !llvm.func<void (ptr<1>, i32, i32, i32, vector<2xi32>, ptr)>,
   // CHECK-SAME:   linkage = #llvm.linkage<external>, no_unwind, sym_name =
   // CHECK-SAME:   "_Z42intel_sub_group_2d_block_write_32b_8r16x1cPU3AS1viiiDv2_iPj", visibility_ = 0 : i64,
@@ -164,8 +169,8 @@ llvm.func @blockstore2d(%c: !llvm.ptr<1>, %base_width_c: i32, %base_height_c: i3
 // CHECK-LABEL: llvm.func spir_funccc @_Z42intel_sub_group_2d_block_write_32b_8r16x1cPU3AS1viiiDv2_iPj(
 llvm.func @blockstore2d_cache_control(%c: !llvm.ptr<1>, %base_width_c: i32, %base_height_c: i32, %base_pitch_c: i32, %x: i32, %y: i32, %c_result_casted: vector<8xi32>) {
   // CHECK: xevm.DecorationCacheControl =
-  // CHECK-SAME: 6443 : i32, 0 : i32, 2 : i32, 0 : i32
-  // CHECK-SAME: 6443 : i32, 1 : i32, 2 : i32, 0 : i32
+  // CHECK-SAME: 6443 : i32, 0 : i32, 1 : i32
+  // CHECK-SAME: 6443 : i32, 1 : i32, 2 : i32
   xevm.blockstore2d %c, %base_width_c, %base_height_c, %base_pitch_c, %x, %y, %c_result_casted
     <{elem_size_in_bits=32 : i32, tile_width=16 : i32, tile_height=8 : i32, cache_control = #xevm.store_cache_control<L1wt_L2uc_L3wb>}>
     : (!llvm.ptr<1>, i32, i32, i32, i32, i32, vector<8xi32>)
@@ -184,8 +189,9 @@ llvm.func @blockprefetch2d(%ptr: !llvm.ptr<1>, %base_width: i32, %base_height: i
   // CHECK: %[[VAR2:.*]] = llvm.mlir.constant(1 : i32) : i32
   // CHECK: %[[VAR3:.*]] = llvm.insertelement %[[ARG4]], %[[VAR0]][%[[VAR1]] : i32] : vector<2xi32>
   // CHECK: %[[VAR4:.*]] = llvm.insertelement %[[ARG5]], %[[VAR3]][%[[VAR2]] : i32] : vector<2xi32>
+  // CHECK: %[[VAR5:.*]] = llvm.getelementptr %[[ARG0]][0]
   // CHECK: llvm.call spir_funccc @_Z44intel_sub_group_2d_block_prefetch_8b_8r32x1cPU3AS1viiiDv2_i(
-  // CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[VAR4]])
+  // CHECK-SAME: %[[VAR5]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[VAR4]])
   // CHECK-SAME: {function_type = !llvm.func<void (ptr<1>, i32, i32, i32, vector<2xi32>)>, linkage = #llvm.linkage<external>,
   // CHECK-SAME:   memory_effects = #llvm.memory_effects<other = none, argMem = read, inaccessibleMem = none, errnoMem = none, targetMem0 = none, targetMem1 = none>, no_unwind,
   // CHECK-SAME:   sym_name = "_Z44intel_sub_group_2d_block_prefetch_8b_8r32x1cPU3AS1viiiDv2_i", visibility_ = 0 : i64
@@ -234,7 +240,8 @@ llvm.func @memfence() {
 // CHECK: llvm.func @prefetch(%[[ARG0:.*]]: !llvm.ptr<1>) {
 llvm.func @prefetch(%ptr: !llvm.ptr<1>) {
   // CHECK: %[[VAR0:.*]] = llvm.mlir.constant(1 : i64) : i64
-  // CHECK: llvm.call spir_funccc @_Z8prefetchPU3AS1Kcm(%[[ARG0]], %[[VAR0]])
+  // CHECK: %[[VAR1:.*]] = llvm.getelementptr %[[ARG0]][0]
+  // CHECK: llvm.call spir_funccc @_Z8prefetchPU3AS1Kcm(%[[VAR1]], %[[VAR0]])
   // CHECK-SAME: {function_type = !llvm.func<void (ptr<1>, i64)>, linkage = #llvm.linkage<external>,
   // CHECK-SAME:   memory_effects = #llvm.memory_effects<other = none, argMem = read, inaccessibleMem = none, errnoMem = none, targetMem0 = none, targetMem1 = none>,
   // CHECK-SAME:   no_unwind, sym_name = "_Z8prefetchPU3AS1Kcm", visibility_ = 0 : i64
@@ -246,8 +253,8 @@ llvm.func @prefetch(%ptr: !llvm.ptr<1>) {
 // CHECK-LABEL: llvm.func @llvm.load
 llvm.func @llvm.load(%a: !llvm.ptr<1>) -> i32 {
   // CHECK: xevm.DecorationCacheControl =
-  // CHECK-SAME: 6442 : i32, 0 : i32, 1 : i32, 0 : i32
-  // CHECK-SAME: 6442 : i32, 1 : i32, 1 : i32, 0 : i32
+  // CHECK-SAME: 6442 : i32, 0 : i32, 0 : i32
+  // CHECK-SAME: 6442 : i32, 1 : i32, 0 : i32
   %val = llvm.load %a {cache_control=#xevm.load_cache_control<L1uc_L2uc_L3uc>} : !llvm.ptr<1> -> i32
   llvm.return %val : i32
 }
@@ -256,8 +263,8 @@ llvm.func @llvm.load(%a: !llvm.ptr<1>) -> i32 {
 // CHECK-LABEL: llvm.func @llvm.store
 llvm.func @llvm.store(%a: !llvm.ptr<1>, %val: i32) {
   // CHECK: xevm.DecorationCacheControl =
-  // CHECK-SAME: 6443 : i32, 0 : i32, 2 : i32, 0 : i32
-  // CHECK-SAME: 6443 : i32, 1 : i32, 2 : i32, 0 : i32
+  // CHECK-SAME: 6443 : i32, 0 : i32, 1 : i32
+  // CHECK-SAME: 6443 : i32, 1 : i32, 2 : i32
   llvm.store %val, %a {cache_control=#xevm.store_cache_control<L1wt_L2uc_L3wb>} : i32, !llvm.ptr<1>
   llvm.return
 }
@@ -266,12 +273,13 @@ llvm.func @llvm.store(%a: !llvm.ptr<1>, %val: i32) {
 // CHECK-LABEL: llvm.func spir_funccc @_Z30intel_sub_group_block_read_us8PU3AS1t
 // CHECK: llvm.func @blockload_as1(%[[ARG0:.*]]: !llvm.ptr<1>)
 llvm.func @blockload_as1(%ptr: !llvm.ptr<1>) -> vector<8xi16> {
-  // CHECK: %[[VAR0:.*]] = llvm.call spir_funccc @_Z30intel_sub_group_block_read_us8PU3AS1t(%[[ARG0]])
+  // CHECK:  xevm.DecorationCacheControl =
+  // CHECK-SAME:    [6442 : i32, 0 : i32, 0 : i32],
+  // CHECK-SAME:    [6442 : i32, 1 : i32, 0 : i32]
+  // CHECK: %[[VAR0:.*]] = llvm.call spir_funccc @_Z30intel_sub_group_block_read_us8PU3AS1t
   // CHECK-SAME: {function_type = !llvm.func<vector<8xi16> (ptr<1>)>, linkage = #llvm.linkage<external>,
   // CHECK-SAME:  no_unwind, sym_name = "_Z30intel_sub_group_block_read_us8PU3AS1t",
-  // CHECK-SAME:  visibility_ = 0 : i64, will_return, xevm.DecorationCacheControl =
-  // CHECK-SAME:    [6442 : i32, 0 : i32, 1 : i32, 0 : i32],
-  // CHECK-SAME:    [6442 : i32, 1 : i32, 1 : i32, 0 : i32]
+  // CHECK-SAME:  visibility_ = 0 : i64, will_return
   %loaded_a = xevm.blockload %ptr <{cache_control=#xevm.load_cache_control<L1uc_L2uc_L3uc>}> : (!llvm.ptr<1>) -> vector<8xi16>
   llvm.return %loaded_a : vector<8xi16>
 }
@@ -280,12 +288,13 @@ llvm.func @blockload_as1(%ptr: !llvm.ptr<1>) -> vector<8xi16> {
 // CHECK-LABEL: llvm.func spir_funccc @_Z31intel_sub_group_block_read_uc16PU3AS3h(!llvm.ptr<3>)
 // CHECK: llvm.func @blockload_as3(%[[ARG0:.*]]: !llvm.ptr<3>)
 llvm.func @blockload_as3(%ptr: !llvm.ptr<3>) -> vector<16xi8> {
-  // CHECK: %[[VAR0:.*]] = llvm.call spir_funccc @_Z31intel_sub_group_block_read_uc16PU3AS3h(%[[ARG0]])
+  // CHECK:   xevm.DecorationCacheControl =
+  // CHECK-SAME:    [6442 : i32, 0 : i32, 0 : i32],
+  // CHECK-SAME:    [6442 : i32, 1 : i32, 0 : i32]
+  // CHECK: %[[VAR0:.*]] = llvm.call spir_funccc @_Z31intel_sub_group_block_read_uc16PU3AS3h
   // CHECK-SAME: {function_type = !llvm.func<vector<16xi8> (ptr<3>)>, linkage = #llvm.linkage<external>,
   // CHECK-SAME:   no_unwind, sym_name = "_Z31intel_sub_group_block_read_uc16PU3AS3h", visibility_ = 0 : i64,
-  // CHECK-SAME:   will_return, xevm.DecorationCacheControl =
-  // CHECK-SAME:    [6442 : i32, 0 : i32, 1 : i32, 0 : i32],
-  // CHECK-SAME:    [6442 : i32, 1 : i32, 1 : i32, 0 : i32]
+  // CHECK-SAME:   will_return
   %loaded_a = xevm.blockload %ptr <{cache_control=#xevm.load_cache_control<L1uc_L2uc_L3uc>}> : (!llvm.ptr<3>) -> vector<16xi8>
   llvm.return %loaded_a : vector<16xi8>
 }
@@ -294,12 +303,13 @@ llvm.func @blockload_as3(%ptr: !llvm.ptr<3>) -> vector<16xi8> {
 // CHECK-LABEL: llvm.func spir_funccc @_Z29intel_sub_group_block_read_ucPU3AS3h(!llvm.ptr<3>)
 // CHECK: llvm.func @blockload_scalar(%[[ARG0:.*]]: !llvm.ptr<3>)
 llvm.func @blockload_scalar(%ptr: !llvm.ptr<3>) -> i8 {
-  // CHECK: %[[VAR0:.*]] = llvm.call spir_funccc @_Z29intel_sub_group_block_read_ucPU3AS3h(%[[ARG0]])
+  // CHECK:   xevm.DecorationCacheControl =
+  // CHECK-SAME:    [6442 : i32, 0 : i32, 0 : i32],
+  // CHECK-SAME:    [6442 : i32, 1 : i32, 0 : i32]
+  // CHECK: %[[VAR0:.*]] = llvm.call spir_funccc @_Z29intel_sub_group_block_read_ucPU3AS3h
   // CHECK-SAME: {function_type = !llvm.func<i8 (ptr<3>)>, linkage = #llvm.linkage<external>,
   // CHECK-SAME:   no_unwind, sym_name = "_Z29intel_sub_group_block_read_ucPU3AS3h", visibility_ = 0 : i64,
-  // CHECK-SAME:   will_return, xevm.DecorationCacheControl =
-  // CHECK-SAME:    [6442 : i32, 0 : i32, 1 : i32, 0 : i32],
-  // CHECK-SAME:    [6442 : i32, 1 : i32, 1 : i32, 0 : i32]
+  // CHECK-SAME:   will_return
   %loaded_a = xevm.blockload %ptr <{cache_control=#xevm.load_cache_control<L1uc_L2uc_L3uc>}> : (!llvm.ptr<3>) -> i8
   llvm.return %loaded_a : i8
 }
@@ -308,12 +318,13 @@ llvm.func @blockload_scalar(%ptr: !llvm.ptr<3>) -> i8 {
 // CHECK-LABEL: llvm.func spir_funccc @_Z31intel_sub_group_block_write_ui8PU3AS1jDv8_j
 // CHECK: llvm.func @blockstore_as1(%[[ARG0:.*]]: !llvm.ptr<1>, %[[ARG1:.*]]: vector<8xi32>) {
 llvm.func @blockstore_as1(%ptr: !llvm.ptr<1>, %data: vector<8xi32>) {
-  // CHECK: llvm.call spir_funccc @_Z31intel_sub_group_block_write_ui8PU3AS1jDv8_j(%[[ARG0]], %[[ARG1]])
+  // CHECK:   xevm.DecorationCacheControl =
+  // CHECK-SAME:    [6443 : i32, 0 : i32, 1 : i32],
+  // CHECK-SAME:    [6443 : i32, 1 : i32, 2 : i32]
+  // CHECK: llvm.call spir_funccc @_Z31intel_sub_group_block_write_ui8PU3AS1jDv8_j
   // CHECK-SAME: {function_type = !llvm.func<void (ptr<1>, vector<8xi32>)>, linkage = #llvm.linkage<external>,
   // CHECK-SAME:   no_unwind, sym_name = "_Z31intel_sub_group_block_write_ui8PU3AS1jDv8_j", visibility_ = 0 : i64,
-  // CHECK-SAME:   will_return, xevm.DecorationCacheControl =
-  // CHECK-SAME:    [6443 : i32, 0 : i32, 2 : i32, 0 : i32],
-  // CHECK-SAME:    [6443 : i32, 1 : i32, 2 : i32, 0 : i32]
+  // CHECK-SAME:   will_return
   xevm.blockstore %ptr, %data <{cache_control=#xevm.store_cache_control<L1wt_L2uc_L3wb>}> : (!llvm.ptr<1>, vector<8xi32>)
   llvm.return
 }
@@ -322,12 +333,13 @@ llvm.func @blockstore_as1(%ptr: !llvm.ptr<1>, %data: vector<8xi32>) {
 // CHECK-LABEL: llvm.func spir_funccc @_Z31intel_sub_group_block_write_ul2PU3AS3mDv2_m
 // CHECK: llvm.func @blockstore_as3(%[[ARG0:.*]]: !llvm.ptr<3>, %[[ARG1:.*]]: vector<2xi64>) {
 llvm.func @blockstore_as3(%ptr: !llvm.ptr<3>, %data: vector<2xi64>) {
-  // CHECK: llvm.call spir_funccc @_Z31intel_sub_group_block_write_ul2PU3AS3mDv2_m(%[[ARG0]], %[[ARG1]])
+  // CHECK:   xevm.DecorationCacheControl =
+  // CHECK-SAME:    [6443 : i32, 0 : i32, 1 : i32],
+  // CHECK-SAME:    [6443 : i32, 1 : i32, 2 : i32]
+  // CHECK: llvm.call spir_funccc @_Z31intel_sub_group_block_write_ul2PU3AS3mDv2_m
   // CHECK-SAME: {function_type = !llvm.func<void (ptr<3>, vector<2xi64>)>, linkage = #llvm.linkage<external>,
   // CHECK-SAME:   no_unwind, sym_name = "_Z31intel_sub_group_block_write_ul2PU3AS3mDv2_m", visibility_ = 0 : i64,
-  // CHECK-SAME:   will_return, xevm.DecorationCacheControl =
-  // CHECK-SAME:    [6443 : i32, 0 : i32, 2 : i32, 0 : i32],
-  // CHECK-SAME:    [6443 : i32, 1 : i32, 2 : i32, 0 : i32]
+  // CHECK-SAME:   will_return
   xevm.blockstore %ptr, %data <{cache_control=#xevm.store_cache_control<L1wt_L2uc_L3wb>}> : (!llvm.ptr<3>, vector<2xi64>)
   llvm.return
 }
@@ -336,12 +348,13 @@ llvm.func @blockstore_as3(%ptr: !llvm.ptr<3>, %data: vector<2xi64>) {
 // CHECK-LABEL: llvm.func spir_funccc @_Z30intel_sub_group_block_write_ulPU3AS3mm
 // CHECK: llvm.func @blockstore_scalar(%[[ARG0:.*]]: !llvm.ptr<3>, %[[ARG1:.*]]: i64) {
 llvm.func @blockstore_scalar(%ptr: !llvm.ptr<3>, %data: i64) {
-  // CHECK: llvm.call spir_funccc @_Z30intel_sub_group_block_write_ulPU3AS3mm(%[[ARG0]], %[[ARG1]])
+  // CHECK:   xevm.DecorationCacheControl =
+  // CHECK-SAME:    [6443 : i32, 0 : i32, 1 : i32],
+  // CHECK-SAME:    [6443 : i32, 1 : i32, 2 : i32]
+  // CHECK: llvm.call spir_funccc @_Z30intel_sub_group_block_write_ulPU3AS3mm
   // CHECK-SAME: {function_type = !llvm.func<void (ptr<3>, i64)>, linkage = #llvm.linkage<external>,
   // CHECK-SAME:   no_unwind, sym_name = "_Z30intel_sub_group_block_write_ulPU3AS3mm", visibility_ = 0 : i64,
-  // CHECK-SAME:   will_return, xevm.DecorationCacheControl =
-  // CHECK-SAME:    [6443 : i32, 0 : i32, 2 : i32, 0 : i32],
-  // CHECK-SAME:    [6443 : i32, 1 : i32, 2 : i32, 0 : i32]
+  // CHECK-SAME:   will_return
   xevm.blockstore %ptr, %data <{cache_control=#xevm.store_cache_control<L1wt_L2uc_L3wb>}> : (!llvm.ptr<3>, i64)
   llvm.return
 }
diff --git a/mlir/test/Target/LLVMIR/xevm.mlir b/mlir/test/Target/LLVMIR/xevm.mlir
index 112d923607060..ecde5fa09a4fb 100644
--- a/mlir/test/Target/LLVMIR/xevm.mlir
+++ b/mlir/test/Target/LLVMIR/xevm.mlir
@@ -1,53 +1,35 @@
 // RUN: mlir-translate --split-input-file -mlir-to-llvmir %s | FileCheck %s
 
-module {
-  llvm.func spir_funccc @_Z8prefetchPU3AS1Kcm(!llvm.ptr<1>, i64)
-  llvm.func @prefetch(%arg0: !llvm.ptr<1>) {
-    %0 = llvm.mlir.constant(1 : i64) : i64
-    // CHECK-LABEL: call spir_func void @_Z8prefetchPU3AS1Kcm
-    // CHECK-SAME: !spirv.DecorationCacheControlINTEL ![[DECO1:.*]]
-    llvm.call spir_funccc @_Z8prefetchPU3AS1Kcm(%arg0, %0)
-      {function_type = !llvm.func<void (ptr<1>, i64)>, linkage = #llvm.linkage<external>,
-       no_unwind, sym_name = "_Z8prefetchPU3AS1Kcm", visibility_ = 0 : i64,
-       xevm.DecorationCacheControl = [[6442 : i32, 0 : i32, 1 : i32, 0 : i32], [6442 : i32, 1 : i32, 1 : i32, 0 : i32]]}
-      : (!llvm.ptr<1>, i64) -> ()
-    llvm.return
-  }
-}
-
-// CHECK: ![[DECO1]] = !{![[DECO2:.*]], ![[DECO3:.*]]}
-// CHECK: ![[DECO2]] = !{i32 6442, i32 0, i32 1, i32 0}
-// CHECK: ![[DECO3]] = !{i32 6442, i32 1, i32 1, i32 0}
-
-// -----
 module {
   // CHECK-LABEL: define i32 @load(ptr addrspace(1)
   // CHECK-SAME: %[[ARG0:.*]]) {
   llvm.func @load(%arg0: !llvm.ptr<1>) -> i32 {
-    // CHECK: load i32, ptr addrspace(1) %[[ARG0]], align 4,
+    // CHECK: getelementptr i32
     // CHECK-SAME: !spirv.DecorationCacheControlINTEL ![[DECO1:.*]]
-    %0 = llvm.load %arg0 {xevm.DecorationCacheControl = [[6442 : i32, 0 : i32, 1 : i32, 0 : i32], [6442 : i32, 1 : i32, 1 : i32, 0 : i32]]} : !llvm.ptr<1> -> i32
-    llvm.return %0 : i32
+    %0 = llvm.getelementptr %arg0[0] {xevm.DecorationCacheControl = [[6442 : i32, 0 : i32, 1 : i32], [6442 : i32, 1 : i32, 1 : i32]]} : (!llvm.ptr<1>) -> !llvm.ptr<1>, i32
+    %1 = llvm.load %0 : !llvm.ptr<1> -> i32
+    llvm.return %1 : i32
   }
 }
 
 // CHECK: ![[DECO1]] = !{![[DECO2:.*]], ![[DECO3:.*]]}
-// CHECK: ![[DECO2]] = !{i32 6442, i32 0, i32 1, i32 0}
-// CHECK: ![[DECO3]] = !{i32 6442, i32 1, i32 1, i32 0}
+// CHECK: ![[DECO2]] = !{i32 6442, i32 0, i32 1}
+// CHECK: ![[DECO3]] = !{i32 6442, i32 1, i32 1}
 
 // -----
 module {
   // CHECK-LABEL: define void @store(ptr addrspace(1)
   // CHECK-SAME: %[[ARG0:.*]], i32 %[[ARG1:.*]]) {
   llvm.func @store(%arg0: !llvm.ptr<1>, %arg1: i32) {
-    // CHECK: store i32 %[[ARG1]], ptr addrspace(1) %[[ARG0]], align 4,
+    // CHECK: getelementptr i32
     // CHECK-SAME: !spirv.DecorationCacheControlINTEL ![[DECO1:.*]]
-    llvm.store %arg1, %arg0 {xevm.DecorationCacheControl = [[6443 : i32, 0 : i32, 2 : i32, 0 : i32], [6443 : i32, 1 : i32, 2 : i32, 0 : i32]]} : i32, !llvm.ptr<1>
+    %0 = llvm.getelementptr %arg0[0] {xevm.DecorationCacheControl = [[6443 : i32, 0 : i32, 1 : i32], [6443 : i32, 1 : i32, 2 : i32]]} : (!llvm.ptr<1>) -> !llvm.ptr<1>, i32
+    llvm.store %arg1, %0 : i32, !llvm.ptr<1>
     llvm.return
   }
 }
 
 // CHECK: ![[DECO1]] = !{![[DECO2:.*]], ![[DECO3:.*]]}
-// CHECK: ![[DECO2]] = !{i32 6443, i32 0, i32 2, i32 0}
-// CHECK: ![[DECO3]] = !{i32 6443, i32 1, i32 2, i32 0}
+// CHECK: ![[DECO2]] = !{i32 6443, i32 0, i32 1}
+// CHECK: ![[DECO3]] = !{i32 6443, i32 1, i32 2}
 

>From 1c69a990711677a0eafd669cbb1cefef24a55ab1 Mon Sep 17 00:00:00 2001
From: "Lee, Sang Ik" <sang.ik.lee at intel.com>
Date: Tue, 13 Jan 2026 21:37:48 +0000
Subject: [PATCH 2/4] Use spirv.Decorations instead of
 spirv.DecorationCacheControlINTEL

---
 .../Target/LLVMIR/Dialect/XeVM/XeVMToLLVMIRTranslation.cpp    | 2 +-
 mlir/test/Target/LLVMIR/xevm.mlir                             | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/XeVM/XeVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/XeVM/XeVMToLLVMIRTranslation.cpp
index ba098aa5fde50..ec9cbc3b2d11e 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/XeVM/XeVMToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/XeVM/XeVMToLLVMIRTranslation.cpp
@@ -77,7 +77,7 @@ class XeVMDialectLLVMIRTranslationInterface
           return llvm::MDNode::get(ctx, metadata);
         });
     constexpr llvm::StringLiteral decorationCacheControlMDName =
-        "spirv.DecorationCacheControlINTEL";
+        "spirv.Decorations";
     inst->setMetadata(decorationCacheControlMDName,
                       llvm::MDNode::get(ctx, decorations));
     return success();
diff --git a/mlir/test/Target/LLVMIR/xevm.mlir b/mlir/test/Target/LLVMIR/xevm.mlir
index ecde5fa09a4fb..2ec06eea4cfa0 100644
--- a/mlir/test/Target/LLVMIR/xevm.mlir
+++ b/mlir/test/Target/LLVMIR/xevm.mlir
@@ -5,7 +5,7 @@ module {
   // CHECK-SAME: %[[ARG0:.*]]) {
   llvm.func @load(%arg0: !llvm.ptr<1>) -> i32 {
     // CHECK: getelementptr i32
-    // CHECK-SAME: !spirv.DecorationCacheControlINTEL ![[DECO1:.*]]
+    // CHECK-SAME: !spirv.Decorations ![[DECO1:.*]]
     %0 = llvm.getelementptr %arg0[0] {xevm.DecorationCacheControl = [[6442 : i32, 0 : i32, 1 : i32], [6442 : i32, 1 : i32, 1 : i32]]} : (!llvm.ptr<1>) -> !llvm.ptr<1>, i32
     %1 = llvm.load %0 : !llvm.ptr<1> -> i32
     llvm.return %1 : i32
@@ -22,7 +22,7 @@ module {
   // CHECK-SAME: %[[ARG0:.*]], i32 %[[ARG1:.*]]) {
   llvm.func @store(%arg0: !llvm.ptr<1>, %arg1: i32) {
     // CHECK: getelementptr i32
-    // CHECK-SAME: !spirv.DecorationCacheControlINTEL ![[DECO1:.*]]
+    // CHECK-SAME: !spirv.Decorations ![[DECO1:.*]]
     %0 = llvm.getelementptr %arg0[0] {xevm.DecorationCacheControl = [[6443 : i32, 0 : i32, 1 : i32], [6443 : i32, 1 : i32, 2 : i32]]} : (!llvm.ptr<1>) -> !llvm.ptr<1>, i32
     llvm.store %arg1, %0 : i32, !llvm.ptr<1>
     llvm.return

>From 6700d8a808619f2a28e1513e7049f04cc2536fab Mon Sep 17 00:00:00 2001
From: "Lee, Sang Ik" <sang.ik.lee at intel.com>
Date: Wed, 14 Jan 2026 18:10:07 +0000
Subject: [PATCH 3/4] Add cache control to XeVM block load store integration
 test

---
 .../Integration/Dialect/XeVM/GPU/xevm_block_load_store.mlir   | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlir/test/Integration/Dialect/XeVM/GPU/xevm_block_load_store.mlir b/mlir/test/Integration/Dialect/XeVM/GPU/xevm_block_load_store.mlir
index cea05b8709d72..a610e28a07e41 100644
--- a/mlir/test/Integration/Dialect/XeVM/GPU/xevm_block_load_store.mlir
+++ b/mlir/test/Integration/Dialect/XeVM/GPU/xevm_block_load_store.mlir
@@ -32,7 +32,7 @@ module @gemm attributes {gpu.container_module} {
       // would only load 4 elements into vector<8xi32>
       %loaded = xevm.blockload2d %src, %base_width, %base_height, %base_pitch, %x, %y
           <{elem_size_in_bits=32 : i32, tile_width=16 : i32, tile_height=8 : i32, v_blocks=1 : i32,
-            transpose=false, pack_register=false}> : (!llvm.ptr<1>, i32, i32, i32, i32, i32) -> vector<8xi32>
+            transpose=false, pack_register=false, cache_control=#xevm.load_cache_control<L1c_L2c_L3c>}> : (!llvm.ptr<1>, i32, i32, i32, i32, i32) -> vector<8xi32>
       %loaded_f32 = vector.bitcast %loaded : vector<8xi32> to vector<8xf32>
       %c0 = arith.constant 0 : index
       %thread_x = gpu.thread_id x
@@ -42,7 +42,7 @@ module @gemm attributes {gpu.container_module} {
       %loaded_f32_modified = vector.insert %thread_x_f32, %loaded_f32[%c0] : f32 into vector<8xf32>
       %loaded_modified = vector.bitcast %loaded_f32_modified : vector<8xf32> to vector<8xi32>
       xevm.blockstore2d %dst, %base_width, %base_height, %base_pitch, %x, %y, %loaded_modified
-          <{elem_size_in_bits=32 : i32, tile_width=16 : i32, tile_height=8 : i32}>
+          <{elem_size_in_bits=32 : i32, tile_width=16 : i32, tile_height=8 : i32, cache_control = #xevm.store_cache_control<L1wt_L2uc_L3wb>}>
           : (!llvm.ptr<1>, i32, i32, i32, i32, i32, vector<8xi32>)
       gpu.return
     }

>From 3b0ec8fb40fadd3f0e37a6cb8584d6aae244f2dd Mon Sep 17 00:00:00 2001
From: "Lee, Sang Ik" <sang.ik.lee at intel.com>
Date: Mon, 9 Feb 2026 17:33:05 +0000
Subject: [PATCH 4/4] GEPOp: use i8 element type as other types drop cache
 control metadata.

---
 mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp | 32 +++++++++++--------
 .../Conversion/XeVMToLLVM/xevm-to-llvm.mlir   | 10 +++---
 2 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp b/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp
index e1a443ad39af7..e208ff43166e5 100644
--- a/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp
+++ b/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp
@@ -425,9 +425,10 @@ class PrefetchToOCLPattern : public OpConversionPattern<PrefetchOp> {
         LLVM::ConstantOp::create(rewriter, loc, rewriter.getI64Type(), 1);
     Value ptrOp = op.getPtr();
     // Create getelementptr op to attach cache control metadata
-    // element type doesn't matter here as we use zero index, so use i32
+    // element type doesn't matter for address computation since the index is
+    // zero, but non-i8 types lose metadata in the SPIRV backend, so use i8
     LLVM::GEPOp gep = LLVM::GEPOp::create(rewriter, loc, ptrOp.getType(),
-                                          rewriter.getI32Type(), ptrOp,
+                                          rewriter.getI8Type(), ptrOp,
                                           ArrayRef<LLVM::GEPArg>{0});
     if (std::optional<ArrayAttr> optCacheControls =
             getCacheControlMetadata(rewriter, op))
@@ -534,10 +535,11 @@ class LoadStorePrefetchToOCLPattern : public OpConversionPattern<OpType> {
         rewriter, loc, VectorType::get(2, i32Type), byteCoord, op.getY(), one);
     Value ptrOp = op.getPtr();
     // Create getelementptr op to attach cache control metadata
-    // element type doesn't matter here as we use zero index, so use i32
-    LLVM::GEPOp gep =
-        LLVM::GEPOp::create(rewriter, loc, ptrOp.getType(), i32Type, ptrOp,
-                            ArrayRef<LLVM::GEPArg>{0});
+    // element type doesn't matter for address computation since the index is
+    // zero, but non-i8 types lose metadata in the SPIRV backend, so use i8
+    LLVM::GEPOp gep = LLVM::GEPOp::create(rewriter, loc, ptrOp.getType(),
+                                          rewriter.getI8Type(), ptrOp,
+                                          ArrayRef<LLVM::GEPArg>{0});
     if (std::optional<ArrayAttr> optCacheControls =
             getCacheControlMetadata(rewriter, op))
       gep->setAttr(XeVMDialect::getCacheControlsAttrName(), *optCacheControls);
@@ -663,10 +665,11 @@ class BlockLoadStore1DToOCLPattern : public OpConversionPattern<OpType> {
     SmallVector<Value, 2> args{};
     Value ptrOp = op.getPtr();
     // Create getelementptr op to attach cache control metadata
-    // element type doesn't matter here as we use zero index, so use i32
-    LLVM::GEPOp gep = LLVM::GEPOp::create(
-        rewriter, op.getLoc(), ptrOp.getType(), rewriter.getI32Type(), ptrOp,
-        ArrayRef<LLVM::GEPArg>{0});
+    // element type doesn't matter for address computation since the index is
+    // zero, but non-i8 types lose metadata in the SPIRV backend, so use i8
+    LLVM::GEPOp gep = LLVM::GEPOp::create(rewriter, op.getLoc(),
+                                          ptrOp.getType(), rewriter.getI8Type(),
+                                          ptrOp, ArrayRef<LLVM::GEPArg>{0});
     if (std::optional<ArrayAttr> optCacheControls =
             getCacheControlMetadata(rewriter, op))
       gep->setAttr(XeVMDialect::getCacheControlsAttrName(), *optCacheControls);
@@ -712,10 +715,11 @@ class LLVMLoadStoreToOCLPattern : public OpConversionPattern<OpType> {
     constexpr bool isStore = std::is_same_v<OpType, LLVM::StoreOp>;
     Value ptrOp = op.getAddr();
     // Create getelementptr op to attach cache control metadata
-    // element type doesn't matter here as we use zero index, so use i32
-    LLVM::GEPOp gep = LLVM::GEPOp::create(
-        rewriter, op.getLoc(), ptrOp.getType(), rewriter.getI32Type(), ptrOp,
-        ArrayRef<LLVM::GEPArg>{0});
+    // element type doesn't matter for address computation since the index is
+    // zero, but non-i8 types lose metadata in the SPIRV backend, so use i8
+    LLVM::GEPOp gep = LLVM::GEPOp::create(rewriter, op.getLoc(),
+                                          ptrOp.getType(), rewriter.getI8Type(),
+                                          ptrOp, ArrayRef<LLVM::GEPArg>{0});
     if (std::optional<ArrayAttr> optCacheControls =
             getCacheControlMetadata(rewriter, op))
       gep->setAttr(XeVMDialect::getCacheControlsAttrName(), *optCacheControls);
diff --git a/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir b/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir
index 59fb9f1fbb17f..30c68f86bdd91 100644
--- a/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir
+++ b/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir
@@ -12,7 +12,7 @@ llvm.func @blockload2d(%a: !llvm.ptr<1>, %base_width_a: i32, %base_height_a: i32
   // CHECK: %[[VAR2:.*]] = llvm.mlir.constant(1 : i32) : i32
   // CHECK: %[[VAR3:.*]] = llvm.insertelement %[[ARG4]], %[[VAR0]][%[[VAR1]] : i32] : vector<2xi32>
   // CHECK: %[[VAR4:.*]] = llvm.insertelement %[[ARG5]], %[[VAR3]][%[[VAR2]] : i32] : vector<2xi32>
-  // CHECK: %[[VAR6:.*]] = llvm.getelementptr %[[ARG0]][0] : (!llvm.ptr<1>) -> !llvm.ptr<1>, i32
+  // CHECK: %[[VAR6:.*]] = llvm.getelementptr %[[ARG0]][0] : (!llvm.ptr<1>) -> !llvm.ptr<1>, i8
   // CHECK: %[[VAR7:.*]] = llvm.alloca %[[VAR5]] x i16 : (i32) -> !llvm.ptr
   // CHECK: llvm.call spir_funccc @_Z41intel_sub_group_2d_block_read_16b_8r16x1cPU3AS1viiiDv2_iPt(
   // CHECK-SAME: %[[VAR6]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[VAR4]], %[[VAR7]])
@@ -54,7 +54,7 @@ llvm.func @blockload2d_v_blocks(%a: !llvm.ptr<1>, %base_width_a: i32, %base_heig
   // CHECK: %[[VAR2:.*]] = llvm.mlir.constant(1 : i32) : i32
   // CHECK: %[[VAR3:.*]] = llvm.insertelement %[[ARG4]], %[[VAR0]][%[[VAR1]] : i32] : vector<2xi32>
   // CHECK: %[[VAR4:.*]] = llvm.insertelement %[[ARG5]], %[[VAR3]][%[[VAR2]] : i32] : vector<2xi32>
-  // CHECK: %[[VAR6:.*]] = llvm.getelementptr %[[ARG0]][0] : (!llvm.ptr<1>) -> !llvm.ptr<1>, i32
+  // CHECK: %[[VAR6:.*]] = llvm.getelementptr %[[ARG0]][0] : (!llvm.ptr<1>) -> !llvm.ptr<1>, i8
   // CHECK: %[[VAR7:.*]] = llvm.alloca %[[VAR5]] x i16 : (i32) -> !llvm.ptr
   // CHECK: llvm.call spir_funccc @_Z41intel_sub_group_2d_block_read_16b_8r16x2cPU3AS1viiiDv2_iPt(
   // CHECK-SAME: %[[VAR6]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[VAR4]], %[[VAR7]])
@@ -84,7 +84,7 @@ llvm.func @blockload2d_pack_register(%a: !llvm.ptr<1>, %base_width_a: i32, %base
   // CHECK: %[[VAR2:.*]] = llvm.mlir.constant(1 : i32) : i32
   // CHECK: %[[VAR3:.*]] = llvm.insertelement %[[ARG4]], %[[VAR0]][%[[VAR1]] : i32] : vector<2xi32>
   // CHECK: %[[VAR4:.*]] = llvm.insertelement %[[ARG5]], %[[VAR3]][%[[VAR2]] : i32] : vector<2xi32>
-  // CHECK: %[[VAR6:.*]] = llvm.getelementptr %[[ARG0]][0] : (!llvm.ptr<1>) -> !llvm.ptr<1>, i32
+  // CHECK: %[[VAR6:.*]] = llvm.getelementptr %[[ARG0]][0] : (!llvm.ptr<1>) -> !llvm.ptr<1>, i8
   // CHECK: %[[VAR7:.*]] = llvm.alloca %[[VAR5]] x i32 : (i32) -> !llvm.ptr
   // CHECK: llvm.call spir_funccc @_Z52intel_sub_group_2d_block_read_transform_16b_16r16x1cPU3AS1viiiDv2_iPj(
   // CHECK-SAME: %[[VAR6]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[VAR4]], %[[VAR7]])
@@ -114,7 +114,7 @@ llvm.func @blockload2d_transpose(%a: !llvm.ptr<1>, %base_width_a: i32, %base_hei
   // CHECK: %[[VAR2:.*]] = llvm.mlir.constant(1 : i32) : i32
   // CHECK: %[[VAR3:.*]] = llvm.insertelement %[[ARG4]], %[[VAR0]][%[[VAR1]] : i32] : vector<2xi32>
   // CHECK: %[[VAR4:.*]] = llvm.insertelement %[[ARG5]], %[[VAR3]][%[[VAR2]] : i32] : vector<2xi32>
-  // CHECK: %[[VAR6:.*]] = llvm.getelementptr %[[ARG0]][0] : (!llvm.ptr<1>) -> !llvm.ptr<1>, i32
+  // CHECK: %[[VAR6:.*]] = llvm.getelementptr %[[ARG0]][0] : (!llvm.ptr<1>) -> !llvm.ptr<1>, i8
   // CHECK: %[[VAR7:.*]] = llvm.alloca %[[VAR5]] x i32 : (i32) -> !llvm.ptr
   // CHECK: llvm.call spir_funccc @_Z51intel_sub_group_2d_block_read_transpose_32b_16r8x1cPU3AS1viiiDv2_iPj(
   // CHECK-SAME: %[[VAR6]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[VAR4]], %[[VAR7]])
@@ -144,7 +144,7 @@ llvm.func @blockstore2d(%c: !llvm.ptr<1>, %base_width_c: i32, %base_height_c: i3
   // CHECK: %[[VAR2:.*]] = llvm.mlir.constant(1 : i32) : i32
   // CHECK: %[[VAR3:.*]] = llvm.insertelement %[[ARG4]], %[[VAR0]][%[[VAR1]] : i32] : vector<2xi32>
   // CHECK: %[[VAR4:.*]] = llvm.insertelement %[[ARG5]], %[[VAR3]][%[[VAR2]] : i32] : vector<2xi32>
-  // CHECK: %[[VAR6:.*]] = llvm.getelementptr %[[ARG0]][0] : (!llvm.ptr<1>) -> !llvm.ptr<1>, i32
+  // CHECK: %[[VAR6:.*]] = llvm.getelementptr %[[ARG0]][0] : (!llvm.ptr<1>) -> !llvm.ptr<1>, i8
   // CHECK: %[[VAR7:.*]] = llvm.alloca %[[VAR5]] x i32 : (i32) -> !llvm.ptr
   // CHECK: llvm.store %[[ARG6]], %[[VAR7]] : vector<8xi32>, !llvm.ptr
   // CHECK: llvm.call spir_funccc @_Z42intel_sub_group_2d_block_write_32b_8r16x1cPU3AS1viiiDv2_iPj(



More information about the Mlir-commits mailing list