[Mlir-commits] [mlir] 0797a10 - [MLIR][XeVM] Rewrite llvm.alloca if addr_space is 3 (#183417)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Mon Mar 2 11:48:26 PST 2026
Author: Sang Ik Lee
Date: 2026-03-02T19:48:20Z
New Revision: 0797a10cc537bb4379df7fa86ba2e420edb5ecd3
URL: https://github.com/llvm/llvm-project/commit/0797a10cc537bb4379df7fa86ba2e420edb5ecd3
DIFF: https://github.com/llvm/llvm-project/commit/0797a10cc537bb4379df7fa86ba2e420edb5ecd3.diff
LOG: [MLIR][XeVM] Rewrite llvm.alloca if addr_space is 3 (#183417)
Rewrite llvm.alloca with addr_space 3 into llvm.mlir.global and llvm.mlir.addressof
Added:
mlir/test/Conversion/XeVMToLLVM/rewrite_alloca.mlir
Modified:
mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp
Removed:
################################################################################
diff --git a/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp b/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp
index e7537ba1f0a79..24009b63e8e26 100644
--- a/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp
+++ b/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp
@@ -9,14 +9,17 @@
#include "mlir/Conversion/XeVMToLLVM/XeVMToLLVM.h"
#include "mlir/Conversion/LLVMCommon/Pattern.h"
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/FunctionCallUtils.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/LLVMIR/XeVMDialect.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Support/LLVM.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/FormatVariadic.h"
#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/Matchers.h"
#include "mlir/IR/Types.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
@@ -836,6 +839,58 @@ class SubgroupOpWorkitemOpToOCLPattern : public OpConversionPattern<OpType> {
}
};
+/// Rewrites an `llvm.alloca` whose result pointer is in address space 3 into
+/// an `llvm.mlir.global` placed at the start of the enclosing module body plus
+/// an `llvm.mlir.addressof` that replaces the original alloca result.
+///
+/// The match fails (and the op is left untouched) when:
+///   * the result pointer is not in address space 3,
+///   * the nearest enclosing symbol table is neither a `builtin.module` nor a
+///     `gpu.module`,
+///   * the array size is not an integer constant that fits in 64 bits.
+class AllocaToGlobalPattern : public OpConversionPattern<LLVM::AllocaOp> {
+  using OpConversionPattern::OpConversionPattern;
+  LogicalResult
+  matchAndRewrite(LLVM::AllocaOp op, LLVM::AllocaOp::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    auto ptrType = cast<LLVM::LLVMPointerType>(op.getType());
+    unsigned addrSpace = ptrType.getAddressSpace();
+    if (addrSpace != 3)
+      return rewriter.notifyMatchFailure(op, "alloca is not in addr_space 3");
+
+    // The global is created in the nearest enclosing symbol table.
+    Operation *symTable = op->getParentWithTrait<OpTrait::SymbolTable>();
+    if (!symTable)
+      return rewriter.notifyMatchFailure(op, "no enclosing symbol table");
+    Block *moduleBody = nullptr;
+    if (auto mod = dyn_cast<ModuleOp>(symTable))
+      moduleBody = mod.getBody();
+    else if (auto gpuMod = dyn_cast<gpu::GPUModuleOp>(symTable))
+      moduleBody = gpuMod.getBody();
+    else
+      return rewriter.notifyMatchFailure(
+          op, "enclosing symbol table is not a module or gpu.module");
+
+    // A global needs a static type, so the element count must be a constant.
+    APInt cst;
+    if (!matchPattern(op.getArraySize(), m_ConstantInt(&cst)))
+      return rewriter.notifyMatchFailure(op, "array size is not a constant");
+    // getZExtValue() asserts on values that need more than 64 bits.
+    if (cst.getActiveBits() > 64)
+      return rewriter.notifyMatchFailure(op, "array size exceeds 64 bits");
+
+    // Pick the first `__global_alloca_<N>` that is not yet defined in the
+    // enclosing symbol table. Probing the table (instead of bumping a
+    // process-wide static counter) avoids unsynchronized shared state, keeps
+    // names independent of what was converted earlier in the process, and
+    // cannot collide with a symbol that already exists in the module. Each
+    // probe is a linear scan of the module body.
+    std::string globalName;
+    unsigned idx = 0;
+    do {
+      globalName = "__global_alloca_" + std::to_string(idx++);
+    } while (SymbolTable::lookupSymbolIn(symTable, globalName));
+
+    auto globalType = LLVM::LLVMArrayType::get(
+        rewriter.getContext(), op.getElemType(), cst.getZExtValue());
+    LLVM::GlobalOp globalVar;
+    {
+      // Restore the insertion point once the global has been created.
+      OpBuilder::InsertionGuard guard(rewriter);
+      rewriter.setInsertionPointToStart(moduleBody);
+      globalVar = LLVM::GlobalOp::create(
+          rewriter, op.getLoc(), globalType, /*isConstant=*/false,
+          /*linkage=*/LLVM::Linkage::Internal, /*name=*/globalName,
+          /*value=*/Attribute(),
+          /*alignment=*/op.getAlignment().value_or(0),
+          /*addrSpace=*/addrSpace);
+    }
+    rewriter.replaceOpWithNewOp<LLVM::AddressOfOp>(op, globalVar);
+    return success();
+  }
+};
+
static bool isExtractingContiguousSlice(LLVM::ShuffleVectorOp op) {
if (op.getV1() != op.getV2())
return false;
@@ -1014,8 +1069,20 @@ struct ConvertXeVMToLLVMPass
void ::mlir::populateXeVMToLLVMConversionPatterns(ConversionTarget &target,
RewritePatternSet &patterns) {
- target.addDynamicallyLegalDialect<LLVM::LLVMDialect>(
- [](Operation *op) { return !op->hasAttr("cache_control"); });
+  // LLVM dialect ops are legal unless they match one of the cases below.
+  target.addDynamicallyLegalDialect<LLVM::LLVMDialect>([](Operation *op) {
+    // llvm.alloca with addrspace 3 for OpenCL (Workgroup) is not handled
+    // properly by the SPIRV backend. It needs to be rewritten as a sequence
+    // with an llvm global, so it is reported as illegal here.
+    if (auto allocaOp = dyn_cast<LLVM::AllocaOp>(op)) {
+      auto ptrType = cast<LLVM::LLVMPointerType>(allocaOp.getType());
+      return ptrType.getAddressSpace() != 3;
+    }
+    // Ops carrying a cache_control attribute still have to be converted.
+    return !op->hasAttr("cache_control");
+  });
target.addIllegalDialect<XeVMDialect>();
patterns.add<LoadStorePrefetchToOCLPattern<BlockLoad2dOp>,
LoadStorePrefetchToOCLPattern<BlockStore2dOp>,
@@ -1039,6 +1106,6 @@ void ::mlir::populateXeVMToLLVMConversionPatterns(ConversionTarget &target,
LaunchConfigOpToOCLPattern<GridDimZOp>,
SubgroupOpWorkitemOpToOCLPattern<LaneIdOp>,
SubgroupOpWorkitemOpToOCLPattern<SubgroupIdOp>,
- SubgroupOpWorkitemOpToOCLPattern<SubgroupSizeOp>>(
- patterns.getContext());
+ SubgroupOpWorkitemOpToOCLPattern<SubgroupSizeOp>,
+ AllocaToGlobalPattern>(patterns.getContext());
}
diff --git a/mlir/test/Conversion/XeVMToLLVM/rewrite_alloca.mlir b/mlir/test/Conversion/XeVMToLLVM/rewrite_alloca.mlir
new file mode 100644
index 0000000000000..9f57db3dec016
--- /dev/null
+++ b/mlir/test/Conversion/XeVMToLLVM/rewrite_alloca.mlir
@@ -0,0 +1,76 @@
+// RUN: mlir-opt --convert-xevm-to-llvm --split-input-file %s | FileCheck %s
+
+module {
+  // Each global is inserted at the start of the module body, so the globals
+  // appear in the reverse order of the allocas they replace.
+  // CHECK: llvm.mlir.global internal @__global_alloca_[[G2:.*]]() {addr_space = 3 : i32, alignment = 8 : i64} : !llvm.array<10 x i32>
+  // CHECK: llvm.mlir.global internal @__global_alloca_[[G1:.*]]() {addr_space = 3 : i32, alignment = 8 : i64} : !llvm.array<10 x i32>
+  // CHECK: llvm.mlir.global internal @__global_alloca_[[G0:.*]]() {addr_space = 3 : i32, alignment = 8 : i64} : !llvm.array<10 x i32>
+  // CHECK: llvm.func @test_with_parent_module()
+  llvm.func @test_with_parent_module() -> !llvm.ptr<3> {
+    %0 = llvm.mlir.constant(10 : i32) : i32
+    // CHECK: %[[VAR0:.*]] = llvm.mlir.addressof @__global_alloca_[[G0]] : !llvm.ptr<3>
+    // CHECK: %[[VAR1:.*]] = llvm.mlir.addressof @__global_alloca_[[G1]] : !llvm.ptr<3>
+    // CHECK: %[[VAR2:.*]] = llvm.mlir.addressof @__global_alloca_[[G2]] : !llvm.ptr<3>
+    %1 = llvm.alloca %0 x i32 {alignment = 8 : i64} : (i32) -> !llvm.ptr<3>
+    %2 = llvm.alloca %0 x i32 {alignment = 8 : i64} : (i32) -> !llvm.ptr<3>
+    %3 = llvm.alloca %0 x i32 {alignment = 8 : i64} : (i32) -> !llvm.ptr<3>
+    // The loads must read from the addresses captured above, so the captures
+    // are used here rather than re-defined.
+    // CHECK: %[[VAR3:.*]] = llvm.load %[[VAR1]] : !llvm.ptr<3> -> i32
+    // CHECK: %[[VAR4:.*]] = llvm.load %[[VAR2]] : !llvm.ptr<3> -> i32
+    // CHECK: %[[VAR5:.*]] = llvm.add %[[VAR3]], %[[VAR4]] : i32
+    %4 = llvm.load %2 : !llvm.ptr<3> -> i32
+    %5 = llvm.load %3 : !llvm.ptr<3> -> i32
+    %6 = llvm.add %4, %5 : i32
+    // CHECK: llvm.store %[[VAR5]], %[[VAR0]] : i32, !llvm.ptr<3>
+    // CHECK: llvm.return %[[VAR0]] : !llvm.ptr<3>
+    llvm.store %6, %1 : i32, !llvm.ptr<3>
+    llvm.return %1 : !llvm.ptr<3>
+  }
+}
+
+// -----
+
+// CHECK-LABEL: gpu.module @test
+gpu.module @test {
+  // Each global is inserted at the start of the gpu.module body, so the
+  // globals appear in the reverse order of the allocas they replace.
+  // CHECK: llvm.mlir.global internal @__global_alloca_[[G2:.*]]() {addr_space = 3 : i32, alignment = 8 : i64} : !llvm.array<10 x i32>
+  // CHECK: llvm.mlir.global internal @__global_alloca_[[G1:.*]]() {addr_space = 3 : i32, alignment = 8 : i64} : !llvm.array<10 x i32>
+  // CHECK: llvm.mlir.global internal @__global_alloca_[[G0:.*]]() {addr_space = 3 : i32, alignment = 8 : i64} : !llvm.array<10 x i32>
+  // CHECK: llvm.func @test_with_parent_gpu_module()
+  llvm.func @test_with_parent_gpu_module() -> !llvm.ptr<3> {
+    %0 = llvm.mlir.constant(10 : i32) : i32
+    // CHECK: %[[VAR0:.*]] = llvm.mlir.addressof @__global_alloca_[[G0]] : !llvm.ptr<3>
+    // CHECK: %[[VAR1:.*]] = llvm.mlir.addressof @__global_alloca_[[G1]] : !llvm.ptr<3>
+    // CHECK: %[[VAR2:.*]] = llvm.mlir.addressof @__global_alloca_[[G2]] : !llvm.ptr<3>
+    %1 = llvm.alloca %0 x i32 {alignment = 8 : i64} : (i32) -> !llvm.ptr<3>
+    %2 = llvm.alloca %0 x i32 {alignment = 8 : i64} : (i32) -> !llvm.ptr<3>
+    %3 = llvm.alloca %0 x i32 {alignment = 8 : i64} : (i32) -> !llvm.ptr<3>
+    // The loads must read from the addresses captured above, so the captures
+    // are used here rather than re-defined.
+    // CHECK: %[[VAR3:.*]] = llvm.load %[[VAR1]] : !llvm.ptr<3> -> i32
+    // CHECK: %[[VAR4:.*]] = llvm.load %[[VAR2]] : !llvm.ptr<3> -> i32
+    // CHECK: %[[VAR5:.*]] = llvm.add %[[VAR3]], %[[VAR4]] : i32
+    %4 = llvm.load %2 : !llvm.ptr<3> -> i32
+    %5 = llvm.load %3 : !llvm.ptr<3> -> i32
+    %6 = llvm.add %4, %5 : i32
+    // CHECK: llvm.store %[[VAR5]], %[[VAR0]] : i32, !llvm.ptr<3>
+    // CHECK: llvm.return %[[VAR0]] : !llvm.ptr<3>
+    llvm.store %6, %1 : i32, !llvm.ptr<3>
+    llvm.return %1 : !llvm.ptr<3>
+  }
+}
+
+// -----
+
+module {
+  // Allocas in the default address space must be left untouched.
+  // CHECK-LABEL: llvm.func @test_with_default_addr_space()
+  llvm.func @test_with_default_addr_space() -> !llvm.ptr {
+    // Capture the size operand instead of hard-coding its SSA name.
+    // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(10 : i32) : i32
+    %0 = llvm.mlir.constant(10 : i32) : i32
+    // CHECK: %[[VAR1:.*]] = llvm.alloca %[[SIZE]] x i32 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+    // CHECK: %[[VAR2:.*]] = llvm.alloca %[[SIZE]] x i32 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+    // CHECK: %[[VAR3:.*]] = llvm.alloca %[[SIZE]] x i32 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+    %1 = llvm.alloca %0 x i32 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+    %2 = llvm.alloca %0 x i32 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+    %3 = llvm.alloca %0 x i32 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+    %4 = llvm.load %2 : !llvm.ptr -> i32
+    %5 = llvm.load %3 : !llvm.ptr -> i32
+    %6 = llvm.add %4, %5 : i32
+    llvm.store %6, %1 : i32, !llvm.ptr
+    llvm.return %1 : !llvm.ptr
+  }
+}
More information about the Mlir-commits
mailing list