[Mlir-commits] [flang] [llvm] [mlir] [Flang] [OpenMP] [MLIR] Add lowering support for OMP ALLOCATE directives and its clauses (PR #187167)
Raghu Maddhipatla
llvmlistbot at llvm.org
Tue Mar 31 22:43:22 PDT 2026
https://github.com/raghavendhra updated https://github.com/llvm/llvm-project/pull/187167
>From 5763c9cfab64120d202aa1e205960f2a66b9daa8 Mon Sep 17 00:00:00 2001
From: Raghu Maddhipatla <Raghu.Maddhipatla at amd.com>
Date: Tue, 17 Mar 2026 19:32:38 -0500
Subject: [PATCH 1/4] [Flang] [OpenMP] [MLIR] Add lowering support for OMP
ALLOCATE directive and its clauses.
---
flang/lib/Lower/OpenMP/ClauseProcessor.cpp | 19 ++++
flang/lib/Lower/OpenMP/ClauseProcessor.h | 2 +
flang/lib/Lower/OpenMP/OpenMP.cpp | 40 ++++++-
.../Todo/omp-declarative-allocate-align.f90 | 10 --
.../OpenMP/Todo/omp-declarative-allocate.f90 | 10 --
.../OpenMP/omp-declarative-allocate-align.f90 | 47 ++++++++
.../Lower/OpenMP/omp-declarative-allocate.f90 | 19 ++++
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 15 ++-
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 17 +++
.../mlir/Dialect/OpenMP/OpenMPClauses.td | 4 +-
.../mlir/Target/LLVMIR/ModuleTranslation.h | 13 +++
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 107 ++++++++++++++++++
mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 28 +++++
mlir/test/Dialect/OpenMP/ops.mlir | 16 +--
14 files changed, 314 insertions(+), 33 deletions(-)
delete mode 100644 flang/test/Lower/OpenMP/Todo/omp-declarative-allocate-align.f90
delete mode 100644 flang/test/Lower/OpenMP/Todo/omp-declarative-allocate.f90
create mode 100644 flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90
create mode 100644 flang/test/Lower/OpenMP/omp-declarative-allocate.f90
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index 7ae27daac5d68..8c107d4d72cae 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -325,6 +325,25 @@ static void collectIteratorIVs(
// ClauseProcessor unique clauses
//===----------------------------------------------------------------------===//
+bool ClauseProcessor::processAlign(
+ mlir::omp::AlignClauseOps &result) const {
+ if (auto *clause = findUniqueClause<omp::clause::Align>()) {
+ fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+ const std::optional<std::int64_t> align = evaluate::ToInt64(clause->v);
+ result.align = firOpBuilder.getI64IntegerAttr(*align);
+ return true;
+ }
+ return false;
+}
+
+bool ClauseProcessor::processAllocator(lower::StatementContext &stmtCtx, mlir::omp::AllocatorClauseOps &result) const {
+ if (auto *clause = findUniqueClause<omp::clause::Allocator>()) {
+ result.allocator = fir::getBase(converter.genExprValue(clause->v, stmtCtx));
+ return true;
+ }
+ return false;
+}
+
bool ClauseProcessor::processBare(mlir::omp::BareClauseOps &result) const {
return markClauseOccurrence<omp::clause::OmpxBare>(result.bare);
}
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h
index 6c6056aac77e3..583e3909733d3 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.h
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h
@@ -57,6 +57,8 @@ class ClauseProcessor {
: converter(converter), semaCtx(semaCtx), clauses(clauses) {}
// 'Unique' clauses: They can appear at most once in the clause list.
+ bool processAlign(mlir::omp::AlignClauseOps &result) const;
+ bool processAllocator(lower::StatementContext &stmtCtx, mlir::omp::AllocatorClauseOps &result) const;
bool processBare(mlir::omp::BareClauseOps &result) const;
bool processBind(mlir::omp::BindClauseOps &result) const;
bool processCancelDirectiveName(
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 4d135019ea70c..9d5f40c771fc3 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1496,6 +1496,21 @@ static OpTy genWrapperOp(lower::AbstractConverter &converter,
// Code generation functions for clauses
//===----------------------------------------------------------------------===//
+static void genAllocateClauses(lower::AbstractConverter &converter,
+ semantics::SemanticsContext &semaCtx,
+ lower::StatementContext &stmtCtx,
+ const ObjectList &objects,
+ const List<Clause> &clauses, mlir::Location loc,
+ llvm::SmallVectorImpl<mlir::Value> &operandRange,
+ mlir::omp::AllocateDirOperands &clauseOps) {
+ if (!objects.empty())
+ genObjectList(objects, converter, operandRange);
+
+ ClauseProcessor cp(converter, semaCtx, clauses);
+ cp.processAlign(clauseOps);
+ cp.processAllocator(stmtCtx, clauseOps);
+}
+
static void genCancelClauses(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
const List<Clause> &clauses, mlir::Location loc,
@@ -1916,6 +1931,17 @@ static void genWsloopClauses(
//===----------------------------------------------------------------------===//
// Code generation functions for leaf constructs
//===----------------------------------------------------------------------===//
+static mlir::omp::AllocateDirOp
+genAllocateDirOp(lower::AbstractConverter &converter,
+ semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, lower::pft::Evaluation &eval,
+ mlir::Location loc, const ObjectList &objects, const ConstructQueue &queue, ConstructQueue::const_iterator item) {
+ llvm::SmallVector<mlir::Value> operandRange;
+ mlir::omp::AllocateDirOperands clauseOps;
+ genAllocateClauses(converter, semaCtx, stmtCtx, objects, item->clauses, loc,
+ operandRange, clauseOps);
+
+ return mlir::omp::AllocateDirOp::create(converter.getFirOpBuilder(), loc, operandRange, clauseOps.align, clauseOps.allocator);
+}
static mlir::omp::BarrierOp
genBarrierOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
@@ -3817,8 +3843,18 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OmpAllocateDirective &allocate) {
- if (!semaCtx.langOptions().OpenMPSimd)
- TODO(converter.getCurrentLocation(), "OmpAllocateDirective");
+ lower::StatementContext stmtCtx;
+ ObjectList objects = makeObjects((allocate.BeginDir().Arguments()), semaCtx);
+ const auto &clauseList = (allocate.BeginDir().Clauses());
+ List<Clause> clauses = makeClauses(clauseList, semaCtx);
+ mlir::Location loc = converter.genLocation(allocate.source);
+
+ ConstructQueue queue{buildConstructQueue(
+ converter.getFirOpBuilder().getModule(), semaCtx, eval, allocate.source,
+ llvm::omp::Directive::OMPD_allocate, clauses)};
+
+ genAllocateDirOp(converter, semaCtx, stmtCtx, eval, loc, objects,
+ queue, queue.begin());
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
diff --git a/flang/test/Lower/OpenMP/Todo/omp-declarative-allocate-align.f90 b/flang/test/Lower/OpenMP/Todo/omp-declarative-allocate-align.f90
deleted file mode 100644
index fec146ac70313..0000000000000
--- a/flang/test/Lower/OpenMP/Todo/omp-declarative-allocate-align.f90
+++ /dev/null
@@ -1,10 +0,0 @@
-! This test checks lowering of OpenMP allocate Directive with align clause.
-
-! RUN: not %flang_fc1 -emit-fir -fopenmp -fopenmp-version=51 %s 2>&1 | FileCheck %s
-
-program main
- integer :: x
-
- ! CHECK: not yet implemented: OmpAllocateDirective
- !$omp allocate(x) align(32)
-end
diff --git a/flang/test/Lower/OpenMP/Todo/omp-declarative-allocate.f90 b/flang/test/Lower/OpenMP/Todo/omp-declarative-allocate.f90
deleted file mode 100644
index 7cae8051fda77..0000000000000
--- a/flang/test/Lower/OpenMP/Todo/omp-declarative-allocate.f90
+++ /dev/null
@@ -1,10 +0,0 @@
-! This test checks lowering of OpenMP allocate Directive.
-
-! RUN: not %flang_fc1 -emit-fir -fopenmp %s 2>&1 | FileCheck %s
-
-program main
- integer :: x, y
-
- ! CHECK: not yet implemented: OmpAllocateDirective
- !$omp allocate(x, y)
-end
diff --git a/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90 b/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90
new file mode 100644
index 0000000000000..50c6ab1f64002
--- /dev/null
+++ b/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90
@@ -0,0 +1,47 @@
+! This test checks lowering of OpenMP allocate Directive with align and allocator
+! clauses to LLVM IR. Verifies code generation for:
+! - align(16) only (null allocator)
+! - allocator(omp_default_mem_alloc) only (no align)
+! - align(64) allocator(omp_cgroup_mem_alloc) (both clauses, array variable)
+! - align(32) allocator(3) (both clauses, multiple variables)
+
+! RUN: %flang_fc1 -emit-llvm %openmp_flags -fopenmp-version=51 %s -o - 2>&1 | FileCheck %s
+
+program main
+ use omp_lib
+ integer :: x, y
+ integer :: z(10)
+ character c
+ real(kind = 16) :: r
+ complex cmplx
+ !$omp allocate(x) align(16)
+ !$omp allocate(y) allocator(omp_default_mem_alloc)
+ !$omp allocate(z) align(64) allocator(omp_cgroup_mem_alloc)
+ !$omp allocate(c, r, cmplx) align(32) allocator(3)
+ x = 1
+ y = 2
+ z = x + y
+ print *, "z : ", z
+end program
+
+! CHECK: define void @_QQmain()
+! CHECK: call i32 @__kmpc_global_thread_num(
+
+! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 16, i64 {{.*}}, ptr null)
+! CHECK: call ptr @__kmpc_alloc(i32 {{.*}}, i64 {{.*}}, ptr inttoptr (i64 1 to ptr))
+! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 64, i64 {{.*}}, ptr inttoptr (i64 6 to ptr))
+! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 32, i64 {{.*}}, ptr inttoptr (i32 3 to ptr))
+! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 32, i64 {{.*}}, ptr inttoptr (i32 3 to ptr))
+! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 32, i64 {{.*}}, ptr inttoptr (i32 3 to ptr))
+
+! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 3 to ptr))
+! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 3 to ptr))
+! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 3 to ptr))
+! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i64 6 to ptr))
+! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i64 1 to ptr))
+! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr null)
+! CHECK: ret void
+
+! CHECK: declare noalias ptr @__kmpc_aligned_alloc(i32, i64, i64, ptr)
+! CHECK: declare noalias ptr @__kmpc_alloc(i32, i64, ptr)
+! CHECK: declare void @__kmpc_free(i32, ptr, ptr)
diff --git a/flang/test/Lower/OpenMP/omp-declarative-allocate.f90 b/flang/test/Lower/OpenMP/omp-declarative-allocate.f90
new file mode 100644
index 0000000000000..7c8047ebf7f53
--- /dev/null
+++ b/flang/test/Lower/OpenMP/omp-declarative-allocate.f90
@@ -0,0 +1,19 @@
+! This test checks lowering of OpenMP allocate Directive to LLVM IR.
+! Verifies code generation for default (no align, null allocator) case.
+
+! RUN: %flang_fc1 -emit-llvm -fopenmp %s -o - | FileCheck %s
+
+program main
+ integer :: x, y
+ !$omp allocate(x, y)
+end program
+
+! CHECK: define void @_QQmain()
+! CHECK: call i32 @__kmpc_global_thread_num(
+! CHECK: call ptr @__kmpc_alloc(i32 {{.*}}, i64 8, ptr null)
+! CHECK: call ptr @__kmpc_alloc(i32 {{.*}}, i64 8, ptr null)
+! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr null)
+! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr null)
+! CHECK: ret void
+! CHECK: declare noalias ptr @__kmpc_alloc(i32, i64, ptr)
+! CHECK: declare void @__kmpc_free(i32, ptr, ptr)
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 383fd9d94661a..fdf1e1f0b18ed 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -3180,7 +3180,7 @@ class OpenMPIRBuilder {
llvm::IntegerType *IntPtrTy,
bool BranchtoEnd = true);
- /// Create a runtime call for kmpc_Alloc
+ /// Create a runtime call for kmpc_alloc
///
/// \param Loc The insert and source location description.
/// \param Size Size of allocated memory space
@@ -3191,6 +3191,19 @@ class OpenMPIRBuilder {
LLVM_ABI CallInst *createOMPAlloc(const LocationDescription &Loc, Value *Size,
Value *Allocator, std::string Name = "");
+ /// Create a runtime call for kmpc_aligned_alloc
+ ///
+ /// \param Loc The insert and source location description.
+ /// \param Align Alignment of allocated memory space
+ /// \param Size Size of allocated memory space
+ /// \param Allocator Allocator information instruction
+ /// \param Name Name of call Instruction for OMP_Align_Alloc
+ ///
+ /// \returns CallInst to the OMP_Align_Alloc call
+ LLVM_ABI CallInst *createOMPAlignedAlloc(const LocationDescription &Loc,
+ Value *Align, Value *Size, Value *Allocator,
+ std::string Name = "");
+
/// Create a runtime call for kmpc_free
///
/// \param Loc The insert and source location description.
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index ae8f261ff98b2..3a6137fc05b61 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -7619,6 +7619,23 @@ CallInst *OpenMPIRBuilder::createOMPAlloc(const LocationDescription &Loc,
return createRuntimeFunctionCall(Fn, Args, Name);
}
+CallInst *OpenMPIRBuilder::createOMPAlignedAlloc(const LocationDescription &Loc,
+ Value *Align, Value *Size, Value *Allocator,
+ std::string Name) {
+ IRBuilder<>::InsertPointGuard IPG(Builder);
+ updateToLocation(Loc);
+
+ uint32_t SrcLocStrSize;
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
+ Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+ Value *ThreadId = getOrCreateThreadID(Ident);
+ Value *Args[] = {ThreadId, Align, Size, Allocator};
+
+ Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_aligned_alloc);
+ // Emit the call through the same helper createOMPAlloc uses.
+ return createRuntimeFunctionCall(Fn, Args, Name);
+}
+
CallInst *OpenMPIRBuilder::createOMPFree(const LocationDescription &Loc,
Value *Addr, Value *Allocator,
std::string Name) {
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td
index f24efd0d4fc42..13a1fc3bd08bc 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td
@@ -146,11 +146,11 @@ class OpenMP_AllocatorClauseSkip<
extraClassDeclaration> {
let arguments = (ins
- Optional<I64>:$allocator
+ Optional<AnyInteger>:$allocator
);
let optAssemblyFormat = [{
- `allocator` `(` $allocator `)`
+ `allocator` `(` $allocator `:` type($allocator) `)`
}];
let description = [{
diff --git a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
index c67bb57985bd0..f073081002719 100644
--- a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
+++ b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
@@ -272,6 +272,11 @@ class ModuleTranslation {
/// constructed.
llvm::OpenMPIRBuilder *getOpenMPBuilder();
+ /// Registers a pending __kmpc_free call for the given block. These are
+ /// emitted before the block's terminator during block conversion.
+ void registerPendingOmpAllocateFree(Block *block, llvm::Value *ptr,
+ llvm::Value *allocator);
+
/// Returns the LLVM module in which the IR is being constructed.
llvm::Module *getLLVMModule() { return llvmModule.get(); }
@@ -401,6 +406,9 @@ class ModuleTranslation {
llvm::IRBuilderBase &builder,
bool recordInsertions);
+ /// Emits pending __kmpc_free calls for the block, before its terminator.
+ void emitPendingOmpAllocateFrees(Block &bb, llvm::IRBuilderBase &builder);
+
/// Returns the LLVM metadata corresponding to the given mlir LLVM dialect
/// TBAATagAttr.
llvm::MDNode *getTBAANode(TBAATagAttr tbaaAttr) const;
@@ -509,6 +517,11 @@ class ModuleTranslation {
/// block.
DenseMap<BlockAddressAttr, llvm::BasicBlock *> blockAddressToLLVMMapping;
+ /// Pending __kmpc_free calls per block, emitted before the terminator.
+ DenseMap<Block *,
+ llvm::SmallVector<std::pair<llvm::Value *, llvm::Value *>>>
+ pendingOmpAllocateFrees;
+
/// Stack of user-specified state elements, useful when translating operations
/// with regions.
StateStack stack;
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 9f8c13bbe9060..9d9ceeb62ab79 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -4635,6 +4635,22 @@ static Operation *getGlobalOpFromValue(Value value) {
return nullptr;
}
+static Value getBaseValueForTypeLookup(Value value) {
+ while (Operation *op = value.getDefiningOp()) {
+ if (auto addrCast = dyn_cast_if_present<LLVM::AddrSpaceCastOp>(op))
+ value = addrCast.getOperand();
+ else if (op->getName().getIdentifier()) {
+ if (op->getNumOperands() > 0)
+ value = op->getOperand(0);
+ else
+ break;
+ } else {
+ break;
+ }
+ }
+ return value;
+}
+
static llvm::SmallString<64>
getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp,
llvm::OpenMPIRBuilder &ompBuilder) {
@@ -7470,6 +7486,94 @@ convertTargetAllocMemOp(Operation &opInst, llvm::IRBuilderBase &builder,
return success();
}
+static LogicalResult
+convertAllocateDirOp(Operation &opInst, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation) {
+ auto allocateDirOp = cast<omp::AllocateDirOp>(opInst);
+ llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+
+ llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+ llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
+ llvm::DataLayout dataLayout = llvmModule->getDataLayout();
+ SmallVector<Value> vars = allocateDirOp.getVarList();
+ std::optional<int64_t> alignAttr = allocateDirOp.getAlign();
+
+ llvm::Value *allocator;
+ if (auto allocatorVar = allocateDirOp.getAllocator()) {
+ allocator = moduleTranslation.lookupValue(allocatorVar);
+ if (allocator->getType()->isIntegerTy())
+ allocator = builder.CreateIntToPtr(allocator, builder.getPtrTy());
+ else if (allocator->getType()->isPointerTy())
+ allocator =
+ builder.CreatePointerBitCastOrAddrSpaceCast(allocator, builder.getPtrTy());
+ } else {
+ allocator = llvm::ConstantPointerNull::get(builder.getPtrTy());
+ }
+
+ SmallVector<std::pair<llvm::CallInst *, llvm::Value *>> allocatedVars;
+
+ for (Value var : vars) {
+ llvm::Type *llvmVarTy = moduleTranslation.convertType(var.getType());
+
+ // Opaque pointers lose the element type; recover it from the GlobalOp.
+ // Fall back to llvmVarTy when the value does not come from a global.
+ llvm::Type *typeToInspect = llvmVarTy;
+ if (llvmVarTy->isPointerTy()) {
+ Value baseVar = getBaseValueForTypeLookup(var);
+ if (Operation *globalOp = getGlobalOpFromValue(baseVar)) {
+ if (auto gop = dyn_cast<LLVM::GlobalOp>(globalOp))
+ typeToInspect =
+ moduleTranslation.convertType(gop.getGlobalType());
+ }
+ }
+
+ llvm::Value *size;
+ if (auto arrTy = llvm::dyn_cast<llvm::ArrayType>(typeToInspect)) {
+ llvm::Value *elementCount = builder.getInt64(1);
+ llvm::Type *currentType = arrTy;
+ while (auto nestedArrTy = llvm::dyn_cast<llvm::ArrayType>(currentType)) {
+ elementCount = builder.CreateMul(
+ elementCount, builder.getInt64(nestedArrTy->getNumElements()));
+ currentType = nestedArrTy->getElementType();
+ }
+ uint64_t elemSizeInBits = dataLayout.getTypeSizeInBits(currentType);
+ size = builder.CreateMul(elementCount,
+ builder.getInt64(elemSizeInBits / 8));
+ } else {
+ size = builder.getInt64(
+ dataLayout.getTypeStoreSize(typeToInspect).getFixedValue());
+ }
+
+ uint64_t alignValue =
+ alignAttr ? alignAttr.value()
+ : dataLayout.getABITypeAlign(typeToInspect).value();
+ llvm::Value *alignConst = builder.getInt64(alignValue);
+ // Align the size: ((size + align - 1) / align) * align
+ size = builder.CreateAdd(size, builder.getInt64(alignValue - 1), "", true);
+ size = builder.CreateUDiv(size, alignConst);
+ size = builder.CreateMul(size, alignConst, "", true);
+
+ std::string allocName =
+ ompBuilder->createPlatformSpecificName({".void.addr"});
+ llvm::CallInst *allocCall;
+ if (alignAttr.has_value()) {
+ allocCall = ompBuilder->createOMPAlignedAlloc(
+ ompLoc, builder.getInt64(alignAttr.value()), size, allocator, allocName);
+ } else {
+ allocCall = ompBuilder->createOMPAlloc(ompLoc, size, allocator, allocName);
+ }
+ allocatedVars.push_back({allocCall, allocator});
+ }
+
+ // Register __kmpc_free calls to be emitted before the block terminator.
+ Block *block = allocateDirOp->getBlock();
+ for (auto &alloc : allocatedVars)
+ moduleTranslation.registerPendingOmpAllocateFree(block, alloc.first,
+ alloc.second);
+
+ return success();
+}
+
static llvm::Function *getOmpTargetFree(llvm::IRBuilderBase &builder,
llvm::Module *llvmModule) {
llvm::Type *ptrTy = builder.getPtrTy(0);
@@ -7700,6 +7804,9 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
.Case([&](omp::TargetFreeMemOp) {
return convertTargetFreeMemOp(*op, builder, moduleTranslation);
})
+ .Case([&](omp::AllocateDirOp) {
+ return convertAllocateDirOp(*op, builder, moduleTranslation);
+ })
.Default([&](Operation *inst) {
return inst->emitError()
<< "not yet implemented: " << inst->getName();
diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
index f4a6e5f6fc8f6..a24bec25e915c 100644
--- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
@@ -1009,6 +1009,30 @@ LogicalResult ModuleTranslation::convertOperation(Operation &op,
return convertDialectAttributes(&op, scope.getCapturedInstructions());
}
+void ModuleTranslation::registerPendingOmpAllocateFree(Block *block,
+ llvm::Value *ptr,
+ llvm::Value *allocator) {
+ pendingOmpAllocateFrees[block].push_back({ptr, allocator});
+}
+
+void ModuleTranslation::emitPendingOmpAllocateFrees(
+ Block &bb, llvm::IRBuilderBase &builder) {
+ auto it = pendingOmpAllocateFrees.find(&bb);
+ if (it == pendingOmpAllocateFrees.end() || it->second.empty())
+ return;
+ llvm::OpenMPIRBuilder *ompBuilder = getOpenMPBuilder();
+ llvm::BasicBlock *llvmBB = lookupBlock(&bb);
+ llvm::Instruction *term = llvmBB->getTerminator();
+ if (term)
+ builder.SetInsertPoint(term);
+ else
+ builder.SetInsertPoint(llvmBB);
+ llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+ for (auto it2 = it->second.rbegin(); it2 != it->second.rend(); ++it2)
+ ompBuilder->createOMPFree(ompLoc, it2->first, it2->second, "");
+ pendingOmpAllocateFrees.erase(it);
+}
+
/// Convert block to LLVM IR. Unless `ignoreArguments` is set, emit PHI nodes
/// to define values corresponding to the MLIR block arguments. These nodes
/// are not connected to the source basic blocks, which may not exist yet. Uses
@@ -1048,6 +1072,10 @@ LogicalResult ModuleTranslation::convertBlockImpl(Block &bb,
// Traverse operations.
for (auto &op : bb) {
+ // Emit pending OpenMP allocate frees before the terminator.
+ if (op.hasTrait<OpTrait::IsTerminator>())
+ emitPendingOmpAllocateFrees(bb, builder);
+
// Set the current debug location within the builder.
builder.SetCurrentDebugLocation(
debugTranslation->translateLoc(op.getLoc(), subprogram));
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index b0554eba459f8..ee942381f95db 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -3391,27 +3391,27 @@ func.func @omp_allocate_dir(%arg0 : memref<i32>, %arg1 : memref<i32>) -> () {
// Test with one data var and allocator clause
// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64
%omp_default_mem_alloc = arith.constant 1 : i64
- // CHECK: omp.allocate_dir(%[[ARG0]] : memref<i32>) allocator(%[[VAL_1:.*]])
- omp.allocate_dir (%arg0 : memref<i32>) allocator(%omp_default_mem_alloc)
+ // CHECK: omp.allocate_dir(%[[ARG0]] : memref<i32>) allocator(%[[VAL_1:.*]] : i64)
+ omp.allocate_dir (%arg0 : memref<i32>) allocator(%omp_default_mem_alloc : i64)
// Test with one data var, align clause and allocator clause
// CHECK: %[[VAL_2:.*]] = arith.constant 7 : i64
%omp_pteam_mem_alloc = arith.constant 7 : i64
- // CHECK: omp.allocate_dir(%[[ARG0]] : memref<i32>) align(4) allocator(%[[VAL_2:.*]])
- omp.allocate_dir (%arg0 : memref<i32>) align(4) allocator(%omp_pteam_mem_alloc)
+ // CHECK: omp.allocate_dir(%[[ARG0]] : memref<i32>) align(4) allocator(%[[VAL_2:.*]] : i64)
+ omp.allocate_dir (%arg0 : memref<i32>) align(4) allocator(%omp_pteam_mem_alloc : i64)
// Test with two data vars, align clause and allocator clause
// CHECK: %[[VAL_3:.*]] = arith.constant 6 : i64
%omp_cgroup_mem_alloc = arith.constant 6 : i64
- // CHECK: omp.allocate_dir(%[[ARG0]], %[[ARG1]] : memref<i32>, memref<i32>) align(8) allocator(%[[VAL_3:.*]])
- omp.allocate_dir (%arg0, %arg1 : memref<i32>, memref<i32>) align(8) allocator(%omp_cgroup_mem_alloc)
+ // CHECK: omp.allocate_dir(%[[ARG0]], %[[ARG1]] : memref<i32>, memref<i32>) align(8) allocator(%[[VAL_3:.*]] : i64)
+ omp.allocate_dir (%arg0, %arg1 : memref<i32>, memref<i32>) align(8) allocator(%omp_cgroup_mem_alloc : i64)
// Test with one data var and user defined allocator clause
// CHECK: %[[VAL_4:.*]] = arith.constant 9 : i64
%custom_allocator = arith.constant 9 : i64
%custom_mem_alloc = func.call @omp_init_allocator(%custom_allocator) : (i64) -> (i64)
- // CHECK: omp.allocate_dir(%[[ARG0]] : memref<i32>) allocator(%[[VAL_5:.*]])
- omp.allocate_dir (%arg0 : memref<i32>) allocator(%custom_mem_alloc)
+ // CHECK: omp.allocate_dir(%[[ARG0]] : memref<i32>) allocator(%[[VAL_5:.*]] : i64)
+ omp.allocate_dir (%arg0 : memref<i32>) allocator(%custom_mem_alloc : i64)
return
}
>From 2a021c105671344b9831c956dd03f47aef2ea526 Mon Sep 17 00:00:00 2001
From: Raghu Maddhipatla <Raghu.Maddhipatla at amd.com>
Date: Tue, 17 Mar 2026 21:28:49 -0500
Subject: [PATCH 2/4] Fix clang-formatting
---
flang/lib/Lower/OpenMP/ClauseProcessor.cpp | 7 +++--
flang/lib/Lower/OpenMP/ClauseProcessor.h | 3 +-
flang/lib/Lower/OpenMP/OpenMP.cpp | 31 ++++++++++---------
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 5 +--
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 5 +--
.../mlir/Target/LLVMIR/ModuleTranslation.h | 3 +-
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 21 +++++++------
7 files changed, 41 insertions(+), 34 deletions(-)
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index 8c107d4d72cae..5ff86675dbc97 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -325,8 +325,7 @@ static void collectIteratorIVs(
// ClauseProcessor unique clauses
//===----------------------------------------------------------------------===//
-bool ClauseProcessor::processAlign(
- mlir::omp::AlignClauseOps &result) const {
+bool ClauseProcessor::processAlign(mlir::omp::AlignClauseOps &result) const {
if (auto *clause = findUniqueClause<omp::clause::Align>()) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
const std::optional<std::int64_t> align = evaluate::ToInt64(clause->v);
@@ -336,7 +335,9 @@ bool ClauseProcessor::processAlign(
return false;
}
-bool ClauseProcessor::processAllocator(lower::StatementContext &stmtCtx, mlir::omp::AllocatorClauseOps &result) const {
+bool ClauseProcessor::processAllocator(
+ lower::StatementContext &stmtCtx,
+ mlir::omp::AllocatorClauseOps &result) const {
if (auto *clause = findUniqueClause<omp::clause::Allocator>()) {
result.allocator = fir::getBase(converter.genExprValue(clause->v, stmtCtx));
return true;
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h
index 583e3909733d3..a83f9054ed875 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.h
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h
@@ -58,7 +58,8 @@ class ClauseProcessor {
// 'Unique' clauses: They can appear at most once in the clause list.
bool processAlign(mlir::omp::AlignClauseOps &result) const;
- bool processAllocator(lower::StatementContext &stmtCtx, mlir::omp::AllocatorClauseOps &result) const;
+ bool processAllocator(lower::StatementContext &stmtCtx,
+ mlir::omp::AllocatorClauseOps &result) const;
bool processBare(mlir::omp::BareClauseOps &result) const;
bool processBind(mlir::omp::BindClauseOps &result) const;
bool processCancelDirectiveName(
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 9d5f40c771fc3..fc19a5b0c40ea 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1497,12 +1497,12 @@ static OpTy genWrapperOp(lower::AbstractConverter &converter,
//===----------------------------------------------------------------------===//
static void genAllocateClauses(lower::AbstractConverter &converter,
- semantics::SemanticsContext &semaCtx,
- lower::StatementContext &stmtCtx,
- const ObjectList &objects,
- const List<Clause> &clauses, mlir::Location loc,
- llvm::SmallVectorImpl<mlir::Value> &operandRange,
- mlir::omp::AllocateDirOperands &clauseOps) {
+ semantics::SemanticsContext &semaCtx,
+ lower::StatementContext &stmtCtx,
+ const ObjectList &objects,
+ const List<Clause> &clauses, mlir::Location loc,
+ llvm::SmallVectorImpl<mlir::Value> &operandRange,
+ mlir::omp::AllocateDirOperands &clauseOps) {
if (!objects.empty())
genObjectList(objects, converter, operandRange);
@@ -1931,16 +1931,19 @@ static void genWsloopClauses(
//===----------------------------------------------------------------------===//
// Code generation functions for leaf constructs
//===----------------------------------------------------------------------===//
-static mlir::omp::AllocateDirOp
-genAllocateDirOp(lower::AbstractConverter &converter,
- semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, lower::pft::Evaluation &eval,
- mlir::Location loc, const ObjectList &objects, const ConstructQueue &queue, ConstructQueue::const_iterator item) {
+static mlir::omp::AllocateDirOp genAllocateDirOp(
+ lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
+ lower::StatementContext &stmtCtx, lower::pft::Evaluation &eval,
+ mlir::Location loc, const ObjectList &objects, const ConstructQueue &queue,
+ ConstructQueue::const_iterator item) {
llvm::SmallVector<mlir::Value> operandRange;
mlir::omp::AllocateDirOperands clauseOps;
genAllocateClauses(converter, semaCtx, stmtCtx, objects, item->clauses, loc,
- operandRange, clauseOps);
+ operandRange, clauseOps);
- return mlir::omp::AllocateDirOp::create(converter.getFirOpBuilder(), loc, operandRange, clauseOps.align, clauseOps.allocator);
+ return mlir::omp::AllocateDirOp::create(converter.getFirOpBuilder(), loc,
+ operandRange, clauseOps.align,
+ clauseOps.allocator);
}
static mlir::omp::BarrierOp
@@ -3853,8 +3856,8 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
converter.getFirOpBuilder().getModule(), semaCtx, eval, allocate.source,
llvm::omp::Directive::OMPD_allocate, clauses)};
- genAllocateDirOp(converter, semaCtx, stmtCtx, eval, loc, objects,
- queue, queue.begin());
+ genAllocateDirOp(converter, semaCtx, stmtCtx, eval, loc, objects, queue,
+ queue.begin());
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index fdf1e1f0b18ed..7c78f7a1d8f44 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -3201,8 +3201,9 @@ class OpenMPIRBuilder {
///
/// \returns CallInst to the OMP_Align_Alloc call
LLVM_ABI CallInst *createOMPAlignedAlloc(const LocationDescription &Loc,
- Value *Align, Value *Size, Value *Allocator,
- std::string Name = "");
+ Value *Align, Value *Size,
+ Value *Allocator,
+ std::string Name = "");
/// Create a runtime call for kmpc_free
///
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 3a6137fc05b61..ab1891ea89206 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -7620,8 +7620,9 @@ CallInst *OpenMPIRBuilder::createOMPAlloc(const LocationDescription &Loc,
}
CallInst *OpenMPIRBuilder::createOMPAlignedAlloc(const LocationDescription &Loc,
- Value *Align, Value *Size, Value *Allocator,
- std::string Name) {
+ Value *Align, Value *Size,
+ Value *Allocator,
+ std::string Name) {
IRBuilder<>::InsertPointGuard IPG(Builder);
updateToLocation(Loc);
diff --git a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
index f073081002719..243cca8831e37 100644
--- a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
+++ b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
@@ -518,8 +518,7 @@ class ModuleTranslation {
DenseMap<BlockAddressAttr, llvm::BasicBlock *> blockAddressToLLVMMapping;
/// Pending __kmpc_free calls per block, emitted before the terminator.
- DenseMap<Block *,
- llvm::SmallVector<std::pair<llvm::Value *, llvm::Value *>>>
+ DenseMap<Block *, llvm::SmallVector<std::pair<llvm::Value *, llvm::Value *>>>
pendingOmpAllocateFrees;
/// Stack of user-specified state elements, useful when translating operations
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 9d9ceeb62ab79..5c4da886b5c0d 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -7488,7 +7488,7 @@ convertTargetAllocMemOp(Operation &opInst, llvm::IRBuilderBase &builder,
static LogicalResult
convertAllocateDirOp(Operation &opInst, llvm::IRBuilderBase &builder,
- LLVM::ModuleTranslation &moduleTranslation) {
+ LLVM::ModuleTranslation &moduleTranslation) {
auto allocateDirOp = cast<omp::AllocateDirOp>(opInst);
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
@@ -7504,8 +7504,8 @@ convertAllocateDirOp(Operation &opInst, llvm::IRBuilderBase &builder,
if (allocator->getType()->isIntegerTy())
allocator = builder.CreateIntToPtr(allocator, builder.getPtrTy());
else if (allocator->getType()->isPointerTy())
- allocator =
- builder.CreatePointerBitCastOrAddrSpaceCast(allocator, builder.getPtrTy());
+ allocator = builder.CreatePointerBitCastOrAddrSpaceCast(
+ allocator, builder.getPtrTy());
} else {
allocator = llvm::ConstantPointerNull::get(builder.getPtrTy());
}
@@ -7522,8 +7522,7 @@ convertAllocateDirOp(Operation &opInst, llvm::IRBuilderBase &builder,
Value baseVar = getBaseValueForTypeLookup(var);
if (Operation *globalOp = getGlobalOpFromValue(baseVar)) {
if (auto gop = dyn_cast<LLVM::GlobalOp>(globalOp))
- typeToInspect =
- moduleTranslation.convertType(gop.getGlobalType());
+ typeToInspect = moduleTranslation.convertType(gop.getGlobalType());
}
}
@@ -7537,8 +7536,8 @@ convertAllocateDirOp(Operation &opInst, llvm::IRBuilderBase &builder,
currentType = nestedArrTy->getElementType();
}
uint64_t elemSizeInBits = dataLayout.getTypeSizeInBits(currentType);
- size = builder.CreateMul(elementCount,
- builder.getInt64(elemSizeInBits / 8));
+ size =
+ builder.CreateMul(elementCount, builder.getInt64(elemSizeInBits / 8));
} else {
size = builder.getInt64(
dataLayout.getTypeStoreSize(typeToInspect).getFixedValue());
@@ -7558,9 +7557,11 @@ convertAllocateDirOp(Operation &opInst, llvm::IRBuilderBase &builder,
llvm::CallInst *allocCall;
if (alignAttr.has_value()) {
allocCall = ompBuilder->createOMPAlignedAlloc(
- ompLoc, builder.getInt64(alignAttr.value()), size, allocator, allocName);
+ ompLoc, builder.getInt64(alignAttr.value()), size, allocator,
+ allocName);
} else {
- allocCall = ompBuilder->createOMPAlloc(ompLoc, size, allocator, allocName);
+ allocCall =
+ ompBuilder->createOMPAlloc(ompLoc, size, allocator, allocName);
}
allocatedVars.push_back({allocCall, allocator});
}
@@ -7569,7 +7570,7 @@ convertAllocateDirOp(Operation &opInst, llvm::IRBuilderBase &builder,
Block *block = allocateDirOp->getBlock();
for (auto &alloc : allocatedVars)
moduleTranslation.registerPendingOmpAllocateFree(block, alloc.first,
- alloc.second);
+ alloc.second);
return success();
}
>From 99a021e49138c07e83757d75cc46515145d8d1b7 Mon Sep 17 00:00:00 2001
From: Raghu Maddhipatla <Raghu.Maddhipatla at amd.com>
Date: Wed, 18 Mar 2026 00:14:58 -0500
Subject: [PATCH 3/4] Fix buildbot errors for the test program
---
.../OpenMP/omp-declarative-allocate-align.f90 | 22 +++++++++----------
1 file changed, 10 insertions(+), 12 deletions(-)
diff --git a/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90 b/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90
index 50c6ab1f64002..a131573ca5375 100644
--- a/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90
+++ b/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90
@@ -1,22 +1,21 @@
! This test checks lowering of OpenMP allocate Directive with align and allocator
! clauses to LLVM IR. Verifies code generation for:
! - align(16) only (null allocator)
-! - allocator(omp_default_mem_alloc) only (no align)
-! - align(64) allocator(omp_cgroup_mem_alloc) (both clauses, array variable)
+! - allocator(1) only (no align)
+! - align(64) allocator(6) (both clauses, array variable)
! - align(32) allocator(3) (both clauses, multiple variables)
! RUN: %flang_fc1 -emit-llvm %openmp_flags -fopenmp-version=51 %s -o - 2>&1 | FileCheck %s
program main
- use omp_lib
integer :: x, y
integer :: z(10)
character c
- real(kind = 16) :: r
- complex cmplx
+ real :: r
+ complex :: cmplx
!$omp allocate(x) align(16)
- !$omp allocate(y) allocator(omp_default_mem_alloc)
- !$omp allocate(z) align(64) allocator(omp_cgroup_mem_alloc)
+ !$omp allocate(y) allocator(1)
+ !$omp allocate(z) align(64) allocator(6)
!$omp allocate(c, r, cmplx) align(32) allocator(3)
x = 1
y = 2
@@ -24,12 +23,11 @@ program main
print *, "z : ", z
end program
-! CHECK: define void @_QQmain()
! CHECK: call i32 @__kmpc_global_thread_num(
! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 16, i64 {{.*}}, ptr null)
-! CHECK: call ptr @__kmpc_alloc(i32 {{.*}}, i64 {{.*}}, ptr inttoptr (i64 1 to ptr))
-! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 64, i64 {{.*}}, ptr inttoptr (i64 6 to ptr))
+! CHECK: call ptr @__kmpc_alloc(i32 {{.*}}, i64 {{.*}}, ptr inttoptr (i32 1 to ptr))
+! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 64, i64 {{.*}}, ptr inttoptr (i32 6 to ptr))
! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 32, i64 {{.*}}, ptr inttoptr (i32 3 to ptr))
! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 32, i64 {{.*}}, ptr inttoptr (i32 3 to ptr))
! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 32, i64 {{.*}}, ptr inttoptr (i32 3 to ptr))
@@ -37,8 +35,8 @@ program main
! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 3 to ptr))
! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 3 to ptr))
! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 3 to ptr))
-! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i64 6 to ptr))
-! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i64 1 to ptr))
+! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 6 to ptr))
+! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 1 to ptr))
! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr null)
! CHECK: ret void
>From 4f37e8b244a7e79d7275e0966ee60c5128c41bc7 Mon Sep 17 00:00:00 2001
From: Raghu Maddhipatla <Raghu.Maddhipatla at amd.com>
Date: Tue, 31 Mar 2026 18:43:09 -0500
Subject: [PATCH 4/4] Address review comments. Moved implementation from
ModuleTranslation.cpp to use LLVMTranslationInterface.h
---
.../OpenMP/omp-declarative-allocate-align.f90 | 44 +++----
.../Lower/OpenMP/omp-declarative-allocate.f90 | 18 ++-
.../LLVMIR/LLVMTranslationDialectInterface.td | 10 ++
.../Target/LLVMIR/LLVMTranslationInterface.h | 11 ++
.../mlir/Target/LLVMIR/ModuleTranslation.h | 12 --
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 57 ++++++++-
mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 29 +----
.../LLVMIR/openmp-allocate-directive.mlir | 109 ++++++++++++++++++
8 files changed, 213 insertions(+), 77 deletions(-)
create mode 100644 mlir/test/Target/LLVMIR/openmp-allocate-directive.mlir
diff --git a/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90 b/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90
index a131573ca5375..0824d8bcb7e90 100644
--- a/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90
+++ b/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90
@@ -1,11 +1,11 @@
! This test checks lowering of OpenMP allocate Directive with align and allocator
-! clauses to LLVM IR. Verifies code generation for:
+! clauses to HLFIR. Verifies code generation for:
! - align(16) only (null allocator)
! - allocator(1) only (no align)
! - align(64) allocator(6) (both clauses, array variable)
! - align(32) allocator(3) (both clauses, multiple variables)
-! RUN: %flang_fc1 -emit-llvm %openmp_flags -fopenmp-version=51 %s -o - 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir %openmp_flags -fopenmp-version=51 %s -o - 2>&1 | FileCheck %s
program main
integer :: x, y
@@ -23,23 +23,23 @@ program main
print *, "z : ", z
end program
-! CHECK: call i32 @__kmpc_global_thread_num(
-
-! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 16, i64 {{.*}}, ptr null)
-! CHECK: call ptr @__kmpc_alloc(i32 {{.*}}, i64 {{.*}}, ptr inttoptr (i32 1 to ptr))
-! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 64, i64 {{.*}}, ptr inttoptr (i32 6 to ptr))
-! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 32, i64 {{.*}}, ptr inttoptr (i32 3 to ptr))
-! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 32, i64 {{.*}}, ptr inttoptr (i32 3 to ptr))
-! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 32, i64 {{.*}}, ptr inttoptr (i32 3 to ptr))
-
-! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 3 to ptr))
-! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 3 to ptr))
-! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 3 to ptr))
-! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 6 to ptr))
-! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 1 to ptr))
-! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr null)
-! CHECK: ret void
-
-! CHECK: declare noalias ptr @__kmpc_aligned_alloc(i32, i64, i64, ptr)
-! CHECK: declare noalias ptr @__kmpc_alloc(i32, i64, ptr)
-! CHECK: declare void @__kmpc_free(i32, ptr, ptr)
+! CHECK: %[[C1_IDX:.*]] = arith.constant 1 : index
+! CHECK: %[[C_ALLOC:.*]] = fir.alloca !fir.char<1> {bindc_name = "c", uniq_name = "_QFEc"}
+! CHECK: %[[C_DECL:.*]]:2 = hlfir.declare %[[C_ALLOC]] typeparams %[[C1_IDX]] {uniq_name = "_QFEc"} : (!fir.ref<!fir.char<1>>, index) -> (!fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>)
+! CHECK: %[[CMPLX_ALLOC:.*]] = fir.alloca complex<f32> {bindc_name = "cmplx", uniq_name = "_QFEcmplx"}
+! CHECK: %[[CMPLX_DECL:.*]]:2 = hlfir.declare %[[CMPLX_ALLOC]] {uniq_name = "_QFEcmplx"} : (!fir.ref<complex<f32>>) -> (!fir.ref<complex<f32>>, !fir.ref<complex<f32>>)
+! CHECK: %[[R_ALLOC:.*]] = fir.alloca f32 {bindc_name = "r", uniq_name = "_QFEr"}
+! CHECK: %[[R_DECL:.*]]:2 = hlfir.declare %[[R_ALLOC]] {uniq_name = "_QFEr"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[X_ALLOC:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFEx"}
+! CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X_ALLOC]] {uniq_name = "_QFEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[Y_ALLOC:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFEy"}
+! CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y_ALLOC]] {uniq_name = "_QFEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[Z_REF:.*]] = fir.address_of(@_QFEz) : !fir.ref<!fir.array<10xi32>>
+! CHECK: %[[Z_DECL:.*]]:2 = hlfir.declare %[[Z_REF]]({{.*}}) {uniq_name = "_QFEz"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
+! CHECK: omp.allocate_dir(%[[X_DECL]]#0 : !fir.ref<i32>) align(16)
+! CHECK: %[[ALLOC1:.*]] = arith.constant 1 : i32
+! CHECK: omp.allocate_dir(%[[Y_DECL]]#0 : !fir.ref<i32>) allocator(%[[ALLOC1]] : i32)
+! CHECK: %[[ALLOC6:.*]] = arith.constant 6 : i32
+! CHECK: omp.allocate_dir(%[[Z_DECL]]#0 : !fir.ref<!fir.array<10xi32>>) align(64) allocator(%[[ALLOC6]] : i32)
+! CHECK: %[[ALLOC3:.*]] = arith.constant 3 : i32
+! CHECK: omp.allocate_dir(%[[C_DECL]]#0, %[[R_DECL]]#0, %[[CMPLX_DECL]]#0 : !fir.ref<!fir.char<1>>, !fir.ref<f32>, !fir.ref<complex<f32>>) align(32) allocator(%[[ALLOC3]] : i32)
diff --git a/flang/test/Lower/OpenMP/omp-declarative-allocate.f90 b/flang/test/Lower/OpenMP/omp-declarative-allocate.f90
index 7c8047ebf7f53..69da3f52b459f 100644
--- a/flang/test/Lower/OpenMP/omp-declarative-allocate.f90
+++ b/flang/test/Lower/OpenMP/omp-declarative-allocate.f90
@@ -1,19 +1,15 @@
-! This test checks lowering of OpenMP allocate Directive to LLVM IR.
+! This test checks lowering of the OpenMP allocate directive to HLFIR.
! Verifies code generation for default (no align, null allocator) case.
-! RUN: %flang_fc1 -emit-llvm -fopenmp %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
program main
integer :: x, y
!$omp allocate(x, y)
end program
-! CHECK: define void @_QQmain()
-! CHECK: call i32 @__kmpc_global_thread_num(
-! CHECK: call ptr @__kmpc_alloc(i32 {{.*}}, i64 8, ptr null)
-! CHECK: call ptr @__kmpc_alloc(i32 {{.*}}, i64 8, ptr null)
-! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr null)
-! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr null)
-! CHECK: ret void
-! CHECK: declare noalias ptr @__kmpc_alloc(i32, i64, ptr)
-! CHECK: declare void @__kmpc_free(i32, ptr, ptr)
+! CHECK: %[[X_ALLOC:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFEx"}
+! CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X_ALLOC]] {uniq_name = "_QFEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[Y_ALLOC:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFEy"}
+! CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y_ALLOC]] {uniq_name = "_QFEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: omp.allocate_dir(%[[X_DECL]]#0, %[[Y_DECL]]#0 : !fir.ref<i32>, !fir.ref<i32>)
diff --git a/mlir/include/mlir/Target/LLVMIR/LLVMTranslationDialectInterface.td b/mlir/include/mlir/Target/LLVMIR/LLVMTranslationDialectInterface.td
index 6d8c7174bd2e3..b1e7f25b44c40 100644
--- a/mlir/include/mlir/Target/LLVMIR/LLVMTranslationDialectInterface.td
+++ b/mlir/include/mlir/Target/LLVMIR/LLVMTranslationDialectInterface.td
@@ -55,6 +55,16 @@ def LLVMTranslationDialectInterface : DialectInterface<"LLVMTranslationDialectIn
[{
return ::llvm::success();
}]
+ >,
+ InterfaceMethod<[{
+ Hook called just before a block's terminator operation is translated.
+ Dialects can override this to inject IR that must appear at the end of
+ a basic block.
+ }],
+ "void", "preTranslateTerminator",
+ (ins "::mlir::Block &":$block, "::llvm::IRBuilderBase &":$builder,
+ "::mlir::LLVM::ModuleTranslation &":$moduleTranslation),
+ [{ }]
>
];
}
diff --git a/mlir/include/mlir/Target/LLVMIR/LLVMTranslationInterface.h b/mlir/include/mlir/Target/LLVMIR/LLVMTranslationInterface.h
index 58d3ee0ed2139..5bc4aa9a4e7e9 100644
--- a/mlir/include/mlir/Target/LLVMIR/LLVMTranslationInterface.h
+++ b/mlir/include/mlir/Target/LLVMIR/LLVMTranslationInterface.h
@@ -79,6 +79,17 @@ class LLVMTranslationInterface
attribute.getName().str() + "'");
return success();
}
+
+ /// Calls the `preTranslateTerminator` hook on every registered dialect
+ /// interface. This is broadcast to all interfaces because any dialect may
+ /// have registered deferred work for the given block, independent of which
+ /// dialect owns the terminator.
+ virtual void
+ preTranslateTerminator(Block &block, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation) const {
+ for (const LLVMTranslationDialectInterface &iface : *this)
+ iface.preTranslateTerminator(block, builder, moduleTranslation);
+ }
};
} // namespace mlir
diff --git a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
index 243cca8831e37..c67bb57985bd0 100644
--- a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
+++ b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
@@ -272,11 +272,6 @@ class ModuleTranslation {
/// constructed.
llvm::OpenMPIRBuilder *getOpenMPBuilder();
- /// Registers a pending __kmpc_free call for the given block. These are
- /// emitted before the block's terminator during block conversion.
- void registerPendingOmpAllocateFree(Block *block, llvm::Value *ptr,
- llvm::Value *allocator);
-
/// Returns the LLVM module in which the IR is being constructed.
llvm::Module *getLLVMModule() { return llvmModule.get(); }
@@ -406,9 +401,6 @@ class ModuleTranslation {
llvm::IRBuilderBase &builder,
bool recordInsertions);
- /// Emits pending __kmpc_free calls for the block, before its terminator.
- void emitPendingOmpAllocateFrees(Block &bb, llvm::IRBuilderBase &builder);
-
/// Returns the LLVM metadata corresponding to the given mlir LLVM dialect
/// TBAATagAttr.
llvm::MDNode *getTBAANode(TBAATagAttr tbaaAttr) const;
@@ -517,10 +509,6 @@ class ModuleTranslation {
/// block.
DenseMap<BlockAddressAttr, llvm::BasicBlock *> blockAddressToLLVMMapping;
- /// Pending __kmpc_free calls per block, emitted before the terminator.
- DenseMap<Block *, llvm::SmallVector<std::pair<llvm::Value *, llvm::Value *>>>
- pendingOmpAllocateFrees;
-
/// Stack of user-specified state elements, useful when translating operations
/// with regions.
StateStack stack;
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 5c4da886b5c0d..bd6ba2120467e 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -4639,7 +4639,11 @@ static Value getBaseValueForTypeLookup(Value value) {
while (Operation *op = value.getDefiningOp()) {
if (auto addrCast = dyn_cast_if_present<LLVM::AddrSpaceCastOp>(op))
value = addrCast.getOperand();
- else if (op->getName().getIdentifier()) {
+ // Trace through hlfir.declare and fir.declare to reach the base address,
+ // which is then used for type lookup.
+ else if (op->getName().getIdentifier() &&
+ (op->getName().getIdentifier().str() == "hlfir.declare" ||
+ op->getName().getIdentifier().str() == "fir.declare")) {
if (op->getNumOperands() > 0)
value = op->getOperand(0);
else
@@ -7309,10 +7313,50 @@ class OpenMPDialectLLVMIRTranslationInterface
amendOperation(Operation *op, ArrayRef<llvm::Instruction *> instructions,
NamedAttribute attribute,
LLVM::ModuleTranslation &moduleTranslation) const final;
+
+ /// Emits pending __kmpc_free calls just before the block's terminator.
+ void preTranslateTerminator(
+ Block &block, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation) const final;
+
+ /// Registers a deferred __kmpc_free call to be emitted before the
+ /// terminator of the given block.
+ void registerPendingOmpAllocateFree(Block *block, llvm::Value *ptr,
+ llvm::Value *allocator) const {
+ pendingOmpAllocateFrees[block].push_back({ptr, allocator});
+ }
+
+private:
+ /// Pending __kmpc_free calls per block, emitted via preTranslateTerminator.
+ mutable DenseMap<Block *,
+ llvm::SmallVector<std::pair<llvm::Value *, llvm::Value *>>>
+ pendingOmpAllocateFrees;
};
} // namespace
+void OpenMPDialectLLVMIRTranslationInterface::preTranslateTerminator(
+ Block &block, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation) const {
+ auto it = pendingOmpAllocateFrees.find(&block);
+ if (it == pendingOmpAllocateFrees.end() || it->second.empty())
+ return;
+ llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+ if (!ompBuilder)
+ return;
+ llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(&block);
+ if (!llvmBB)
+ return;
+ if (!llvmBB->empty() && llvmBB->back().isTerminator())
+ builder.SetInsertPoint(&llvmBB->back());
+ else
+ builder.SetInsertPoint(llvmBB);
+ llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+ for (auto it2 = it->second.rbegin(); it2 != it->second.rend(); ++it2)
+ ompBuilder->createOMPFree(ompLoc, it2->first, it2->second, "");
+ pendingOmpAllocateFrees.erase(it);
+}
+
LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation(
Operation *op, ArrayRef<llvm::Instruction *> instructions,
NamedAttribute attribute,
@@ -7488,7 +7532,8 @@ convertTargetAllocMemOp(Operation &opInst, llvm::IRBuilderBase &builder,
static LogicalResult
convertAllocateDirOp(Operation &opInst, llvm::IRBuilderBase &builder,
- LLVM::ModuleTranslation &moduleTranslation) {
+ LLVM::ModuleTranslation &moduleTranslation,
+ const OpenMPDialectLLVMIRTranslationInterface &ompIface) {
auto allocateDirOp = cast<omp::AllocateDirOp>(opInst);
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
@@ -7566,11 +7611,11 @@ convertAllocateDirOp(Operation &opInst, llvm::IRBuilderBase &builder,
allocatedVars.push_back({allocCall, allocator});
}
- // Register __kmpc_free calls to be emitted before the block terminator.
+ // Register __kmpc_free calls to be emitted before the block terminator via
+ // preTranslateTerminator().
Block *block = allocateDirOp->getBlock();
for (auto &alloc : allocatedVars)
- moduleTranslation.registerPendingOmpAllocateFree(block, alloc.first,
- alloc.second);
+ ompIface.registerPendingOmpAllocateFree(block, alloc.first, alloc.second);
return success();
}
@@ -7806,7 +7851,7 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
return convertTargetFreeMemOp(*op, builder, moduleTranslation);
})
.Case([&](omp::AllocateDirOp) {
- return convertAllocateDirOp(*op, builder, moduleTranslation);
+ return convertAllocateDirOp(*op, builder, moduleTranslation, *this);
})
.Default([&](Operation *inst) {
return inst->emitError()
diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
index a24bec25e915c..b4f5fa998ccae 100644
--- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
@@ -1009,30 +1009,6 @@ LogicalResult ModuleTranslation::convertOperation(Operation &op,
return convertDialectAttributes(&op, scope.getCapturedInstructions());
}
-void ModuleTranslation::registerPendingOmpAllocateFree(Block *block,
- llvm::Value *ptr,
- llvm::Value *allocator) {
- pendingOmpAllocateFrees[block].push_back({ptr, allocator});
-}
-
-void ModuleTranslation::emitPendingOmpAllocateFrees(
- Block &bb, llvm::IRBuilderBase &builder) {
- auto it = pendingOmpAllocateFrees.find(&bb);
- if (it == pendingOmpAllocateFrees.end() || it->second.empty())
- return;
- llvm::OpenMPIRBuilder *ompBuilder = getOpenMPBuilder();
- llvm::BasicBlock *llvmBB = lookupBlock(&bb);
- llvm::Instruction *term = llvmBB->getTerminator();
- if (term)
- builder.SetInsertPoint(term);
- else
- builder.SetInsertPoint(llvmBB);
- llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
- for (auto it2 = it->second.rbegin(); it2 != it->second.rend(); ++it2)
- ompBuilder->createOMPFree(ompLoc, it2->first, it2->second, "");
- pendingOmpAllocateFrees.erase(it);
-}
-
/// Convert block to LLVM IR. Unless `ignoreArguments` is set, emit PHI nodes
/// to define values corresponding to the MLIR block arguments. These nodes
/// are not connected to the source basic blocks, which may not exist yet. Uses
@@ -1072,9 +1048,10 @@ LogicalResult ModuleTranslation::convertBlockImpl(Block &bb,
// Traverse operations.
for (auto &op : bb) {
- // Emit pending OpenMP allocate frees before the terminator.
+ // Give registered dialect interfaces a chance to inject IR before the
+ // terminator.
if (op.hasTrait<OpTrait::IsTerminator>())
- emitPendingOmpAllocateFrees(bb, builder);
+ iface.preTranslateTerminator(bb, builder, *this);
// Set the current debug location within the builder.
builder.SetCurrentDebugLocation(
diff --git a/mlir/test/Target/LLVMIR/openmp-allocate-directive.mlir b/mlir/test/Target/LLVMIR/openmp-allocate-directive.mlir
new file mode 100644
index 0000000000000..1c05b20a83a61
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-allocate-directive.mlir
@@ -0,0 +1,109 @@
+// Tests for translation of omp.allocate_dir operations to LLVM IR,
+// covering all combinations of align and allocator clauses.
+
+// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
+
+// -----
+
+// CHECK-LABEL: define void @test_allocate_default
+// CHECK-SAME: (ptr %[[ARG0:.*]]) {
+// CHECK: %[[TID:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK: %[[ALLOC:.*]] = call ptr @__kmpc_alloc(i32 %[[TID]], i64 8, ptr null)
+// CHECK: %[[TID_FREE:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK: call void @__kmpc_free(i32 %[[TID_FREE]], ptr %[[ALLOC]], ptr null)
+// CHECK: ret void
+// CHECK: }
+// CHECK: declare noalias ptr @__kmpc_alloc(i32, i64, ptr)
+// CHECK: declare void @__kmpc_free(i32, ptr, ptr)
+llvm.func @test_allocate_default(%arg0: !llvm.ptr) {
+ omp.allocate_dir (%arg0 : !llvm.ptr)
+ llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: define void @test_allocate_align_only
+// CHECK: %[[TID:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK: %[[ALLOC:.*]] = call ptr @__kmpc_aligned_alloc(i32 %[[TID]], i64 16, i64 16, ptr null)
+// CHECK: %[[TID_FREE:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK: call void @__kmpc_free(i32 %[[TID_FREE]], ptr %[[ALLOC]], ptr null)
+// CHECK: ret void
+// CHECK: declare noalias ptr @__kmpc_aligned_alloc(i32, i64, i64, ptr)
+llvm.func @test_allocate_align_only(%arg0: !llvm.ptr) {
+ omp.allocate_dir (%arg0 : !llvm.ptr) align(16)
+ llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: define void @test_allocate_allocator_only
+// CHECK: %[[TID:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK: %[[ALLOC:.*]] = call ptr @__kmpc_alloc(i32 %[[TID]], i64 8, ptr inttoptr (i32 1 to ptr))
+// CHECK: %[[TID_FREE:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK: call void @__kmpc_free(i32 %[[TID_FREE]], ptr %[[ALLOC]], ptr inttoptr (i32 1 to ptr))
+// CHECK: ret void
+llvm.func @test_allocate_allocator_only(%arg0: !llvm.ptr) {
+ %alloc1 = llvm.mlir.constant(1 : i32) : i32
+ omp.allocate_dir (%arg0 : !llvm.ptr) allocator(%alloc1 : i32)
+ llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: define void @test_allocate_align_and_allocator
+// CHECK: %[[TID:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK: %[[ALLOC:.*]] = call ptr @__kmpc_aligned_alloc(i32 %[[TID]], i64 64, i64 64, ptr inttoptr (i32 6 to ptr))
+// CHECK: %[[TID_FREE:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK: call void @__kmpc_free(i32 %[[TID_FREE]], ptr %[[ALLOC]], ptr inttoptr (i32 6 to ptr))
+// CHECK: ret void
+llvm.func @test_allocate_align_and_allocator(%arg0: !llvm.ptr) {
+ %alloc6 = llvm.mlir.constant(6 : i32) : i32
+ omp.allocate_dir (%arg0 : !llvm.ptr) align(64) allocator(%alloc6 : i32)
+ llvm.return
+}
+
+// -----
+
+// Verifies that multiple variables each get their own __kmpc_aligned_alloc call
+// and that __kmpc_free calls are emitted in reverse allocation order.
+//
+// CHECK-LABEL: define void @test_allocate_multiple_vars
+// CHECK: %[[TID0:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK: %[[ALLOC0:.*]] = call ptr @__kmpc_aligned_alloc(i32 %[[TID0]], i64 32, i64 32, ptr inttoptr (i32 3 to ptr))
+// CHECK: %[[TID1:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK: %[[ALLOC1:.*]] = call ptr @__kmpc_aligned_alloc(i32 %[[TID1]], i64 32, i64 32, ptr inttoptr (i32 3 to ptr))
+// CHECK: %[[TID2:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK: %[[ALLOC2:.*]] = call ptr @__kmpc_aligned_alloc(i32 %[[TID2]], i64 32, i64 32, ptr inttoptr (i32 3 to ptr))
+// Free order is reversed relative to allocation order.
+// CHECK: call void @__kmpc_free({{.*}}, ptr %[[ALLOC2]], ptr inttoptr (i32 3 to ptr))
+// CHECK: call void @__kmpc_free({{.*}}, ptr %[[ALLOC1]], ptr inttoptr (i32 3 to ptr))
+// CHECK: call void @__kmpc_free({{.*}}, ptr %[[ALLOC0]], ptr inttoptr (i32 3 to ptr))
+// CHECK: ret void
+llvm.func @test_allocate_multiple_vars(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr) {
+ %alloc3 = llvm.mlir.constant(3 : i32) : i32
+ omp.allocate_dir (%arg0, %arg1, %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) align(32) allocator(%alloc3 : i32)
+ llvm.return
+}
+
+// -----
+
+// Verifies that the array size is correctly calculated from the global's
+// type: [10 x i32] = 40 bytes, rounded up to alignment 64 => 64 bytes.
+//
+// CHECK-LABEL: define void @test_allocate_array_global
+// CHECK: %[[TID:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK: %[[ALLOC:.*]] = call ptr @__kmpc_aligned_alloc(i32 %[[TID]], i64 64, i64 64, ptr inttoptr (i32 6 to ptr))
+// CHECK: %[[TID_FREE:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK: call void @__kmpc_free(i32 %[[TID_FREE]], ptr %[[ALLOC]], ptr inttoptr (i32 6 to ptr))
+// CHECK: ret void
+llvm.mlir.global internal @arr_global() : !llvm.array<10 x i32> {
+ %0 = llvm.mlir.zero : !llvm.array<10 x i32>
+ llvm.return %0 : !llvm.array<10 x i32>
+}
+
+llvm.func @test_allocate_array_global() {
+ %z = llvm.mlir.addressof @arr_global : !llvm.ptr
+ %alloc6 = llvm.mlir.constant(6 : i32) : i32
+ omp.allocate_dir (%z : !llvm.ptr) align(64) allocator(%alloc6 : i32)
+ llvm.return
+}
More information about the Mlir-commits
mailing list