[Mlir-commits] [flang] [llvm] [mlir] [Flang] [OpenMP] [MLIR] Add lowering support for OMP ALLOCATE directives and its clauses (PR #187167)
Raghu Maddhipatla
llvmlistbot at llvm.org
Fri Apr 10 16:01:13 PDT 2026
https://github.com/raghavendhra updated https://github.com/llvm/llvm-project/pull/187167
>From f254c678b6d2d421eb37c4df109ed6383457457c Mon Sep 17 00:00:00 2001
From: Raghu Maddhipatla <Raghu.Maddhipatla at amd.com>
Date: Tue, 17 Mar 2026 19:32:38 -0500
Subject: [PATCH 1/6] [Flang] [OpenMP] [MLIR] Add lowering support for OMP
ALLOCATE directive and its clauses.
---
flang/lib/Lower/OpenMP/ClauseProcessor.cpp | 19 ++++
flang/lib/Lower/OpenMP/ClauseProcessor.h | 2 +
flang/lib/Lower/OpenMP/OpenMP.cpp | 40 ++++++-
.../Todo/omp-declarative-allocate-align.f90 | 10 --
.../OpenMP/Todo/omp-declarative-allocate.f90 | 10 --
.../OpenMP/omp-declarative-allocate-align.f90 | 47 ++++++++
.../Lower/OpenMP/omp-declarative-allocate.f90 | 19 ++++
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 15 ++-
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 17 +++
.../mlir/Dialect/OpenMP/OpenMPClauses.td | 4 +-
.../mlir/Target/LLVMIR/ModuleTranslation.h | 13 +++
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 107 ++++++++++++++++++
mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 28 +++++
mlir/test/Dialect/OpenMP/ops.mlir | 16 +--
14 files changed, 314 insertions(+), 33 deletions(-)
delete mode 100644 flang/test/Lower/OpenMP/Todo/omp-declarative-allocate-align.f90
delete mode 100644 flang/test/Lower/OpenMP/Todo/omp-declarative-allocate.f90
create mode 100644 flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90
create mode 100644 flang/test/Lower/OpenMP/omp-declarative-allocate.f90
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index 45b11c818245e..47cdaf1829913 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -325,6 +325,25 @@ static void collectIteratorIVs(
// ClauseProcessor unique clauses
//===----------------------------------------------------------------------===//
+bool ClauseProcessor::processAlign(
+ mlir::omp::AlignClauseOps &result) const {
+ if (auto *clause = findUniqueClause<omp::clause::Align>()) {
+ fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+ const std::optional<std::int64_t> align = evaluate::ToInt64(clause->v);
+ result.align = firOpBuilder.getI64IntegerAttr(*align);
+ return true;
+ }
+ return false;
+}
+
+bool ClauseProcessor::processAllocator(lower::StatementContext &stmtCtx, mlir::omp::AllocatorClauseOps &result) const {
+ if (auto *clause = findUniqueClause<omp::clause::Allocator>()) {
+ result.allocator = fir::getBase(converter.genExprValue(clause->v, stmtCtx));
+ return true;
+ }
+ return false;
+}
+
bool ClauseProcessor::processBare(mlir::omp::BareClauseOps &result) const {
return markClauseOccurrence<omp::clause::OmpxBare>(result.bare);
}
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h
index f343ee8ff4332..33323036cdc3a 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.h
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h
@@ -57,6 +57,8 @@ class ClauseProcessor {
: converter(converter), semaCtx(semaCtx), clauses(clauses) {}
// 'Unique' clauses: They can appear at most once in the clause list.
+ bool processAlign(mlir::omp::AlignClauseOps &result) const;
+ bool processAllocator(lower::StatementContext &stmtCtx, mlir::omp::AllocatorClauseOps &result) const;
bool processBare(mlir::omp::BareClauseOps &result) const;
bool processBind(mlir::omp::BindClauseOps &result) const;
bool processCancelDirectiveName(
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 33de565eda275..cce210b54dceb 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1507,6 +1507,21 @@ static OpTy genWrapperOp(lower::AbstractConverter &converter,
// Code generation functions for clauses
//===----------------------------------------------------------------------===//
+static void genAllocateClauses(lower::AbstractConverter &converter,
+ semantics::SemanticsContext &semaCtx,
+ lower::StatementContext &stmtCtx,
+ const ObjectList &objects,
+ const List<Clause> &clauses, mlir::Location loc,
+ llvm::SmallVectorImpl<mlir::Value> &operandRange,
+ mlir::omp::AllocateDirOperands &clauseOps) {
+ if (!objects.empty())
+ genObjectList(objects, converter, operandRange);
+
+ ClauseProcessor cp(converter, semaCtx, clauses);
+ cp.processAlign(clauseOps);
+ cp.processAllocator(stmtCtx, clauseOps);
+}
+
static void genCancelClauses(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx,
const List<Clause> &clauses, mlir::Location loc,
@@ -1927,6 +1942,17 @@ static void genWsloopClauses(
//===----------------------------------------------------------------------===//
// Code generation functions for leaf constructs
//===----------------------------------------------------------------------===//
+static mlir::omp::AllocateDirOp
+genAllocateDirOp(lower::AbstractConverter &converter,
+ semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, lower::pft::Evaluation &eval,
+ mlir::Location loc, const ObjectList &objects, const ConstructQueue &queue, ConstructQueue::const_iterator item) {
+ llvm::SmallVector<mlir::Value> operandRange;
+ mlir::omp::AllocateDirOperands clauseOps;
+ genAllocateClauses(converter, semaCtx, stmtCtx, objects, item->clauses, loc,
+ operandRange, clauseOps);
+
+ return mlir::omp::AllocateDirOp::create(converter.getFirOpBuilder(), loc, operandRange, clauseOps.align, clauseOps.allocator);
+}
static mlir::omp::BarrierOp
genBarrierOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
@@ -3841,8 +3867,18 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
const parser::OmpAllocateDirective &allocate) {
- if (!semaCtx.langOptions().OpenMPSimd)
- TODO(converter.getCurrentLocation(), "OmpAllocateDirective");
+ lower::StatementContext stmtCtx;
+ ObjectList objects = makeObjects((allocate.BeginDir().Arguments()), semaCtx);
+ const auto &clauseList = (allocate.BeginDir().Clauses());
+ List<Clause> clauses = makeClauses(clauseList, semaCtx);
+ mlir::Location loc = converter.genLocation(allocate.source);
+
+ ConstructQueue queue{buildConstructQueue(
+ converter.getFirOpBuilder().getModule(), semaCtx, eval, allocate.source,
+ llvm::omp::Directive::OMPD_allocate, clauses)};
+
+ genAllocateDirOp(converter, semaCtx, stmtCtx, eval, loc, objects,
+ queue, queue.begin());
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
diff --git a/flang/test/Lower/OpenMP/Todo/omp-declarative-allocate-align.f90 b/flang/test/Lower/OpenMP/Todo/omp-declarative-allocate-align.f90
deleted file mode 100644
index fec146ac70313..0000000000000
--- a/flang/test/Lower/OpenMP/Todo/omp-declarative-allocate-align.f90
+++ /dev/null
@@ -1,10 +0,0 @@
-! This test checks lowering of OpenMP allocate Directive with align clause.
-
-! RUN: not %flang_fc1 -emit-fir -fopenmp -fopenmp-version=51 %s 2>&1 | FileCheck %s
-
-program main
- integer :: x
-
- ! CHECK: not yet implemented: OmpAllocateDirective
- !$omp allocate(x) align(32)
-end
diff --git a/flang/test/Lower/OpenMP/Todo/omp-declarative-allocate.f90 b/flang/test/Lower/OpenMP/Todo/omp-declarative-allocate.f90
deleted file mode 100644
index 7cae8051fda77..0000000000000
--- a/flang/test/Lower/OpenMP/Todo/omp-declarative-allocate.f90
+++ /dev/null
@@ -1,10 +0,0 @@
-! This test checks lowering of OpenMP allocate Directive.
-
-! RUN: not %flang_fc1 -emit-fir -fopenmp %s 2>&1 | FileCheck %s
-
-program main
- integer :: x, y
-
- ! CHECK: not yet implemented: OmpAllocateDirective
- !$omp allocate(x, y)
-end
diff --git a/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90 b/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90
new file mode 100644
index 0000000000000..50c6ab1f64002
--- /dev/null
+++ b/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90
@@ -0,0 +1,47 @@
+! This test checks lowering of OpenMP allocate Directive with align and allocator
+! clauses to LLVM IR. Verifies code generation for:
+! - align(16) only (null allocator)
+! - allocator(omp_default_mem_alloc) only (no align)
+! - align(64) allocator(omp_cgroup_mem_alloc) (both clauses, array variable)
+! - align(32) allocator(3) (both clauses, multiple variables)
+
+! RUN: %flang_fc1 -emit-llvm %openmp_flags -fopenmp-version=51 %s -o - 2>&1 | FileCheck %s
+
+program main
+ use omp_lib
+ integer :: x, y
+ integer :: z(10)
+ character c
+ real(kind = 16) :: r
+ complex cmplx
+ !$omp allocate(x) align(16)
+ !$omp allocate(y) allocator(omp_default_mem_alloc)
+ !$omp allocate(z) align(64) allocator(omp_cgroup_mem_alloc)
+ !$omp allocate(c, r, cmplx) align(32) allocator(3)
+ x = 1
+ y = 2
+ z = x + y
+ print *, "z : ", z
+end program
+
+! CHECK: define void @_QQmain()
+! CHECK: call i32 @__kmpc_global_thread_num(
+
+! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 16, i64 {{.*}}, ptr null)
+! CHECK: call ptr @__kmpc_alloc(i32 {{.*}}, i64 {{.*}}, ptr inttoptr (i64 1 to ptr))
+! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 64, i64 {{.*}}, ptr inttoptr (i64 6 to ptr))
+! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 32, i64 {{.*}}, ptr inttoptr (i32 3 to ptr))
+! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 32, i64 {{.*}}, ptr inttoptr (i32 3 to ptr))
+! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 32, i64 {{.*}}, ptr inttoptr (i32 3 to ptr))
+
+! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 3 to ptr))
+! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 3 to ptr))
+! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 3 to ptr))
+! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i64 6 to ptr))
+! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i64 1 to ptr))
+! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr null)
+! CHECK: ret void
+
+! CHECK: declare noalias ptr @__kmpc_aligned_alloc(i32, i64, i64, ptr)
+! CHECK: declare noalias ptr @__kmpc_alloc(i32, i64, ptr)
+! CHECK: declare void @__kmpc_free(i32, ptr, ptr)
diff --git a/flang/test/Lower/OpenMP/omp-declarative-allocate.f90 b/flang/test/Lower/OpenMP/omp-declarative-allocate.f90
new file mode 100644
index 0000000000000..7c8047ebf7f53
--- /dev/null
+++ b/flang/test/Lower/OpenMP/omp-declarative-allocate.f90
@@ -0,0 +1,19 @@
+! This test checks lowering of OpenMP allocate Directive to LLVM IR.
+! Verifies code generation for default (no align, null allocator) case.
+
+! RUN: %flang_fc1 -emit-llvm -fopenmp %s -o - | FileCheck %s
+
+program main
+ integer :: x, y
+ !$omp allocate(x, y)
+end program
+
+! CHECK: define void @_QQmain()
+! CHECK: call i32 @__kmpc_global_thread_num(
+! CHECK: call ptr @__kmpc_alloc(i32 {{.*}}, i64 8, ptr null)
+! CHECK: call ptr @__kmpc_alloc(i32 {{.*}}, i64 8, ptr null)
+! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr null)
+! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr null)
+! CHECK: ret void
+! CHECK: declare noalias ptr @__kmpc_alloc(i32, i64, ptr)
+! CHECK: declare void @__kmpc_free(i32, ptr, ptr)
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 383fd9d94661a..fdf1e1f0b18ed 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -3180,7 +3180,7 @@ class OpenMPIRBuilder {
llvm::IntegerType *IntPtrTy,
bool BranchtoEnd = true);
- /// Create a runtime call for kmpc_Alloc
+ /// Create a runtime call for kmpc_alloc
///
/// \param Loc The insert and source location description.
/// \param Size Size of allocated memory space
@@ -3191,6 +3191,19 @@ class OpenMPIRBuilder {
LLVM_ABI CallInst *createOMPAlloc(const LocationDescription &Loc, Value *Size,
Value *Allocator, std::string Name = "");
+ /// Create a runtime call for kmpc_aligned_alloc
+ ///
+ /// \param Loc The insert and source location description.
+ /// \param Align Requested alignment of the allocated memory space
+ /// \param Size Size of allocated memory space
+ /// \param Allocator Allocator information instruction
+ /// \param Name Name of call Instruction for OMP_Align_Alloc
+ ///
+ /// \returns CallInst to the OMP_Align_Alloc call
+ LLVM_ABI CallInst *createOMPAlignedAlloc(const LocationDescription &Loc,
+ Value *Align, Value *Size, Value *Allocator,
+ std::string Name = "");
+
/// Create a runtime call for kmpc_free
///
/// \param Loc The insert and source location description.
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 6a3cbde33e785..eecfc3c2ba251 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -7641,6 +7641,23 @@ CallInst *OpenMPIRBuilder::createOMPAlloc(const LocationDescription &Loc,
return createRuntimeFunctionCall(Fn, Args, Name);
}
+CallInst *OpenMPIRBuilder::createOMPAlignedAlloc(const LocationDescription &Loc,
+ Value *Align, Value *Size, Value *Allocator,
+ std::string Name) {
+ IRBuilder<>::InsertPointGuard IPG(Builder);
+ updateToLocation(Loc);
+
+ uint32_t SrcLocStrSize;
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
+ Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+ Value *ThreadId = getOrCreateThreadID(Ident);
+ Value *Args[] = {ThreadId, Align, Size, Allocator};
+
+ Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_aligned_alloc);
+
+ return Builder.CreateCall(Fn, Args, Name);
+}
+
CallInst *OpenMPIRBuilder::createOMPFree(const LocationDescription &Loc,
Value *Addr, Value *Allocator,
std::string Name) {
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td
index f24efd0d4fc42..13a1fc3bd08bc 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td
@@ -146,11 +146,11 @@ class OpenMP_AllocatorClauseSkip<
extraClassDeclaration> {
let arguments = (ins
- Optional<I64>:$allocator
+ Optional<AnyInteger>:$allocator
);
let optAssemblyFormat = [{
- `allocator` `(` $allocator `)`
+ `allocator` `(` $allocator `:` type($allocator) `)`
}];
let description = [{
diff --git a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
index c67bb57985bd0..f073081002719 100644
--- a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
+++ b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
@@ -272,6 +272,11 @@ class ModuleTranslation {
/// constructed.
llvm::OpenMPIRBuilder *getOpenMPBuilder();
+ /// Registers a pending __kmpc_free call for the given block. These are
+ /// emitted before the block's terminator during block conversion.
+ void registerPendingOmpAllocateFree(Block *block, llvm::Value *ptr,
+ llvm::Value *allocator);
+
/// Returns the LLVM module in which the IR is being constructed.
llvm::Module *getLLVMModule() { return llvmModule.get(); }
@@ -401,6 +406,9 @@ class ModuleTranslation {
llvm::IRBuilderBase &builder,
bool recordInsertions);
+ /// Emits pending __kmpc_free calls for the block, before its terminator.
+ void emitPendingOmpAllocateFrees(Block &bb, llvm::IRBuilderBase &builder);
+
/// Returns the LLVM metadata corresponding to the given mlir LLVM dialect
/// TBAATagAttr.
llvm::MDNode *getTBAANode(TBAATagAttr tbaaAttr) const;
@@ -509,6 +517,11 @@ class ModuleTranslation {
/// block.
DenseMap<BlockAddressAttr, llvm::BasicBlock *> blockAddressToLLVMMapping;
+ /// Pending __kmpc_free calls per block, emitted before the terminator.
+ DenseMap<Block *,
+ llvm::SmallVector<std::pair<llvm::Value *, llvm::Value *>>>
+ pendingOmpAllocateFrees;
+
/// Stack of user-specified state elements, useful when translating operations
/// with regions.
StateStack stack;
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 2e15f4de4545d..663d8274bd6fe 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -4632,6 +4632,22 @@ static Operation *getGlobalOpFromValue(Value value) {
return nullptr;
}
+static Value getBaseValueForTypeLookup(Value value) {
+ while (Operation *op = value.getDefiningOp()) {
+ if (auto addrCast = dyn_cast_if_present<LLVM::AddrSpaceCastOp>(op))
+ value = addrCast.getOperand();
+ else if (op->getName().getIdentifier()) {
+ if (op->getNumOperands() > 0)
+ value = op->getOperand(0);
+ else
+ break;
+ } else {
+ break;
+ }
+ }
+ return value;
+}
+
static llvm::SmallString<64>
getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp,
llvm::OpenMPIRBuilder &ompBuilder) {
@@ -7474,6 +7490,94 @@ convertTargetAllocMemOp(Operation &opInst, llvm::IRBuilderBase &builder,
return success();
}
+static LogicalResult
+convertAllocateDirOp(Operation &opInst, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation) {
+ auto allocateDirOp = cast<omp::AllocateDirOp>(opInst);
+ llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+
+ llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+ llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
+ llvm::DataLayout dataLayout = llvmModule->getDataLayout();
+ SmallVector<Value> vars = allocateDirOp.getVarList();
+ std::optional<int64_t> alignAttr = allocateDirOp.getAlign();
+
+ llvm::Value *allocator;
+ if (auto allocatorVar = allocateDirOp.getAllocator()) {
+ allocator = moduleTranslation.lookupValue(allocatorVar);
+ if (allocator->getType()->isIntegerTy())
+ allocator = builder.CreateIntToPtr(allocator, builder.getPtrTy());
+ else if (allocator->getType()->isPointerTy())
+ allocator =
+ builder.CreatePointerBitCastOrAddrSpaceCast(allocator, builder.getPtrTy());
+ } else {
+ allocator = llvm::ConstantPointerNull::get(builder.getPtrTy());
+ }
+
+ SmallVector<std::pair<llvm::CallInst *, llvm::Value *>> allocatedVars;
+
+ for (Value var : vars) {
+ llvm::Type *llvmVarTy = moduleTranslation.convertType(var.getType());
+
+ // Opaque pointers lose the element type, so trace back to the GlobalOp to
+ // recover it; fall back to llvmVarTy when the value is not from a global.
+ llvm::Type *typeToInspect = llvmVarTy;
+ if (llvmVarTy->isPointerTy()) {
+ Value baseVar = getBaseValueForTypeLookup(var);
+ if (Operation *globalOp = getGlobalOpFromValue(baseVar)) {
+ if (auto gop = dyn_cast<LLVM::GlobalOp>(globalOp))
+ typeToInspect =
+ moduleTranslation.convertType(gop.getGlobalType());
+ }
+ }
+
+ llvm::Value *size;
+ if (auto arrTy = llvm::dyn_cast<llvm::ArrayType>(typeToInspect)) {
+ llvm::Value *elementCount = builder.getInt64(1);
+ llvm::Type *currentType = arrTy;
+ while (auto nestedArrTy = llvm::dyn_cast<llvm::ArrayType>(currentType)) {
+ elementCount = builder.CreateMul(
+ elementCount, builder.getInt64(nestedArrTy->getNumElements()));
+ currentType = nestedArrTy->getElementType();
+ }
+ uint64_t elemSizeInBits = dataLayout.getTypeSizeInBits(currentType);
+ size = builder.CreateMul(elementCount,
+ builder.getInt64(elemSizeInBits / 8));
+ } else {
+ size = builder.getInt64(
+ dataLayout.getTypeStoreSize(typeToInspect).getFixedValue());
+ }
+
+ uint64_t alignValue =
+ alignAttr ? alignAttr.value()
+ : dataLayout.getABITypeAlign(typeToInspect).value();
+ llvm::Value *alignConst = builder.getInt64(alignValue);
+ // Align the size: ((size + align - 1) / align) * align
+ size = builder.CreateAdd(size, builder.getInt64(alignValue - 1), "", true);
+ size = builder.CreateUDiv(size, alignConst);
+ size = builder.CreateMul(size, alignConst, "", true);
+
+ std::string allocName =
+ ompBuilder->createPlatformSpecificName({".void.addr"});
+ llvm::CallInst *allocCall;
+ if (alignAttr.has_value()) {
+ allocCall = ompBuilder->createOMPAlignedAlloc(
+ ompLoc, builder.getInt64(alignAttr.value()), size, allocator, allocName);
+ } else {
+ allocCall = ompBuilder->createOMPAlloc(ompLoc, size, allocator, allocName);
+ }
+ allocatedVars.push_back({allocCall, allocator});
+ }
+
+ // Register __kmpc_free calls to be emitted before the block terminator.
+ Block *block = allocateDirOp->getBlock();
+ for (auto &alloc : allocatedVars)
+ moduleTranslation.registerPendingOmpAllocateFree(block, alloc.first,
+ alloc.second);
+
+ return success();
+}
+
static llvm::Function *getOmpTargetFree(llvm::IRBuilderBase &builder,
llvm::Module *llvmModule) {
llvm::Type *ptrTy = builder.getPtrTy(0);
@@ -7719,6 +7823,9 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
.Case([&](omp::TargetFreeMemOp) {
return convertTargetFreeMemOp(*op, builder, moduleTranslation);
})
+ .Case([&](omp::AllocateDirOp) {
+ return convertAllocateDirOp(*op, builder, moduleTranslation);
+ })
.Default([&](Operation *inst) {
return inst->emitError()
<< "not yet implemented: " << inst->getName();
diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
index cf398f151ed0b..2f0345b67ea6b 100644
--- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
@@ -1009,6 +1009,30 @@ LogicalResult ModuleTranslation::convertOperation(Operation &op,
return convertDialectAttributes(&op, scope.getCapturedInstructions());
}
+void ModuleTranslation::registerPendingOmpAllocateFree(Block *block,
+ llvm::Value *ptr,
+ llvm::Value *allocator) {
+ pendingOmpAllocateFrees[block].push_back({ptr, allocator});
+}
+
+void ModuleTranslation::emitPendingOmpAllocateFrees(
+ Block &bb, llvm::IRBuilderBase &builder) {
+ auto it = pendingOmpAllocateFrees.find(&bb);
+ if (it == pendingOmpAllocateFrees.end() || it->second.empty())
+ return;
+ llvm::OpenMPIRBuilder *ompBuilder = getOpenMPBuilder();
+ llvm::BasicBlock *llvmBB = lookupBlock(&bb);
+ llvm::Instruction *term = llvmBB->getTerminator();
+ if (term)
+ builder.SetInsertPoint(term);
+ else
+ builder.SetInsertPoint(llvmBB);
+ llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+ for (auto it2 = it->second.rbegin(); it2 != it->second.rend(); ++it2)
+ ompBuilder->createOMPFree(ompLoc, it2->first, it2->second, "");
+ pendingOmpAllocateFrees.erase(it);
+}
+
/// Convert block to LLVM IR. Unless `ignoreArguments` is set, emit PHI nodes
/// to define values corresponding to the MLIR block arguments. These nodes
/// are not connected to the source basic blocks, which may not exist yet. Uses
@@ -1048,6 +1072,10 @@ LogicalResult ModuleTranslation::convertBlockImpl(Block &bb,
// Traverse operations.
for (auto &op : bb) {
+ // Emit pending OpenMP allocate frees before the terminator.
+ if (op.hasTrait<OpTrait::IsTerminator>())
+ emitPendingOmpAllocateFrees(bb, builder);
+
// Set the current debug location within the builder.
builder.SetCurrentDebugLocation(
debugTranslation->translateLoc(op.getLoc(), subprogram));
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index 869f163cb4014..3d1133f4ba6e9 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -3466,27 +3466,27 @@ func.func @omp_allocate_dir(%arg0 : memref<i32>, %arg1 : memref<i32>) -> () {
// Test with one data var and allocator clause
// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64
%omp_default_mem_alloc = arith.constant 1 : i64
- // CHECK: omp.allocate_dir(%[[ARG0]] : memref<i32>) allocator(%[[VAL_1:.*]])
- omp.allocate_dir (%arg0 : memref<i32>) allocator(%omp_default_mem_alloc)
+ // CHECK: omp.allocate_dir(%[[ARG0]] : memref<i32>) allocator(%[[VAL_1:.*]] : i64)
+ omp.allocate_dir (%arg0 : memref<i32>) allocator(%omp_default_mem_alloc : i64)
// Test with one data var, align clause and allocator clause
// CHECK: %[[VAL_2:.*]] = arith.constant 7 : i64
%omp_pteam_mem_alloc = arith.constant 7 : i64
- // CHECK: omp.allocate_dir(%[[ARG0]] : memref<i32>) align(4) allocator(%[[VAL_2:.*]])
- omp.allocate_dir (%arg0 : memref<i32>) align(4) allocator(%omp_pteam_mem_alloc)
+ // CHECK: omp.allocate_dir(%[[ARG0]] : memref<i32>) align(4) allocator(%[[VAL_2:.*]] : i64)
+ omp.allocate_dir (%arg0 : memref<i32>) align(4) allocator(%omp_pteam_mem_alloc : i64)
// Test with two data vars, align clause and allocator clause
// CHECK: %[[VAL_3:.*]] = arith.constant 6 : i64
%omp_cgroup_mem_alloc = arith.constant 6 : i64
- // CHECK: omp.allocate_dir(%[[ARG0]], %[[ARG1]] : memref<i32>, memref<i32>) align(8) allocator(%[[VAL_3:.*]])
- omp.allocate_dir (%arg0, %arg1 : memref<i32>, memref<i32>) align(8) allocator(%omp_cgroup_mem_alloc)
+ // CHECK: omp.allocate_dir(%[[ARG0]], %[[ARG1]] : memref<i32>, memref<i32>) align(8) allocator(%[[VAL_3:.*]] : i64)
+ omp.allocate_dir (%arg0, %arg1 : memref<i32>, memref<i32>) align(8) allocator(%omp_cgroup_mem_alloc : i64)
// Test with one data var and user defined allocator clause
// CHECK: %[[VAL_4:.*]] = arith.constant 9 : i64
%custom_allocator = arith.constant 9 : i64
%custom_mem_alloc = func.call @omp_init_allocator(%custom_allocator) : (i64) -> (i64)
- // CHECK: omp.allocate_dir(%[[ARG0]] : memref<i32>) allocator(%[[VAL_5:.*]])
- omp.allocate_dir (%arg0 : memref<i32>) allocator(%custom_mem_alloc)
+ // CHECK: omp.allocate_dir(%[[ARG0]] : memref<i32>) allocator(%[[VAL_5:.*]] : i64)
+ omp.allocate_dir (%arg0 : memref<i32>) allocator(%custom_mem_alloc : i64)
return
}
>From 07293d33aa3e5d5422ba9a41fd167a017c09db37 Mon Sep 17 00:00:00 2001
From: Raghu Maddhipatla <Raghu.Maddhipatla at amd.com>
Date: Tue, 17 Mar 2026 21:28:49 -0500
Subject: [PATCH 2/6] Fix clang-formatting
---
flang/lib/Lower/OpenMP/ClauseProcessor.cpp | 7 +++--
flang/lib/Lower/OpenMP/ClauseProcessor.h | 3 +-
flang/lib/Lower/OpenMP/OpenMP.cpp | 31 ++++++++++---------
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 5 +--
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 5 +--
.../mlir/Target/LLVMIR/ModuleTranslation.h | 3 +-
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 21 +++++++------
7 files changed, 41 insertions(+), 34 deletions(-)
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index 47cdaf1829913..7d2fe869322f3 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -325,8 +325,7 @@ static void collectIteratorIVs(
// ClauseProcessor unique clauses
//===----------------------------------------------------------------------===//
-bool ClauseProcessor::processAlign(
- mlir::omp::AlignClauseOps &result) const {
+bool ClauseProcessor::processAlign(mlir::omp::AlignClauseOps &result) const {
if (auto *clause = findUniqueClause<omp::clause::Align>()) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
const std::optional<std::int64_t> align = evaluate::ToInt64(clause->v);
@@ -336,7 +335,9 @@ bool ClauseProcessor::processAlign(
return false;
}
-bool ClauseProcessor::processAllocator(lower::StatementContext &stmtCtx, mlir::omp::AllocatorClauseOps &result) const {
+bool ClauseProcessor::processAllocator(
+ lower::StatementContext &stmtCtx,
+ mlir::omp::AllocatorClauseOps &result) const {
if (auto *clause = findUniqueClause<omp::clause::Allocator>()) {
result.allocator = fir::getBase(converter.genExprValue(clause->v, stmtCtx));
return true;
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h
index 33323036cdc3a..29b5c29b8e33a 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.h
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h
@@ -58,7 +58,8 @@ class ClauseProcessor {
// 'Unique' clauses: They can appear at most once in the clause list.
bool processAlign(mlir::omp::AlignClauseOps &result) const;
- bool processAllocator(lower::StatementContext &stmtCtx, mlir::omp::AllocatorClauseOps &result) const;
+ bool processAllocator(lower::StatementContext &stmtCtx,
+ mlir::omp::AllocatorClauseOps &result) const;
bool processBare(mlir::omp::BareClauseOps &result) const;
bool processBind(mlir::omp::BindClauseOps &result) const;
bool processCancelDirectiveName(
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index cce210b54dceb..3cc343925d8fa 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1508,12 +1508,12 @@ static OpTy genWrapperOp(lower::AbstractConverter &converter,
//===----------------------------------------------------------------------===//
static void genAllocateClauses(lower::AbstractConverter &converter,
- semantics::SemanticsContext &semaCtx,
- lower::StatementContext &stmtCtx,
- const ObjectList &objects,
- const List<Clause> &clauses, mlir::Location loc,
- llvm::SmallVectorImpl<mlir::Value> &operandRange,
- mlir::omp::AllocateDirOperands &clauseOps) {
+ semantics::SemanticsContext &semaCtx,
+ lower::StatementContext &stmtCtx,
+ const ObjectList &objects,
+ const List<Clause> &clauses, mlir::Location loc,
+ llvm::SmallVectorImpl<mlir::Value> &operandRange,
+ mlir::omp::AllocateDirOperands &clauseOps) {
if (!objects.empty())
genObjectList(objects, converter, operandRange);
@@ -1942,16 +1942,19 @@ static void genWsloopClauses(
//===----------------------------------------------------------------------===//
// Code generation functions for leaf constructs
//===----------------------------------------------------------------------===//
-static mlir::omp::AllocateDirOp
-genAllocateDirOp(lower::AbstractConverter &converter,
- semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, lower::pft::Evaluation &eval,
- mlir::Location loc, const ObjectList &objects, const ConstructQueue &queue, ConstructQueue::const_iterator item) {
+static mlir::omp::AllocateDirOp genAllocateDirOp(
+ lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
+ lower::StatementContext &stmtCtx, lower::pft::Evaluation &eval,
+ mlir::Location loc, const ObjectList &objects, const ConstructQueue &queue,
+ ConstructQueue::const_iterator item) {
llvm::SmallVector<mlir::Value> operandRange;
mlir::omp::AllocateDirOperands clauseOps;
genAllocateClauses(converter, semaCtx, stmtCtx, objects, item->clauses, loc,
- operandRange, clauseOps);
+ operandRange, clauseOps);
- return mlir::omp::AllocateDirOp::create(converter.getFirOpBuilder(), loc, operandRange, clauseOps.align, clauseOps.allocator);
+ return mlir::omp::AllocateDirOp::create(converter.getFirOpBuilder(), loc,
+ operandRange, clauseOps.align,
+ clauseOps.allocator);
}
static mlir::omp::BarrierOp
@@ -3877,8 +3880,8 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
converter.getFirOpBuilder().getModule(), semaCtx, eval, allocate.source,
llvm::omp::Directive::OMPD_allocate, clauses)};
- genAllocateDirOp(converter, semaCtx, stmtCtx, eval, loc, objects,
- queue, queue.begin());
+ genAllocateDirOp(converter, semaCtx, stmtCtx, eval, loc, objects, queue,
+ queue.begin());
}
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index fdf1e1f0b18ed..7c78f7a1d8f44 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -3201,8 +3201,9 @@ class OpenMPIRBuilder {
///
/// \returns CallInst to the OMP_Align_Alloc call
LLVM_ABI CallInst *createOMPAlignedAlloc(const LocationDescription &Loc,
- Value *Align, Value *Size, Value *Allocator,
- std::string Name = "");
+ Value *Align, Value *Size,
+ Value *Allocator,
+ std::string Name = "");
/// Create a runtime call for kmpc_free
///
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index eecfc3c2ba251..edd181f7eea2e 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -7642,8 +7642,9 @@ CallInst *OpenMPIRBuilder::createOMPAlloc(const LocationDescription &Loc,
}
CallInst *OpenMPIRBuilder::createOMPAlignedAlloc(const LocationDescription &Loc,
- Value *Align, Value *Size, Value *Allocator,
- std::string Name) {
+ Value *Align, Value *Size,
+ Value *Allocator,
+ std::string Name) {
IRBuilder<>::InsertPointGuard IPG(Builder);
updateToLocation(Loc);
diff --git a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
index f073081002719..243cca8831e37 100644
--- a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
+++ b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
@@ -518,8 +518,7 @@ class ModuleTranslation {
DenseMap<BlockAddressAttr, llvm::BasicBlock *> blockAddressToLLVMMapping;
/// Pending __kmpc_free calls per block, emitted before the terminator.
- DenseMap<Block *,
- llvm::SmallVector<std::pair<llvm::Value *, llvm::Value *>>>
+ DenseMap<Block *, llvm::SmallVector<std::pair<llvm::Value *, llvm::Value *>>>
pendingOmpAllocateFrees;
/// Stack of user-specified state elements, useful when translating operations
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 663d8274bd6fe..5f5b1150f9588 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -7492,7 +7492,7 @@ convertTargetAllocMemOp(Operation &opInst, llvm::IRBuilderBase &builder,
static LogicalResult
convertAllocateDirOp(Operation &opInst, llvm::IRBuilderBase &builder,
- LLVM::ModuleTranslation &moduleTranslation) {
+ LLVM::ModuleTranslation &moduleTranslation) {
auto allocateDirOp = cast<omp::AllocateDirOp>(opInst);
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
@@ -7508,8 +7508,8 @@ convertAllocateDirOp(Operation &opInst, llvm::IRBuilderBase &builder,
if (allocator->getType()->isIntegerTy())
allocator = builder.CreateIntToPtr(allocator, builder.getPtrTy());
else if (allocator->getType()->isPointerTy())
- allocator =
- builder.CreatePointerBitCastOrAddrSpaceCast(allocator, builder.getPtrTy());
+ allocator = builder.CreatePointerBitCastOrAddrSpaceCast(
+ allocator, builder.getPtrTy());
} else {
allocator = llvm::ConstantPointerNull::get(builder.getPtrTy());
}
@@ -7526,8 +7526,7 @@ convertAllocateDirOp(Operation &opInst, llvm::IRBuilderBase &builder,
Value baseVar = getBaseValueForTypeLookup(var);
if (Operation *globalOp = getGlobalOpFromValue(baseVar)) {
if (auto gop = dyn_cast<LLVM::GlobalOp>(globalOp))
- typeToInspect =
- moduleTranslation.convertType(gop.getGlobalType());
+ typeToInspect = moduleTranslation.convertType(gop.getGlobalType());
}
}
@@ -7541,8 +7540,8 @@ convertAllocateDirOp(Operation &opInst, llvm::IRBuilderBase &builder,
currentType = nestedArrTy->getElementType();
}
uint64_t elemSizeInBits = dataLayout.getTypeSizeInBits(currentType);
- size = builder.CreateMul(elementCount,
- builder.getInt64(elemSizeInBits / 8));
+ size =
+ builder.CreateMul(elementCount, builder.getInt64(elemSizeInBits / 8));
} else {
size = builder.getInt64(
dataLayout.getTypeStoreSize(typeToInspect).getFixedValue());
@@ -7562,9 +7561,11 @@ convertAllocateDirOp(Operation &opInst, llvm::IRBuilderBase &builder,
llvm::CallInst *allocCall;
if (alignAttr.has_value()) {
allocCall = ompBuilder->createOMPAlignedAlloc(
- ompLoc, builder.getInt64(alignAttr.value()), size, allocator, allocName);
+ ompLoc, builder.getInt64(alignAttr.value()), size, allocator,
+ allocName);
} else {
- allocCall = ompBuilder->createOMPAlloc(ompLoc, size, allocator, allocName);
+ allocCall =
+ ompBuilder->createOMPAlloc(ompLoc, size, allocator, allocName);
}
allocatedVars.push_back({allocCall, allocator});
}
@@ -7573,7 +7574,7 @@ convertAllocateDirOp(Operation &opInst, llvm::IRBuilderBase &builder,
Block *block = allocateDirOp->getBlock();
for (auto &alloc : allocatedVars)
moduleTranslation.registerPendingOmpAllocateFree(block, alloc.first,
- alloc.second);
+ alloc.second);
return success();
}
>From 18ea8e95e162186f99d5f8c56d1cdd3974a46431 Mon Sep 17 00:00:00 2001
From: Raghu Maddhipatla <Raghu.Maddhipatla at amd.com>
Date: Wed, 18 Mar 2026 00:14:58 -0500
Subject: [PATCH 3/6] Fix buildbot errors for the test program
---
.../OpenMP/omp-declarative-allocate-align.f90 | 22 +++++++++----------
1 file changed, 10 insertions(+), 12 deletions(-)
diff --git a/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90 b/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90
index 50c6ab1f64002..a131573ca5375 100644
--- a/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90
+++ b/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90
@@ -1,22 +1,21 @@
! This test checks lowering of OpenMP allocate Directive with align and allocator
! clauses to LLVM IR. Verifies code generation for:
! - align(16) only (null allocator)
-! - allocator(omp_default_mem_alloc) only (no align)
-! - align(64) allocator(omp_cgroup_mem_alloc) (both clauses, array variable)
+! - allocator(1) only (no align)
+! - align(64) allocator(6) (both clauses, array variable)
! - align(32) allocator(3) (both clauses, multiple variables)
! RUN: %flang_fc1 -emit-llvm %openmp_flags -fopenmp-version=51 %s -o - 2>&1 | FileCheck %s
program main
- use omp_lib
integer :: x, y
integer :: z(10)
character c
- real(kind = 16) :: r
- complex cmplx
+ real :: r
+ complex :: cmplx
!$omp allocate(x) align(16)
- !$omp allocate(y) allocator(omp_default_mem_alloc)
- !$omp allocate(z) align(64) allocator(omp_cgroup_mem_alloc)
+ !$omp allocate(y) allocator(1)
+ !$omp allocate(z) align(64) allocator(6)
!$omp allocate(c, r, cmplx) align(32) allocator(3)
x = 1
y = 2
@@ -24,12 +23,11 @@ program main
print *, "z : ", z
end program
-! CHECK: define void @_QQmain()
! CHECK: call i32 @__kmpc_global_thread_num(
! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 16, i64 {{.*}}, ptr null)
-! CHECK: call ptr @__kmpc_alloc(i32 {{.*}}, i64 {{.*}}, ptr inttoptr (i64 1 to ptr))
-! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 64, i64 {{.*}}, ptr inttoptr (i64 6 to ptr))
+! CHECK: call ptr @__kmpc_alloc(i32 {{.*}}, i64 {{.*}}, ptr inttoptr (i32 1 to ptr))
+! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 64, i64 {{.*}}, ptr inttoptr (i32 6 to ptr))
! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 32, i64 {{.*}}, ptr inttoptr (i32 3 to ptr))
! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 32, i64 {{.*}}, ptr inttoptr (i32 3 to ptr))
! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 32, i64 {{.*}}, ptr inttoptr (i32 3 to ptr))
@@ -37,8 +35,8 @@ program main
! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 3 to ptr))
! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 3 to ptr))
! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 3 to ptr))
-! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i64 6 to ptr))
-! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i64 1 to ptr))
+! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 6 to ptr))
+! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 1 to ptr))
! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr null)
! CHECK: ret void
>From f047c352d4c4615dc205f12a8719438652a0994a Mon Sep 17 00:00:00 2001
From: Raghu Maddhipatla <Raghu.Maddhipatla at amd.com>
Date: Tue, 31 Mar 2026 18:43:09 -0500
Subject: [PATCH 4/6] Address review comments. Moved implementation from
ModuleTranslation.cpp to use LLVMTranslationInterface.h
---
.../OpenMP/omp-declarative-allocate-align.f90 | 44 +++----
.../Lower/OpenMP/omp-declarative-allocate.f90 | 18 ++-
.../LLVMIR/LLVMTranslationDialectInterface.td | 10 ++
.../Target/LLVMIR/LLVMTranslationInterface.h | 11 ++
.../mlir/Target/LLVMIR/ModuleTranslation.h | 12 --
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 57 ++++++++-
mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 29 +----
.../LLVMIR/openmp-allocate-directive.mlir | 109 ++++++++++++++++++
8 files changed, 213 insertions(+), 77 deletions(-)
create mode 100644 mlir/test/Target/LLVMIR/openmp-allocate-directive.mlir
diff --git a/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90 b/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90
index a131573ca5375..0824d8bcb7e90 100644
--- a/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90
+++ b/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90
@@ -1,11 +1,11 @@
! This test checks lowering of OpenMP allocate Directive with align and allocator
-! clauses to LLVM IR. Verifies code generation for:
+! clauses to HLFIR. Verifies code generation for:
! - align(16) only (null allocator)
! - allocator(1) only (no align)
! - align(64) allocator(6) (both clauses, array variable)
! - align(32) allocator(3) (both clauses, multiple variables)
-! RUN: %flang_fc1 -emit-llvm %openmp_flags -fopenmp-version=51 %s -o - 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir %openmp_flags -fopenmp-version=51 %s -o - 2>&1 | FileCheck %s
program main
integer :: x, y
@@ -23,23 +23,23 @@ program main
print *, "z : ", z
end program
-! CHECK: call i32 @__kmpc_global_thread_num(
-
-! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 16, i64 {{.*}}, ptr null)
-! CHECK: call ptr @__kmpc_alloc(i32 {{.*}}, i64 {{.*}}, ptr inttoptr (i32 1 to ptr))
-! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 64, i64 {{.*}}, ptr inttoptr (i32 6 to ptr))
-! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 32, i64 {{.*}}, ptr inttoptr (i32 3 to ptr))
-! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 32, i64 {{.*}}, ptr inttoptr (i32 3 to ptr))
-! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 32, i64 {{.*}}, ptr inttoptr (i32 3 to ptr))
-
-! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 3 to ptr))
-! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 3 to ptr))
-! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 3 to ptr))
-! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 6 to ptr))
-! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 1 to ptr))
-! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr null)
-! CHECK: ret void
-
-! CHECK: declare noalias ptr @__kmpc_aligned_alloc(i32, i64, i64, ptr)
-! CHECK: declare noalias ptr @__kmpc_alloc(i32, i64, ptr)
-! CHECK: declare void @__kmpc_free(i32, ptr, ptr)
+! CHECK: %[[C1_IDX:.*]] = arith.constant 1 : index
+! CHECK: %[[C_ALLOC:.*]] = fir.alloca !fir.char<1> {bindc_name = "c", uniq_name = "_QFEc"}
+! CHECK: %[[C_DECL:.*]]:2 = hlfir.declare %[[C_ALLOC]] typeparams %[[C1_IDX]] {uniq_name = "_QFEc"} : (!fir.ref<!fir.char<1>>, index) -> (!fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>)
+! CHECK: %[[CMPLX_ALLOC:.*]] = fir.alloca complex<f32> {bindc_name = "cmplx", uniq_name = "_QFEcmplx"}
+! CHECK: %[[CMPLX_DECL:.*]]:2 = hlfir.declare %[[CMPLX_ALLOC]] {uniq_name = "_QFEcmplx"} : (!fir.ref<complex<f32>>) -> (!fir.ref<complex<f32>>, !fir.ref<complex<f32>>)
+! CHECK: %[[R_ALLOC:.*]] = fir.alloca f32 {bindc_name = "r", uniq_name = "_QFEr"}
+! CHECK: %[[R_DECL:.*]]:2 = hlfir.declare %[[R_ALLOC]] {uniq_name = "_QFEr"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[X_ALLOC:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFEx"}
+! CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X_ALLOC]] {uniq_name = "_QFEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[Y_ALLOC:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFEy"}
+! CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y_ALLOC]] {uniq_name = "_QFEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[Z_REF:.*]] = fir.address_of(@_QFEz) : !fir.ref<!fir.array<10xi32>>
+! CHECK: %[[Z_DECL:.*]]:2 = hlfir.declare %[[Z_REF]]({{.*}}) {uniq_name = "_QFEz"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
+! CHECK: omp.allocate_dir(%[[X_DECL]]#0 : !fir.ref<i32>) align(16)
+! CHECK: %[[ALLOC1:.*]] = arith.constant 1 : i32
+! CHECK: omp.allocate_dir(%[[Y_DECL]]#0 : !fir.ref<i32>) allocator(%[[ALLOC1]] : i32)
+! CHECK: %[[ALLOC6:.*]] = arith.constant 6 : i32
+! CHECK: omp.allocate_dir(%[[Z_DECL]]#0 : !fir.ref<!fir.array<10xi32>>) align(64) allocator(%[[ALLOC6]] : i32)
+! CHECK: %[[ALLOC3:.*]] = arith.constant 3 : i32
+! CHECK: omp.allocate_dir(%[[C_DECL]]#0, %[[R_DECL]]#0, %[[CMPLX_DECL]]#0 : !fir.ref<!fir.char<1>>, !fir.ref<f32>, !fir.ref<complex<f32>>) align(32) allocator(%[[ALLOC3]] : i32)
diff --git a/flang/test/Lower/OpenMP/omp-declarative-allocate.f90 b/flang/test/Lower/OpenMP/omp-declarative-allocate.f90
index 7c8047ebf7f53..69da3f52b459f 100644
--- a/flang/test/Lower/OpenMP/omp-declarative-allocate.f90
+++ b/flang/test/Lower/OpenMP/omp-declarative-allocate.f90
@@ -1,19 +1,15 @@
-! This test checks lowering of OpenMP allocate Directive to LLVM IR.
+! This test checks lowering of OpenMP allocate Directive to HLFIR.
! Verifies code generation for default (no align, null allocator) case.
-! RUN: %flang_fc1 -emit-llvm -fopenmp %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
program main
integer :: x, y
!$omp allocate(x, y)
end program
-! CHECK: define void @_QQmain()
-! CHECK: call i32 @__kmpc_global_thread_num(
-! CHECK: call ptr @__kmpc_alloc(i32 {{.*}}, i64 8, ptr null)
-! CHECK: call ptr @__kmpc_alloc(i32 {{.*}}, i64 8, ptr null)
-! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr null)
-! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr null)
-! CHECK: ret void
-! CHECK: declare noalias ptr @__kmpc_alloc(i32, i64, ptr)
-! CHECK: declare void @__kmpc_free(i32, ptr, ptr)
+! CHECK: %[[X_ALLOC:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFEx"}
+! CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X_ALLOC]] {uniq_name = "_QFEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[Y_ALLOC:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFEy"}
+! CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y_ALLOC]] {uniq_name = "_QFEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: omp.allocate_dir(%[[X_DECL]]#0, %[[Y_DECL]]#0 : !fir.ref<i32>, !fir.ref<i32>)
diff --git a/mlir/include/mlir/Target/LLVMIR/LLVMTranslationDialectInterface.td b/mlir/include/mlir/Target/LLVMIR/LLVMTranslationDialectInterface.td
index 6d8c7174bd2e3..b1e7f25b44c40 100644
--- a/mlir/include/mlir/Target/LLVMIR/LLVMTranslationDialectInterface.td
+++ b/mlir/include/mlir/Target/LLVMIR/LLVMTranslationDialectInterface.td
@@ -55,6 +55,16 @@ def LLVMTranslationDialectInterface : DialectInterface<"LLVMTranslationDialectIn
[{
return ::llvm::success();
}]
+ >,
+ InterfaceMethod<[{
+ Hook called just before a block's terminator operation is translated.
+ Dialects can override this to inject IR that must appear at the end of
+ a basic block.
+ }],
+ "void", "preTranslateTerminator",
+ (ins "::mlir::Block &":$block, "::llvm::IRBuilderBase &":$builder,
+ "::mlir::LLVM::ModuleTranslation &":$moduleTranslation),
+ [{ }]
>
];
}
diff --git a/mlir/include/mlir/Target/LLVMIR/LLVMTranslationInterface.h b/mlir/include/mlir/Target/LLVMIR/LLVMTranslationInterface.h
index 58d3ee0ed2139..5bc4aa9a4e7e9 100644
--- a/mlir/include/mlir/Target/LLVMIR/LLVMTranslationInterface.h
+++ b/mlir/include/mlir/Target/LLVMIR/LLVMTranslationInterface.h
@@ -79,6 +79,17 @@ class LLVMTranslationInterface
attribute.getName().str() + "'");
return success();
}
+
+ /// Calls the `preTranslateTerminator` hook on every registered dialect
+ /// interface. This is broadcast to all interfaces because any dialect may
+ /// have registered deferred work for the given block, independent of which
+ /// dialect owns the terminator.
+ virtual void
+ preTranslateTerminator(Block &block, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation) const {
+ for (const LLVMTranslationDialectInterface &iface : *this)
+ iface.preTranslateTerminator(block, builder, moduleTranslation);
+ }
};
} // namespace mlir
diff --git a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
index 243cca8831e37..c67bb57985bd0 100644
--- a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
+++ b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
@@ -272,11 +272,6 @@ class ModuleTranslation {
/// constructed.
llvm::OpenMPIRBuilder *getOpenMPBuilder();
- /// Registers a pending __kmpc_free call for the given block. These are
- /// emitted before the block's terminator during block conversion.
- void registerPendingOmpAllocateFree(Block *block, llvm::Value *ptr,
- llvm::Value *allocator);
-
/// Returns the LLVM module in which the IR is being constructed.
llvm::Module *getLLVMModule() { return llvmModule.get(); }
@@ -406,9 +401,6 @@ class ModuleTranslation {
llvm::IRBuilderBase &builder,
bool recordInsertions);
- /// Emits pending __kmpc_free calls for the block, before its terminator.
- void emitPendingOmpAllocateFrees(Block &bb, llvm::IRBuilderBase &builder);
-
/// Returns the LLVM metadata corresponding to the given mlir LLVM dialect
/// TBAATagAttr.
llvm::MDNode *getTBAANode(TBAATagAttr tbaaAttr) const;
@@ -517,10 +509,6 @@ class ModuleTranslation {
/// block.
DenseMap<BlockAddressAttr, llvm::BasicBlock *> blockAddressToLLVMMapping;
- /// Pending __kmpc_free calls per block, emitted before the terminator.
- DenseMap<Block *, llvm::SmallVector<std::pair<llvm::Value *, llvm::Value *>>>
- pendingOmpAllocateFrees;
-
/// Stack of user-specified state elements, useful when translating operations
/// with regions.
StateStack stack;
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 5f5b1150f9588..a2ae28990e327 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -4636,7 +4636,11 @@ static Value getBaseValueForTypeLookup(Value value) {
while (Operation *op = value.getDefiningOp()) {
if (auto addrCast = dyn_cast_if_present<LLVM::AddrSpaceCastOp>(op))
value = addrCast.getOperand();
- else if (op->getName().getIdentifier()) {
+ // Trace through hlfir.declare and fir.declare to reach the base address,
+ // which is then used for type lookup.
+ else if (op->getName().getIdentifier() &&
+ (op->getName().getIdentifier().str() == "hlfir.declare" ||
+ op->getName().getIdentifier().str() == "fir.declare")) {
if (op->getNumOperands() > 0)
value = op->getOperand(0);
else
@@ -7313,10 +7317,50 @@ class OpenMPDialectLLVMIRTranslationInterface
amendOperation(Operation *op, ArrayRef<llvm::Instruction *> instructions,
NamedAttribute attribute,
LLVM::ModuleTranslation &moduleTranslation) const final;
+
+ /// Emits pending __kmpc_free calls just before the block's terminator.
+ void preTranslateTerminator(
+ Block &block, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation) const final;
+
+ /// Registers a deferred __kmpc_free call to be emitted before the
+ /// terminator of the given block.
+ void registerPendingOmpAllocateFree(Block *block, llvm::Value *ptr,
+ llvm::Value *allocator) const {
+ pendingOmpAllocateFrees[block].push_back({ptr, allocator});
+ }
+
+private:
+ /// Pending __kmpc_free calls per block, emitted via preTranslateTerminator.
+ mutable DenseMap<Block *,
+ llvm::SmallVector<std::pair<llvm::Value *, llvm::Value *>>>
+ pendingOmpAllocateFrees;
};
} // namespace
+void OpenMPDialectLLVMIRTranslationInterface::preTranslateTerminator(
+ Block &block, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation) const {
+ auto it = pendingOmpAllocateFrees.find(&block);
+ if (it == pendingOmpAllocateFrees.end() || it->second.empty())
+ return;
+ llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+ if (!ompBuilder)
+ return;
+ llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(&block);
+ if (!llvmBB)
+ return;
+ if (!llvmBB->empty() && llvmBB->back().isTerminator())
+ builder.SetInsertPoint(&llvmBB->back());
+ else
+ builder.SetInsertPoint(llvmBB);
+ llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+ for (auto it2 = it->second.rbegin(); it2 != it->second.rend(); ++it2)
+ ompBuilder->createOMPFree(ompLoc, it2->first, it2->second, "");
+ pendingOmpAllocateFrees.erase(it);
+}
+
LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation(
Operation *op, ArrayRef<llvm::Instruction *> instructions,
NamedAttribute attribute,
@@ -7492,7 +7536,8 @@ convertTargetAllocMemOp(Operation &opInst, llvm::IRBuilderBase &builder,
static LogicalResult
convertAllocateDirOp(Operation &opInst, llvm::IRBuilderBase &builder,
- LLVM::ModuleTranslation &moduleTranslation) {
+ LLVM::ModuleTranslation &moduleTranslation,
+ const OpenMPDialectLLVMIRTranslationInterface &ompIface) {
auto allocateDirOp = cast<omp::AllocateDirOp>(opInst);
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
@@ -7570,11 +7615,11 @@ convertAllocateDirOp(Operation &opInst, llvm::IRBuilderBase &builder,
allocatedVars.push_back({allocCall, allocator});
}
- // Register __kmpc_free calls to be emitted before the block terminator.
+ // Register __kmpc_free calls to be emitted before the block terminator via
+ // preTranslateTerminator().
Block *block = allocateDirOp->getBlock();
for (auto &alloc : allocatedVars)
- moduleTranslation.registerPendingOmpAllocateFree(block, alloc.first,
- alloc.second);
+ ompIface.registerPendingOmpAllocateFree(block, alloc.first, alloc.second);
return success();
}
@@ -7825,7 +7870,7 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
return convertTargetFreeMemOp(*op, builder, moduleTranslation);
})
.Case([&](omp::AllocateDirOp) {
- return convertAllocateDirOp(*op, builder, moduleTranslation);
+ return convertAllocateDirOp(*op, builder, moduleTranslation, *this);
})
.Default([&](Operation *inst) {
return inst->emitError()
diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
index 2f0345b67ea6b..699544d0565cb 100644
--- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
@@ -1009,30 +1009,6 @@ LogicalResult ModuleTranslation::convertOperation(Operation &op,
return convertDialectAttributes(&op, scope.getCapturedInstructions());
}
-void ModuleTranslation::registerPendingOmpAllocateFree(Block *block,
- llvm::Value *ptr,
- llvm::Value *allocator) {
- pendingOmpAllocateFrees[block].push_back({ptr, allocator});
-}
-
-void ModuleTranslation::emitPendingOmpAllocateFrees(
- Block &bb, llvm::IRBuilderBase &builder) {
- auto it = pendingOmpAllocateFrees.find(&bb);
- if (it == pendingOmpAllocateFrees.end() || it->second.empty())
- return;
- llvm::OpenMPIRBuilder *ompBuilder = getOpenMPBuilder();
- llvm::BasicBlock *llvmBB = lookupBlock(&bb);
- llvm::Instruction *term = llvmBB->getTerminator();
- if (term)
- builder.SetInsertPoint(term);
- else
- builder.SetInsertPoint(llvmBB);
- llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
- for (auto it2 = it->second.rbegin(); it2 != it->second.rend(); ++it2)
- ompBuilder->createOMPFree(ompLoc, it2->first, it2->second, "");
- pendingOmpAllocateFrees.erase(it);
-}
-
/// Convert block to LLVM IR. Unless `ignoreArguments` is set, emit PHI nodes
/// to define values corresponding to the MLIR block arguments. These nodes
/// are not connected to the source basic blocks, which may not exist yet. Uses
@@ -1072,9 +1048,10 @@ LogicalResult ModuleTranslation::convertBlockImpl(Block &bb,
// Traverse operations.
for (auto &op : bb) {
- // Emit pending OpenMP allocate frees before the terminator.
+ // Give registered dialect interfaces a chance to inject IR before the
+ // terminator.
if (op.hasTrait<OpTrait::IsTerminator>())
- emitPendingOmpAllocateFrees(bb, builder);
+ iface.preTranslateTerminator(bb, builder, *this);
// Set the current debug location within the builder.
builder.SetCurrentDebugLocation(
diff --git a/mlir/test/Target/LLVMIR/openmp-allocate-directive.mlir b/mlir/test/Target/LLVMIR/openmp-allocate-directive.mlir
new file mode 100644
index 0000000000000..1c05b20a83a61
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-allocate-directive.mlir
@@ -0,0 +1,109 @@
+// Tests for translation of omp.allocate_dir operations to LLVM IR,
+// covering all combinations of align and allocator clauses.
+
+// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
+
+// -----
+
+// CHECK-LABEL: define void @test_allocate_default
+// CHECK-SAME: (ptr %[[ARG0:.*]]) {
+// CHECK: %[[TID:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK: %[[ALLOC:.*]] = call ptr @__kmpc_alloc(i32 %[[TID]], i64 8, ptr null)
+// CHECK: %[[TID_FREE:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK: call void @__kmpc_free(i32 %[[TID_FREE]], ptr %[[ALLOC]], ptr null)
+// CHECK: ret void
+// CHECK: }
+// CHECK: declare noalias ptr @__kmpc_alloc(i32, i64, ptr)
+// CHECK: declare void @__kmpc_free(i32, ptr, ptr)
+llvm.func @test_allocate_default(%arg0: !llvm.ptr) {
+ omp.allocate_dir (%arg0 : !llvm.ptr)
+ llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: define void @test_allocate_align_only
+// CHECK: %[[TID:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK: %[[ALLOC:.*]] = call ptr @__kmpc_aligned_alloc(i32 %[[TID]], i64 16, i64 16, ptr null)
+// CHECK: %[[TID_FREE:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK: call void @__kmpc_free(i32 %[[TID_FREE]], ptr %[[ALLOC]], ptr null)
+// CHECK: ret void
+// CHECK: declare noalias ptr @__kmpc_aligned_alloc(i32, i64, i64, ptr)
+llvm.func @test_allocate_align_only(%arg0: !llvm.ptr) {
+ omp.allocate_dir (%arg0 : !llvm.ptr) align(16)
+ llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: define void @test_allocate_allocator_only
+// CHECK: %[[TID:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK: %[[ALLOC:.*]] = call ptr @__kmpc_alloc(i32 %[[TID]], i64 8, ptr inttoptr (i32 1 to ptr))
+// CHECK: %[[TID_FREE:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK: call void @__kmpc_free(i32 %[[TID_FREE]], ptr %[[ALLOC]], ptr inttoptr (i32 1 to ptr))
+// CHECK: ret void
+llvm.func @test_allocate_allocator_only(%arg0: !llvm.ptr) {
+ %alloc1 = llvm.mlir.constant(1 : i32) : i32
+ omp.allocate_dir (%arg0 : !llvm.ptr) allocator(%alloc1 : i32)
+ llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: define void @test_allocate_align_and_allocator
+// CHECK: %[[TID:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK: %[[ALLOC:.*]] = call ptr @__kmpc_aligned_alloc(i32 %[[TID]], i64 64, i64 64, ptr inttoptr (i32 6 to ptr))
+// CHECK: %[[TID_FREE:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK: call void @__kmpc_free(i32 %[[TID_FREE]], ptr %[[ALLOC]], ptr inttoptr (i32 6 to ptr))
+// CHECK: ret void
+llvm.func @test_allocate_align_and_allocator(%arg0: !llvm.ptr) {
+ %alloc6 = llvm.mlir.constant(6 : i32) : i32
+ omp.allocate_dir (%arg0 : !llvm.ptr) align(64) allocator(%alloc6 : i32)
+ llvm.return
+}
+
+// -----
+
+// Verifies that multiple variables each get their own __kmpc_aligned_alloc call
+// and that __kmpc_free calls are emitted in reverse allocation order.
+//
+// CHECK-LABEL: define void @test_allocate_multiple_vars
+// CHECK: %[[TID0:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK: %[[ALLOC0:.*]] = call ptr @__kmpc_aligned_alloc(i32 %[[TID0]], i64 32, i64 32, ptr inttoptr (i32 3 to ptr))
+// CHECK: %[[TID1:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK: %[[ALLOC1:.*]] = call ptr @__kmpc_aligned_alloc(i32 %[[TID1]], i64 32, i64 32, ptr inttoptr (i32 3 to ptr))
+// CHECK: %[[TID2:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK: %[[ALLOC2:.*]] = call ptr @__kmpc_aligned_alloc(i32 %[[TID2]], i64 32, i64 32, ptr inttoptr (i32 3 to ptr))
+// Free order is reversed relative to allocation order.
+// CHECK: call void @__kmpc_free({{.*}}, ptr %[[ALLOC2]], ptr inttoptr (i32 3 to ptr))
+// CHECK: call void @__kmpc_free({{.*}}, ptr %[[ALLOC1]], ptr inttoptr (i32 3 to ptr))
+// CHECK: call void @__kmpc_free({{.*}}, ptr %[[ALLOC0]], ptr inttoptr (i32 3 to ptr))
+// CHECK: ret void
+llvm.func @test_allocate_multiple_vars(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr) {
+ %alloc3 = llvm.mlir.constant(3 : i32) : i32
+ omp.allocate_dir (%arg0, %arg1, %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) align(32) allocator(%alloc3 : i32)
+ llvm.return
+}
+
+// -----
+
+// Verifies that array size is correctly calculated from the global's element
+// type: [10 x i32] = 40 bytes, rounded up to alignment 64 => 64 bytes.
+//
+// CHECK-LABEL: define void @test_allocate_array_global
+// CHECK: %[[TID:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK: %[[ALLOC:.*]] = call ptr @__kmpc_aligned_alloc(i32 %[[TID]], i64 64, i64 64, ptr inttoptr (i32 6 to ptr))
+// CHECK: %[[TID_FREE:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK: call void @__kmpc_free(i32 %[[TID_FREE]], ptr %[[ALLOC]], ptr inttoptr (i32 6 to ptr))
+// CHECK: ret void
+llvm.mlir.global internal @arr_global() : !llvm.array<10 x i32> {
+ %0 = llvm.mlir.zero : !llvm.array<10 x i32>
+ llvm.return %0 : !llvm.array<10 x i32>
+}
+
+llvm.func @test_allocate_array_global() {
+ %z = llvm.mlir.addressof @arr_global : !llvm.ptr
+ %alloc6 = llvm.mlir.constant(6 : i32) : i32
+ omp.allocate_dir (%z : !llvm.ptr) align(64) allocator(%alloc6 : i32)
+ llvm.return
+}
>From d6d0a1b301cee0434229159465af7a06db974d7a Mon Sep 17 00:00:00 2001
From: Raghu Maddhipatla <Raghu.Maddhipatla at amd.com>
Date: Fri, 10 Apr 2026 17:02:58 -0500
Subject: [PATCH 5/6] Handle generation of the matching kmpc_free call for an
OMP ALLOCATE variable in the Fortran frontend.
---
flang/lib/Lower/OpenMP/OpenMP.cpp | 16 +++-
.../OpenMP/omp-declarative-allocate-align.f90 | 6 ++
.../Lower/OpenMP/omp-declarative-allocate.f90 | 3 +
mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 20 ++++
.../LLVMIR/LLVMTranslationDialectInterface.td | 10 --
.../Target/LLVMIR/LLVMTranslationInterface.h | 11 ---
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 95 ++++++++++---------
mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 5 -
mlir/test/Dialect/OpenMP/ops.mlir | 24 +++++
.../LLVMIR/openmp-allocate-directive.mlir | 16 +++-
10 files changed, 130 insertions(+), 76 deletions(-)
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 3cc343925d8fa..920fdfaafdf0d 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1952,9 +1952,19 @@ static mlir::omp::AllocateDirOp genAllocateDirOp(
genAllocateClauses(converter, semaCtx, stmtCtx, objects, item->clauses, loc,
operandRange, clauseOps);
- return mlir::omp::AllocateDirOp::create(converter.getFirOpBuilder(), loc,
- operandRange, clauseOps.align,
- clauseOps.allocator);
+ auto allocDirOp = mlir::omp::AllocateDirOp::create(
+ converter.getFirOpBuilder(), loc, operandRange, clauseOps.align,
+ clauseOps.allocator);
+
+ // Register a cleanup at the Fortran scope exit.
+ fir::FirOpBuilder *builder = &converter.getFirOpBuilder();
+ mlir::Value allocator = clauseOps.allocator;
+ converter.getFctCtx().attachCleanup([builder, loc, operandRange,
+ allocator]() {
+ mlir::omp::AllocateFreeOp::create(*builder, loc, operandRange, allocator);
+ });
+
+ return allocDirOp;
}
static mlir::omp::BarrierOp
diff --git a/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90 b/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90
index 0824d8bcb7e90..fdcc4ac1fef20 100644
--- a/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90
+++ b/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90
@@ -4,6 +4,7 @@
! - allocator(1) only (no align)
! - align(64) allocator(6) (both clauses, array variable)
! - align(32) allocator(3) (both clauses, multiple variables)
+! Each omp.allocate_dir must be paired with a matching omp.allocate_free
! RUN: %flang_fc1 -emit-hlfir %openmp_flags -fopenmp-version=51 %s -o - 2>&1 | FileCheck %s
@@ -43,3 +44,8 @@ program main
! CHECK: omp.allocate_dir(%[[Z_DECL]]#0 : !fir.ref<!fir.array<10xi32>>) align(64) allocator(%[[ALLOC6]] : i32)
! CHECK: %[[ALLOC3:.*]] = arith.constant 3 : i32
! CHECK: omp.allocate_dir(%[[C_DECL]]#0, %[[R_DECL]]#0, %[[CMPLX_DECL]]#0 : !fir.ref<!fir.char<1>>, !fir.ref<f32>, !fir.ref<complex<f32>>) align(32) allocator(%[[ALLOC3]] : i32)
+! CHECK: omp.allocate_free(%[[C_DECL]]#0, %[[R_DECL]]#0, %[[CMPLX_DECL]]#0 : !fir.ref<!fir.char<1>>, !fir.ref<f32>, !fir.ref<complex<f32>>) allocator(%[[ALLOC3]] : i32)
+! CHECK: omp.allocate_free(%[[Z_DECL]]#0 : !fir.ref<!fir.array<10xi32>>) allocator(%[[ALLOC6]] : i32)
+! CHECK: omp.allocate_free(%[[Y_DECL]]#0 : !fir.ref<i32>) allocator(%[[ALLOC1]] : i32)
+! CHECK: omp.allocate_free(%[[X_DECL]]#0 : !fir.ref<i32>)
+! CHECK: return
diff --git a/flang/test/Lower/OpenMP/omp-declarative-allocate.f90 b/flang/test/Lower/OpenMP/omp-declarative-allocate.f90
index 69da3f52b459f..77f211ccf0aeb 100644
--- a/flang/test/Lower/OpenMP/omp-declarative-allocate.f90
+++ b/flang/test/Lower/OpenMP/omp-declarative-allocate.f90
@@ -1,5 +1,6 @@
! This test checks lowering of OpenMP allocate Directive to HLFIR.
! Verifies code generation for default (no align, null allocator) case.
+! omp.allocate_free must be emitted at the exit (before return).
! RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
@@ -13,3 +14,5 @@ program main
! CHECK: %[[Y_ALLOC:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFEy"}
! CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y_ALLOC]] {uniq_name = "_QFEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: omp.allocate_dir(%[[X_DECL]]#0, %[[Y_DECL]]#0 : !fir.ref<i32>, !fir.ref<i32>)
+! CHECK: omp.allocate_free(%[[X_DECL]]#0, %[[Y_DECL]]#0 : !fir.ref<i32>, !fir.ref<i32>)
+! CHECK: return
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 40ec8904a136f..00b58eb2b8c1c 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -2217,6 +2217,26 @@ def AllocateDirOp : OpenMP_Op<"allocate_dir", [AttrSizedOperandSegments], clause
let hasVerifier = 1;
}
+//===----------------------------------------------------------------------===//
+// AllocateFreeOp
+//===----------------------------------------------------------------------===//
+
+def AllocateFreeOp : OpenMP_Op<"allocate_free", [AttrSizedOperandSegments],
+ clauses = [OpenMP_AllocatorClause]> {
+ let summary = "free-op paired with allocate directive";
+ let description = [{
+ At the end of the scope, each list item allocated using the allocate
+ directive should be deallocated (using this free operation).
+ }] # clausesDescription;
+
+ let arguments = !con((ins Variadic<AnyType>:$varList),
+ clausesArgs);
+
+ let assemblyFormat = " `(` $varList `:` type($varList) `)` oilist(" #
+ clausesOptAssemblyFormat #
+ ") attr-dict ";
+}
+
//===----------------------------------------------------------------------===//
// TargetAllocMemOp
//===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Target/LLVMIR/LLVMTranslationDialectInterface.td b/mlir/include/mlir/Target/LLVMIR/LLVMTranslationDialectInterface.td
index b1e7f25b44c40..01c1b3a3cfaa3 100644
--- a/mlir/include/mlir/Target/LLVMIR/LLVMTranslationDialectInterface.td
+++ b/mlir/include/mlir/Target/LLVMIR/LLVMTranslationDialectInterface.td
@@ -56,16 +56,6 @@ def LLVMTranslationDialectInterface : DialectInterface<"LLVMTranslationDialectIn
return ::llvm::success();
}]
>,
- InterfaceMethod<[{
- Hook called just before a block's terminator operation is translated.
- Dialects can override this to inject IR that must appear at the end of
- a basic block.
- }],
- "void", "preTranslateTerminator",
- (ins "::mlir::Block &":$block, "::llvm::IRBuilderBase &":$builder,
- "::mlir::LLVM::ModuleTranslation &":$moduleTranslation),
- [{ }]
- >
];
}
diff --git a/mlir/include/mlir/Target/LLVMIR/LLVMTranslationInterface.h b/mlir/include/mlir/Target/LLVMIR/LLVMTranslationInterface.h
index 5bc4aa9a4e7e9..58d3ee0ed2139 100644
--- a/mlir/include/mlir/Target/LLVMIR/LLVMTranslationInterface.h
+++ b/mlir/include/mlir/Target/LLVMIR/LLVMTranslationInterface.h
@@ -79,17 +79,6 @@ class LLVMTranslationInterface
attribute.getName().str() + "'");
return success();
}
-
- /// Calls the `preTranslateTerminator` hook on every registered dialect
- /// interface. This is broadcast to all interfaces because any dialect may
- /// have registered deferred work for the given block, independent of which
- /// dialect owns the terminator.
- virtual void
- preTranslateTerminator(Block &block, llvm::IRBuilderBase &builder,
- LLVM::ModuleTranslation &moduleTranslation) const {
- for (const LLVMTranslationDialectInterface &iface : *this)
- iface.preTranslateTerminator(block, builder, moduleTranslation);
- }
};
} // namespace mlir
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index a2ae28990e327..0e78cfd3733c0 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -7318,49 +7318,29 @@ class OpenMPDialectLLVMIRTranslationInterface
NamedAttribute attribute,
LLVM::ModuleTranslation &moduleTranslation) const final;
- /// Emits pending __kmpc_free calls just before the block's terminator.
- void preTranslateTerminator(
- Block &block, llvm::IRBuilderBase &builder,
- LLVM::ModuleTranslation &moduleTranslation) const final;
+ /// Records the LLVM alloc pointer produced for an OMP ALLOCATE variable so
+ /// that the paired omp.allocate_free op can generate the matching
+ /// __kmpc_free call.
+ void registerAllocatedPtr(Value var, llvm::Value *ptr) const {
+ ompAllocatedPtrs[var] = ptr;
+ }
- /// Registers a deferred __kmpc_free call to be emitted before the
- /// terminator of the given block.
- void registerPendingOmpAllocateFree(Block *block, llvm::Value *ptr,
- llvm::Value *allocator) const {
- pendingOmpAllocateFrees[block].push_back({ptr, allocator});
+ /// Returns the LLVM alloc pointer previously registered for var, or
+ /// nullptr if no allocation was recorded.
+ llvm::Value *lookupAllocatedPtr(Value var) const {
+ auto it = ompAllocatedPtrs.find(var);
+ return it != ompAllocatedPtrs.end() ? it->second : nullptr;
}
private:
- /// Pending __kmpc_free calls per block, emitted via preTranslateTerminator.
- mutable DenseMap<Block *,
- llvm::SmallVector<std::pair<llvm::Value *, llvm::Value *>>>
- pendingOmpAllocateFrees;
+ /// Maps each MLIR variable value that appeared in an omp.allocate_dir op to
+ /// the LLVM pointer returned by the corresponding __kmpc_alloc call. The
+ /// paired omp.allocate_free op looks up these pointers to emit __kmpc_free.
+ mutable DenseMap<Value, llvm::Value *> ompAllocatedPtrs;
};
} // namespace
-void OpenMPDialectLLVMIRTranslationInterface::preTranslateTerminator(
- Block &block, llvm::IRBuilderBase &builder,
- LLVM::ModuleTranslation &moduleTranslation) const {
- auto it = pendingOmpAllocateFrees.find(&block);
- if (it == pendingOmpAllocateFrees.end() || it->second.empty())
- return;
- llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
- if (!ompBuilder)
- return;
- llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(&block);
- if (!llvmBB)
- return;
- if (!llvmBB->empty() && llvmBB->back().isTerminator())
- builder.SetInsertPoint(&llvmBB->back());
- else
- builder.SetInsertPoint(llvmBB);
- llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
- for (auto it2 = it->second.rbegin(); it2 != it->second.rend(); ++it2)
- ompBuilder->createOMPFree(ompLoc, it2->first, it2->second, "");
- pendingOmpAllocateFrees.erase(it);
-}
-
LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation(
Operation *op, ArrayRef<llvm::Instruction *> instructions,
NamedAttribute attribute,
@@ -7559,8 +7539,6 @@ convertAllocateDirOp(Operation &opInst, llvm::IRBuilderBase &builder,
allocator = llvm::ConstantPointerNull::get(builder.getPtrTy());
}
- SmallVector<std::pair<llvm::CallInst *, llvm::Value *>> allocatedVars;
-
for (Value var : vars) {
llvm::Type *llvmVarTy = moduleTranslation.convertType(var.getType());
@@ -7612,14 +7590,41 @@ convertAllocateDirOp(Operation &opInst, llvm::IRBuilderBase &builder,
allocCall =
ompBuilder->createOMPAlloc(ompLoc, size, allocator, allocName);
}
- allocatedVars.push_back({allocCall, allocator});
+ // Record the alloc pointer keyed by the MLIR variable value.
+ ompIface.registerAllocatedPtr(var, allocCall);
+ }
+
+ return success();
+}
+
+static LogicalResult
+convertAllocateFreeOp(Operation &opInst, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation,
+ const OpenMPDialectLLVMIRTranslationInterface &ompIface) {
+ auto freeOp = cast<omp::AllocateFreeOp>(opInst);
+ llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+ llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+
+ llvm::Value *allocator;
+ if (auto allocatorVar = freeOp.getAllocator()) {
+ allocator = moduleTranslation.lookupValue(allocatorVar);
+ if (allocator->getType()->isIntegerTy())
+ allocator = builder.CreateIntToPtr(allocator, builder.getPtrTy());
+ else if (allocator->getType()->isPointerTy())
+ allocator = builder.CreatePointerBitCastOrAddrSpaceCast(
+ allocator, builder.getPtrTy());
+ } else {
+ allocator = llvm::ConstantPointerNull::get(builder.getPtrTy());
}
- // Register __kmpc_free calls to be emitted before the block terminator via
- // preTranslateTerminator()
- Block *block = allocateDirOp->getBlock();
- for (auto &alloc : allocatedVars)
- ompIface.registerPendingOmpAllocateFree(block, alloc.first, alloc.second);
+ // Emit __kmpc_free for each variable in reverse allocation order.
+ SmallVector<Value> vars = freeOp.getVarList();
+ for (Value var : llvm::reverse(vars)) {
+ llvm::Value *allocPtr = ompIface.lookupAllocatedPtr(var);
+ if (!allocPtr)
+ return opInst.emitError("omp.allocate_free: no allocation recorded");
+ ompBuilder->createOMPFree(ompLoc, allocPtr, allocator, "");
+ }
return success();
}
@@ -7872,6 +7877,10 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
.Case([&](omp::AllocateDirOp) {
return convertAllocateDirOp(*op, builder, moduleTranslation, *this);
})
+ .Case([&](omp::AllocateFreeOp) {
+ return convertAllocateFreeOp(*op, builder, moduleTranslation,
+ *this);
+ })
.Default([&](Operation *inst) {
return inst->emitError()
<< "not yet implemented: " << inst->getName();
diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
index 699544d0565cb..cf398f151ed0b 100644
--- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
@@ -1048,11 +1048,6 @@ LogicalResult ModuleTranslation::convertBlockImpl(Block &bb,
// Traverse operations.
for (auto &op : bb) {
- // Give registered dialect interfaces a chance to inject IR before the
- // terminator.
- if (op.hasTrait<OpTrait::IsTerminator>())
- iface.preTranslateTerminator(bb, builder, *this);
-
// Set the current debug location within the builder.
builder.SetCurrentDebugLocation(
debugTranslation->translateLoc(op.getLoc(), subprogram));
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index 3d1133f4ba6e9..5fbc6f5fced67 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -3491,6 +3491,30 @@ func.func @omp_allocate_dir(%arg0 : memref<i32>, %arg1 : memref<i32>) -> () {
return
}
+// CHECK-LABEL: func.func @omp_allocate_free(
+// CHECK-SAME: %[[ARG0:.*]]: memref<i32>,
+// CHECK-SAME: %[[ARG1:.*]]: memref<i32>) {
+func.func @omp_allocate_free(%arg0 : memref<i32>, %arg1 : memref<i32>) -> () {
+
+ // Test free with no allocator
+ // CHECK: omp.allocate_free(%[[ARG0]] : memref<i32>)
+ omp.allocate_free (%arg0 : memref<i32>)
+
+ // Test free with allocator clause (the allocator must be the captured constant)
+ // CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64
+ %omp_default_mem_alloc = arith.constant 1 : i64
+ // CHECK: omp.allocate_free(%[[ARG0]] : memref<i32>) allocator(%[[VAL_1]] : i64)
+ omp.allocate_free (%arg0 : memref<i32>) allocator(%omp_default_mem_alloc : i64)
+
+ // Test free with two variables and allocator clause (allocator checked by capture)
+ // CHECK: %[[VAL_3:.*]] = arith.constant 6 : i64
+ %omp_cgroup_mem_alloc = arith.constant 6 : i64
+ // CHECK: omp.allocate_free(%[[ARG0]], %[[ARG1]] : memref<i32>, memref<i32>) allocator(%[[VAL_3]] : i64)
+ omp.allocate_free (%arg0, %arg1 : memref<i32>, memref<i32>) allocator(%omp_cgroup_mem_alloc : i64)
+
+ return
+}
+
// CHECK-LABEL: func.func @omp_workdistribute
func.func @omp_workdistribute() {
// CHECK: omp.teams
diff --git a/mlir/test/Target/LLVMIR/openmp-allocate-directive.mlir b/mlir/test/Target/LLVMIR/openmp-allocate-directive.mlir
index 1c05b20a83a61..d8975eb512abe 100644
--- a/mlir/test/Target/LLVMIR/openmp-allocate-directive.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-allocate-directive.mlir
@@ -1,5 +1,7 @@
-// Tests for translation of omp.allocate_dir operations to LLVM IR,
-// covering all combinations of align and allocator clauses.
+// Tests for translation of omp.allocate_dir / omp.allocate_free pairs to
+// LLVM IR, covering all combinations of align and allocator clauses.
+// The frontend is responsible for placing omp.allocate_free at the correct
+// Fortran scope exit; here each function pairs the ops manually.
// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
@@ -17,6 +19,7 @@
// CHECK: declare void @__kmpc_free(i32, ptr, ptr)
llvm.func @test_allocate_default(%arg0: !llvm.ptr) {
omp.allocate_dir (%arg0 : !llvm.ptr)
+ omp.allocate_free (%arg0 : !llvm.ptr)
llvm.return
}
@@ -31,6 +34,7 @@ llvm.func @test_allocate_default(%arg0: !llvm.ptr) {
// CHECK: declare noalias ptr @__kmpc_aligned_alloc(i32, i64, i64, ptr)
llvm.func @test_allocate_align_only(%arg0: !llvm.ptr) {
omp.allocate_dir (%arg0 : !llvm.ptr) align(16)
+ omp.allocate_free (%arg0 : !llvm.ptr)
llvm.return
}
@@ -45,6 +49,7 @@ llvm.func @test_allocate_align_only(%arg0: !llvm.ptr) {
llvm.func @test_allocate_allocator_only(%arg0: !llvm.ptr) {
%alloc1 = llvm.mlir.constant(1 : i32) : i32
omp.allocate_dir (%arg0 : !llvm.ptr) allocator(%alloc1 : i32)
+ omp.allocate_free (%arg0 : !llvm.ptr) allocator(%alloc1 : i32)
llvm.return
}
@@ -59,13 +64,14 @@ llvm.func @test_allocate_allocator_only(%arg0: !llvm.ptr) {
llvm.func @test_allocate_align_and_allocator(%arg0: !llvm.ptr) {
%alloc6 = llvm.mlir.constant(6 : i32) : i32
omp.allocate_dir (%arg0 : !llvm.ptr) align(64) allocator(%alloc6 : i32)
+ omp.allocate_free (%arg0 : !llvm.ptr) allocator(%alloc6 : i32)
llvm.return
}
// -----
-// Verifies that multiple variables each get their own __kmpc_aligned_alloc call
-// and that __kmpc_free calls are emitted in reverse allocation order.
+// Verifies that multiple variables each get their own __kmpc_aligned_alloc
+// call, and that __kmpc_free calls are emitted in reverse allocation order.
//
// CHECK-LABEL: define void @test_allocate_multiple_vars
// CHECK: %[[TID0:.*]] = call i32 @__kmpc_global_thread_num(
@@ -82,6 +88,7 @@ llvm.func @test_allocate_align_and_allocator(%arg0: !llvm.ptr) {
llvm.func @test_allocate_multiple_vars(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr) {
%alloc3 = llvm.mlir.constant(3 : i32) : i32
omp.allocate_dir (%arg0, %arg1, %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) align(32) allocator(%alloc3 : i32)
+ omp.allocate_free (%arg0, %arg1, %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) allocator(%alloc3 : i32)
llvm.return
}
@@ -105,5 +112,6 @@ llvm.func @test_allocate_array_global() {
%z = llvm.mlir.addressof @arr_global : !llvm.ptr
%alloc6 = llvm.mlir.constant(6 : i32) : i32
omp.allocate_dir (%z : !llvm.ptr) align(64) allocator(%alloc6 : i32)
+ omp.allocate_free (%z : !llvm.ptr) allocator(%alloc6 : i32)
llvm.return
}
>From 3a6d8663aab0bbb5c5a5f79bafd6b15c7a8e5632 Mon Sep 17 00:00:00 2001
From: Raghu Maddhipatla <Raghu.Maddhipatla at amd.com>
Date: Fri, 10 Apr 2026 18:00:02 -0500
Subject: [PATCH 6/6] Added NULL pointer check for updateToLocation()
---
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index edd181f7eea2e..875c66aef7366 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -7628,7 +7628,8 @@ CallInst *OpenMPIRBuilder::createOMPAlloc(const LocationDescription &Loc,
Value *Size, Value *Allocator,
std::string Name) {
IRBuilder<>::InsertPointGuard IPG(Builder);
- updateToLocation(Loc);
+ if (!updateToLocation(Loc))
+ return nullptr;
uint32_t SrcLocStrSize;
Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
@@ -7646,7 +7647,8 @@ CallInst *OpenMPIRBuilder::createOMPAlignedAlloc(const LocationDescription &Loc,
Value *Allocator,
std::string Name) {
IRBuilder<>::InsertPointGuard IPG(Builder);
- updateToLocation(Loc);
+ if (!updateToLocation(Loc))
+ return nullptr;
uint32_t SrcLocStrSize;
Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
@@ -7663,7 +7665,8 @@ CallInst *OpenMPIRBuilder::createOMPFree(const LocationDescription &Loc,
Value *Addr, Value *Allocator,
std::string Name) {
IRBuilder<>::InsertPointGuard IPG(Builder);
- updateToLocation(Loc);
+ if (!updateToLocation(Loc))
+ return nullptr;
uint32_t SrcLocStrSize;
Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
More information about the Mlir-commits
mailing list