[Mlir-commits] [flang] [llvm] [mlir] [Flang] [OpenMP] [MLIR] Add lowering support for OMP ALLOCATE directives and its clauses (PR #187167)

Raghu Maddhipatla llvmlistbot at llvm.org
Fri Apr 10 16:01:13 PDT 2026


https://github.com/raghavendhra updated https://github.com/llvm/llvm-project/pull/187167

>From f254c678b6d2d421eb37c4df109ed6383457457c Mon Sep 17 00:00:00 2001
From: Raghu Maddhipatla <Raghu.Maddhipatla at amd.com>
Date: Tue, 17 Mar 2026 19:32:38 -0500
Subject: [PATCH 1/6] [Flang] [OpenMP] [MLIR] Add lowering support for OMP
 ALLOCATE directive and its clauses.

---
 flang/lib/Lower/OpenMP/ClauseProcessor.cpp    |  19 ++++
 flang/lib/Lower/OpenMP/ClauseProcessor.h      |   2 +
 flang/lib/Lower/OpenMP/OpenMP.cpp             |  40 ++++++-
 .../Todo/omp-declarative-allocate-align.f90   |  10 --
 .../OpenMP/Todo/omp-declarative-allocate.f90  |  10 --
 .../OpenMP/omp-declarative-allocate-align.f90 |  47 ++++++++
 .../Lower/OpenMP/omp-declarative-allocate.f90 |  19 ++++
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       |  15 ++-
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     |  17 +++
 .../mlir/Dialect/OpenMP/OpenMPClauses.td      |   4 +-
 .../mlir/Target/LLVMIR/ModuleTranslation.h    |  13 +++
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 107 ++++++++++++++++++
 mlir/lib/Target/LLVMIR/ModuleTranslation.cpp  |  28 +++++
 mlir/test/Dialect/OpenMP/ops.mlir             |  16 +--
 14 files changed, 314 insertions(+), 33 deletions(-)
 delete mode 100644 flang/test/Lower/OpenMP/Todo/omp-declarative-allocate-align.f90
 delete mode 100644 flang/test/Lower/OpenMP/Todo/omp-declarative-allocate.f90
 create mode 100644 flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90
 create mode 100644 flang/test/Lower/OpenMP/omp-declarative-allocate.f90

diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index 45b11c818245e..47cdaf1829913 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -325,6 +325,25 @@ static void collectIteratorIVs(
 // ClauseProcessor unique clauses
 //===----------------------------------------------------------------------===//
 
+bool ClauseProcessor::processAlign(
+    mlir::omp::AlignClauseOps &result) const {
+  if (auto *clause = findUniqueClause<omp::clause::Align>()) {
+    fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+    const std::optional<std::int64_t> align = evaluate::ToInt64(clause->v);
+    result.align = firOpBuilder.getI64IntegerAttr(*align);
+    return true;
+  }
+  return false;
+}
+
+bool ClauseProcessor::processAllocator(lower::StatementContext &stmtCtx, mlir::omp::AllocatorClauseOps &result) const {
+  if (auto *clause = findUniqueClause<omp::clause::Allocator>()) {
+    result.allocator = fir::getBase(converter.genExprValue(clause->v, stmtCtx));
+    return true;
+  }
+  return false;
+}
+
 bool ClauseProcessor::processBare(mlir::omp::BareClauseOps &result) const {
   return markClauseOccurrence<omp::clause::OmpxBare>(result.bare);
 }
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h
index f343ee8ff4332..33323036cdc3a 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.h
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h
@@ -57,6 +57,8 @@ class ClauseProcessor {
       : converter(converter), semaCtx(semaCtx), clauses(clauses) {}
 
   // 'Unique' clauses: They can appear at most once in the clause list.
+  bool processAlign(mlir::omp::AlignClauseOps &result) const;
+  bool processAllocator(lower::StatementContext &stmtCtx, mlir::omp::AllocatorClauseOps &result) const;
   bool processBare(mlir::omp::BareClauseOps &result) const;
   bool processBind(mlir::omp::BindClauseOps &result) const;
   bool processCancelDirectiveName(
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 33de565eda275..cce210b54dceb 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1507,6 +1507,21 @@ static OpTy genWrapperOp(lower::AbstractConverter &converter,
 // Code generation functions for clauses
 //===----------------------------------------------------------------------===//
 
+static void genAllocateClauses(lower::AbstractConverter &converter,
+                            semantics::SemanticsContext &semaCtx,
+                            lower::StatementContext &stmtCtx,
+                            const ObjectList &objects,
+                            const List<Clause> &clauses, mlir::Location loc,
+                            llvm::SmallVectorImpl<mlir::Value> &operandRange,
+                            mlir::omp::AllocateDirOperands &clauseOps) {
+  if (!objects.empty())
+    genObjectList(objects, converter, operandRange);
+
+  ClauseProcessor cp(converter, semaCtx, clauses);
+  cp.processAlign(clauseOps);
+  cp.processAllocator(stmtCtx, clauseOps);
+}
+
 static void genCancelClauses(lower::AbstractConverter &converter,
                              semantics::SemanticsContext &semaCtx,
                              const List<Clause> &clauses, mlir::Location loc,
@@ -1927,6 +1942,17 @@ static void genWsloopClauses(
 //===----------------------------------------------------------------------===//
 // Code generation functions for leaf constructs
 //===----------------------------------------------------------------------===//
+static mlir::omp::AllocateDirOp
+genAllocateDirOp(lower::AbstractConverter &converter,
+           semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, lower::pft::Evaluation &eval,
+           mlir::Location loc, const ObjectList &objects,  const ConstructQueue &queue, ConstructQueue::const_iterator item) {
+  llvm::SmallVector<mlir::Value> operandRange;
+  mlir::omp::AllocateDirOperands clauseOps;
+  genAllocateClauses(converter, semaCtx, stmtCtx, objects, item->clauses, loc,
+                  operandRange, clauseOps);
+
+  return mlir::omp::AllocateDirOp::create(converter.getFirOpBuilder(), loc, operandRange, clauseOps.align, clauseOps.allocator);
+}
 
 static mlir::omp::BarrierOp
 genBarrierOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
@@ -3841,8 +3867,18 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                    semantics::SemanticsContext &semaCtx,
                    lower::pft::Evaluation &eval,
                    const parser::OmpAllocateDirective &allocate) {
-  if (!semaCtx.langOptions().OpenMPSimd)
-    TODO(converter.getCurrentLocation(), "OmpAllocateDirective");
+  lower::StatementContext stmtCtx;
+  ObjectList objects = makeObjects((allocate.BeginDir().Arguments()), semaCtx);
+  const auto &clauseList = (allocate.BeginDir().Clauses());
+  List<Clause> clauses = makeClauses(clauseList, semaCtx);
+  mlir::Location loc = converter.genLocation(allocate.source);
+
+  ConstructQueue queue{buildConstructQueue(
+      converter.getFirOpBuilder().getModule(), semaCtx, eval, allocate.source,
+      llvm::omp::Directive::OMPD_allocate, clauses)};
+
+  genAllocateDirOp(converter, semaCtx, stmtCtx, eval, loc, objects,
+             queue, queue.begin());
 }
 
 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
diff --git a/flang/test/Lower/OpenMP/Todo/omp-declarative-allocate-align.f90 b/flang/test/Lower/OpenMP/Todo/omp-declarative-allocate-align.f90
deleted file mode 100644
index fec146ac70313..0000000000000
--- a/flang/test/Lower/OpenMP/Todo/omp-declarative-allocate-align.f90
+++ /dev/null
@@ -1,10 +0,0 @@
-! This test checks lowering of OpenMP allocate Directive with align clause.
-
-! RUN: not %flang_fc1 -emit-fir -fopenmp -fopenmp-version=51 %s 2>&1 | FileCheck %s
-
-program main
-  integer :: x
-
-  ! CHECK: not yet implemented: OmpAllocateDirective
-  !$omp allocate(x) align(32)
-end
diff --git a/flang/test/Lower/OpenMP/Todo/omp-declarative-allocate.f90 b/flang/test/Lower/OpenMP/Todo/omp-declarative-allocate.f90
deleted file mode 100644
index 7cae8051fda77..0000000000000
--- a/flang/test/Lower/OpenMP/Todo/omp-declarative-allocate.f90
+++ /dev/null
@@ -1,10 +0,0 @@
-! This test checks lowering of OpenMP allocate Directive.
-
-! RUN: not %flang_fc1 -emit-fir -fopenmp %s 2>&1 | FileCheck %s
-
-program main
-  integer :: x, y
-
-  ! CHECK: not yet implemented: OmpAllocateDirective
-  !$omp allocate(x, y)
-end
diff --git a/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90 b/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90
new file mode 100644
index 0000000000000..50c6ab1f64002
--- /dev/null
+++ b/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90
@@ -0,0 +1,47 @@
+! This test checks lowering of OpenMP allocate Directive with align and allocator
+! clauses to LLVM IR. Verifies code generation for:
+!   - align(16) only (null allocator)
+!   - allocator(omp_default_mem_alloc) only (no align)
+!   - align(64) allocator(omp_cgroup_mem_alloc) (both clauses, array variable)
+!   - align(32) allocator(3) (both clauses, multiple variables)
+
+! RUN: %flang_fc1 -emit-llvm %openmp_flags -fopenmp-version=51 %s -o - 2>&1 | FileCheck %s
+
+program main
+  use omp_lib
+  integer :: x, y
+  integer :: z(10)
+  character c
+  real(kind = 16) :: r
+  complex cmplx
+  !$omp allocate(x) align(16)
+  !$omp allocate(y) allocator(omp_default_mem_alloc)
+  !$omp allocate(z) align(64) allocator(omp_cgroup_mem_alloc)
+  !$omp allocate(c, r, cmplx) align(32) allocator(3)
+  x = 1
+  y = 2
+  z = x + y
+  print *, "z : ", z
+end program
+
+! CHECK: define void @_QQmain()
+! CHECK: call i32 @__kmpc_global_thread_num(
+
+! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 16, i64 {{.*}}, ptr null)
+! CHECK: call ptr @__kmpc_alloc(i32 {{.*}}, i64 {{.*}}, ptr inttoptr (i64 1 to ptr))
+! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 64, i64 {{.*}}, ptr inttoptr (i64 6 to ptr))
+! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 32, i64 {{.*}}, ptr inttoptr (i32 3 to ptr))
+! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 32, i64 {{.*}}, ptr inttoptr (i32 3 to ptr))
+! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 32, i64 {{.*}}, ptr inttoptr (i32 3 to ptr))
+
+! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 3 to ptr))
+! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 3 to ptr))
+! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 3 to ptr))
+! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i64 6 to ptr))
+! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i64 1 to ptr))
+! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr null)
+! CHECK: ret void
+
+! CHECK: declare noalias ptr @__kmpc_aligned_alloc(i32, i64, i64, ptr)
+! CHECK: declare noalias ptr @__kmpc_alloc(i32, i64, ptr)
+! CHECK: declare void @__kmpc_free(i32, ptr, ptr)
diff --git a/flang/test/Lower/OpenMP/omp-declarative-allocate.f90 b/flang/test/Lower/OpenMP/omp-declarative-allocate.f90
new file mode 100644
index 0000000000000..7c8047ebf7f53
--- /dev/null
+++ b/flang/test/Lower/OpenMP/omp-declarative-allocate.f90
@@ -0,0 +1,19 @@
+! This test checks lowering of OpenMP allocate Directive to LLVM IR.
+! Verifies code generation for default (no align, null allocator) case.
+
+! RUN: %flang_fc1 -emit-llvm -fopenmp %s -o - | FileCheck %s
+
+program main
+  integer :: x, y
+  !$omp allocate(x, y)
+end program
+
+! CHECK: define void @_QQmain()
+! CHECK: call i32 @__kmpc_global_thread_num(
+! CHECK: call ptr @__kmpc_alloc(i32 {{.*}}, i64 8, ptr null)
+! CHECK: call ptr @__kmpc_alloc(i32 {{.*}}, i64 8, ptr null)
+! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr null)
+! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr null)
+! CHECK: ret void
+! CHECK: declare noalias ptr @__kmpc_alloc(i32, i64, ptr)
+! CHECK: declare void @__kmpc_free(i32, ptr, ptr)
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 383fd9d94661a..fdf1e1f0b18ed 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -3180,7 +3180,7 @@ class OpenMPIRBuilder {
                                                   llvm::IntegerType *IntPtrTy,
                                                   bool BranchtoEnd = true);
 
-  /// Create a runtime call for kmpc_Alloc
+  /// Create a runtime call for kmpc_alloc
   ///
   /// \param Loc The insert and source location description.
   /// \param Size Size of allocated memory space
@@ -3191,6 +3191,19 @@ class OpenMPIRBuilder {
   LLVM_ABI CallInst *createOMPAlloc(const LocationDescription &Loc, Value *Size,
                                     Value *Allocator, std::string Name = "");
 
+  /// Create a runtime call for kmpc_aligned_alloc
+  ///
+  /// \param Loc The insert and source location description.
+  /// \param Align Requested alignment of the allocated memory space
+  /// \param Size Size of allocated memory space
+  /// \param Allocator Allocator information instruction
+  /// \param Name Name of call Instruction for OMP_Align_Alloc
+  ///
+  /// \returns CallInst to the OMP_Align_Alloc call
+  LLVM_ABI CallInst *createOMPAlignedAlloc(const LocationDescription &Loc,
+                                    Value *Align, Value *Size, Value *Allocator,
+                                    std::string Name = "");
+
   /// Create a runtime call for kmpc_free
   ///
   /// \param Loc The insert and source location description.
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 6a3cbde33e785..eecfc3c2ba251 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -7641,6 +7641,23 @@ CallInst *OpenMPIRBuilder::createOMPAlloc(const LocationDescription &Loc,
   return createRuntimeFunctionCall(Fn, Args, Name);
 }
 
+CallInst *OpenMPIRBuilder::createOMPAlignedAlloc(const LocationDescription &Loc,
+                                          Value *Align, Value *Size, Value *Allocator,
+                                          std::string Name) {
+  IRBuilder<>::InsertPointGuard IPG(Builder);
+  updateToLocation(Loc);
+
+  uint32_t SrcLocStrSize;
+  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
+  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+  Value *ThreadId = getOrCreateThreadID(Ident);
+  Value *Args[] = {ThreadId, Align, Size, Allocator};
+
+  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_aligned_alloc);
+
+  return Builder.CreateCall(Fn, Args, Name);
+}
+
 CallInst *OpenMPIRBuilder::createOMPFree(const LocationDescription &Loc,
                                          Value *Addr, Value *Allocator,
                                          std::string Name) {
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td
index f24efd0d4fc42..13a1fc3bd08bc 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td
@@ -146,11 +146,11 @@ class OpenMP_AllocatorClauseSkip<
                     extraClassDeclaration> {
 
   let arguments = (ins
-    Optional<I64>:$allocator
+    Optional<AnyInteger>:$allocator
   );
 
   let optAssemblyFormat = [{
-    `allocator` `(` $allocator `)`
+    `allocator` `(` $allocator `:` type($allocator) `)`
   }];
 
   let description = [{
diff --git a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
index c67bb57985bd0..f073081002719 100644
--- a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
+++ b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
@@ -272,6 +272,11 @@ class ModuleTranslation {
   /// constructed.
   llvm::OpenMPIRBuilder *getOpenMPBuilder();
 
+  /// Registers a pending __kmpc_free call for the given block. These are
+  /// emitted before the block's terminator during block conversion.
+  void registerPendingOmpAllocateFree(Block *block, llvm::Value *ptr,
+                                      llvm::Value *allocator);
+
   /// Returns the LLVM module in which the IR is being constructed.
   llvm::Module *getLLVMModule() { return llvmModule.get(); }
 
@@ -401,6 +406,9 @@ class ModuleTranslation {
                                  llvm::IRBuilderBase &builder,
                                  bool recordInsertions);
 
+  /// Emits pending __kmpc_free calls for the block, before its terminator.
+  void emitPendingOmpAllocateFrees(Block &bb, llvm::IRBuilderBase &builder);
+
   /// Returns the LLVM metadata corresponding to the given mlir LLVM dialect
   /// TBAATagAttr.
   llvm::MDNode *getTBAANode(TBAATagAttr tbaaAttr) const;
@@ -509,6 +517,11 @@ class ModuleTranslation {
   /// block.
   DenseMap<BlockAddressAttr, llvm::BasicBlock *> blockAddressToLLVMMapping;
 
+  /// Pending __kmpc_free calls per block, emitted before the terminator.
+  DenseMap<Block *,
+           llvm::SmallVector<std::pair<llvm::Value *, llvm::Value *>>>
+      pendingOmpAllocateFrees;
+
   /// Stack of user-specified state elements, useful when translating operations
   /// with regions.
   StateStack stack;
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 2e15f4de4545d..663d8274bd6fe 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -4632,6 +4632,22 @@ static Operation *getGlobalOpFromValue(Value value) {
   return nullptr;
 }
 
+static Value getBaseValueForTypeLookup(Value value) {
+  while (Operation *op = value.getDefiningOp()) {
+    if (auto addrCast = dyn_cast_if_present<LLVM::AddrSpaceCastOp>(op))
+      value = addrCast.getOperand();
+    else if (op->getName().getIdentifier()) {
+      if (op->getNumOperands() > 0)
+        value = op->getOperand(0);
+      else
+        break;
+    } else {
+      break;
+    }
+  }
+  return value;
+}
+
 static llvm::SmallString<64>
 getDeclareTargetRefPtrSuffix(LLVM::GlobalOp globalOp,
                              llvm::OpenMPIRBuilder &ompBuilder) {
@@ -7474,6 +7490,94 @@ convertTargetAllocMemOp(Operation &opInst, llvm::IRBuilderBase &builder,
   return success();
 }
 
+static LogicalResult
+convertAllocateDirOp(Operation &opInst, llvm::IRBuilderBase &builder,
+                    LLVM::ModuleTranslation &moduleTranslation) {
+  auto allocateDirOp = cast<omp::AllocateDirOp>(opInst);
+  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+
+  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+  llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
+  llvm::DataLayout dataLayout = llvmModule->getDataLayout();
+  SmallVector<Value> vars = allocateDirOp.getVarList();
+  std::optional<int64_t> alignAttr = allocateDirOp.getAlign();
+
+  llvm::Value *allocator;
+  if (auto allocatorVar = allocateDirOp.getAllocator()) {
+    allocator = moduleTranslation.lookupValue(allocatorVar);
+    if (allocator->getType()->isIntegerTy())
+      allocator = builder.CreateIntToPtr(allocator, builder.getPtrTy());
+    else if (allocator->getType()->isPointerTy())
+      allocator =
+          builder.CreatePointerBitCastOrAddrSpaceCast(allocator, builder.getPtrTy());
+  } else {
+    allocator = llvm::ConstantPointerNull::get(builder.getPtrTy());
+  }
+
+  SmallVector<std::pair<llvm::CallInst *, llvm::Value *>> allocatedVars;
+
+  for (Value var : vars) {
+    llvm::Type *llvmVarTy = moduleTranslation.convertType(var.getType());
+
+    // Opaque pointers lose the element type, so trace the value back to its
+    // GlobalOp to recover it. Falls back to llvmVarTy when not from a global.
+    llvm::Type *typeToInspect = llvmVarTy;
+    if (llvmVarTy->isPointerTy()) {
+      Value baseVar = getBaseValueForTypeLookup(var);
+      if (Operation *globalOp = getGlobalOpFromValue(baseVar)) {
+        if (auto gop = dyn_cast<LLVM::GlobalOp>(globalOp))
+          typeToInspect =
+              moduleTranslation.convertType(gop.getGlobalType());
+      }
+    }
+
+    llvm::Value *size;
+    if (auto arrTy = llvm::dyn_cast<llvm::ArrayType>(typeToInspect)) {
+      llvm::Value *elementCount = builder.getInt64(1);
+      llvm::Type *currentType = arrTy;
+      while (auto nestedArrTy = llvm::dyn_cast<llvm::ArrayType>(currentType)) {
+        elementCount = builder.CreateMul(
+            elementCount, builder.getInt64(nestedArrTy->getNumElements()));
+        currentType = nestedArrTy->getElementType();
+      }
+      uint64_t elemSizeInBits = dataLayout.getTypeSizeInBits(currentType);
+      size = builder.CreateMul(elementCount,
+                              builder.getInt64(elemSizeInBits / 8));
+    } else {
+      size = builder.getInt64(
+          dataLayout.getTypeStoreSize(typeToInspect).getFixedValue());
+    }
+
+    uint64_t alignValue =
+        alignAttr ? alignAttr.value()
+                  : dataLayout.getABITypeAlign(typeToInspect).value();
+    llvm::Value *alignConst = builder.getInt64(alignValue);
+    // Align the size: ((size + align - 1) / align) * align
+    size = builder.CreateAdd(size, builder.getInt64(alignValue - 1), "", true);
+    size = builder.CreateUDiv(size, alignConst);
+    size = builder.CreateMul(size, alignConst, "", true);
+
+    std::string allocName =
+        ompBuilder->createPlatformSpecificName({".void.addr"});
+    llvm::CallInst *allocCall;
+    if (alignAttr.has_value()) {
+      allocCall = ompBuilder->createOMPAlignedAlloc(
+          ompLoc, builder.getInt64(alignAttr.value()), size, allocator, allocName);
+    } else {
+      allocCall = ompBuilder->createOMPAlloc(ompLoc, size, allocator, allocName);
+    }
+    allocatedVars.push_back({allocCall, allocator});
+  }
+
+  // Register __kmpc_free calls to be emitted before the block terminator.
+  Block *block = allocateDirOp->getBlock();
+  for (auto &alloc : allocatedVars)
+    moduleTranslation.registerPendingOmpAllocateFree(block, alloc.first,
+                                                    alloc.second);
+
+  return success();
+}
+
 static llvm::Function *getOmpTargetFree(llvm::IRBuilderBase &builder,
                                         llvm::Module *llvmModule) {
   llvm::Type *ptrTy = builder.getPtrTy(0);
@@ -7719,6 +7823,9 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
           .Case([&](omp::TargetFreeMemOp) {
             return convertTargetFreeMemOp(*op, builder, moduleTranslation);
           })
+          .Case([&](omp::AllocateDirOp) {
+            return convertAllocateDirOp(*op, builder, moduleTranslation);
+          })
           .Default([&](Operation *inst) {
             return inst->emitError()
                    << "not yet implemented: " << inst->getName();
diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
index cf398f151ed0b..2f0345b67ea6b 100644
--- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
@@ -1009,6 +1009,30 @@ LogicalResult ModuleTranslation::convertOperation(Operation &op,
   return convertDialectAttributes(&op, scope.getCapturedInstructions());
 }
 
+void ModuleTranslation::registerPendingOmpAllocateFree(Block *block,
+                                                       llvm::Value *ptr,
+                                                       llvm::Value *allocator) {
+  pendingOmpAllocateFrees[block].push_back({ptr, allocator});
+}
+
+void ModuleTranslation::emitPendingOmpAllocateFrees(
+    Block &bb, llvm::IRBuilderBase &builder) {
+  auto it = pendingOmpAllocateFrees.find(&bb);
+  if (it == pendingOmpAllocateFrees.end() || it->second.empty())
+    return;
+  llvm::OpenMPIRBuilder *ompBuilder = getOpenMPBuilder();
+  llvm::BasicBlock *llvmBB = lookupBlock(&bb);
+  llvm::Instruction *term = llvmBB->getTerminator();
+  if (term)
+    builder.SetInsertPoint(term);
+  else
+    builder.SetInsertPoint(llvmBB);
+  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+  for (auto it2 = it->second.rbegin(); it2 != it->second.rend(); ++it2)
+    ompBuilder->createOMPFree(ompLoc, it2->first, it2->second, "");
+  pendingOmpAllocateFrees.erase(it);
+}
+
 /// Convert block to LLVM IR.  Unless `ignoreArguments` is set, emit PHI nodes
 /// to define values corresponding to the MLIR block arguments.  These nodes
 /// are not connected to the source basic blocks, which may not exist yet.  Uses
@@ -1048,6 +1072,10 @@ LogicalResult ModuleTranslation::convertBlockImpl(Block &bb,
 
   // Traverse operations.
   for (auto &op : bb) {
+    // Emit pending OpenMP allocate frees before the terminator.
+    if (op.hasTrait<OpTrait::IsTerminator>())
+      emitPendingOmpAllocateFrees(bb, builder);
+
     // Set the current debug location within the builder.
     builder.SetCurrentDebugLocation(
         debugTranslation->translateLoc(op.getLoc(), subprogram));
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index 869f163cb4014..3d1133f4ba6e9 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -3466,27 +3466,27 @@ func.func @omp_allocate_dir(%arg0 : memref<i32>, %arg1 : memref<i32>) -> () {
   // Test with one data var and allocator clause
   // CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64
   %omp_default_mem_alloc = arith.constant 1 : i64
-  // CHECK: omp.allocate_dir(%[[ARG0]] : memref<i32>) allocator(%[[VAL_1:.*]])
-  omp.allocate_dir (%arg0 : memref<i32>) allocator(%omp_default_mem_alloc)
+  // CHECK: omp.allocate_dir(%[[ARG0]] : memref<i32>) allocator(%[[VAL_1:.*]]  : i64)
+  omp.allocate_dir (%arg0 : memref<i32>) allocator(%omp_default_mem_alloc : i64)
 
   // Test with one data var, align clause and allocator clause
   // CHECK: %[[VAL_2:.*]] = arith.constant 7 : i64
   %omp_pteam_mem_alloc = arith.constant 7 : i64
-  // CHECK: omp.allocate_dir(%[[ARG0]] : memref<i32>)  align(4) allocator(%[[VAL_2:.*]])
-  omp.allocate_dir (%arg0 : memref<i32>)  align(4) allocator(%omp_pteam_mem_alloc)
+  // CHECK: omp.allocate_dir(%[[ARG0]] : memref<i32>)  align(4) allocator(%[[VAL_2:.*]]  : i64)
+  omp.allocate_dir (%arg0 : memref<i32>)  align(4) allocator(%omp_pteam_mem_alloc  : i64)
 
   // Test with two data vars, align clause and allocator clause
   // CHECK: %[[VAL_3:.*]] = arith.constant 6 : i64
   %omp_cgroup_mem_alloc = arith.constant 6 : i64
-  // CHECK: omp.allocate_dir(%[[ARG0]], %[[ARG1]] : memref<i32>, memref<i32>) align(8) allocator(%[[VAL_3:.*]])
-  omp.allocate_dir (%arg0, %arg1 : memref<i32>, memref<i32>) align(8) allocator(%omp_cgroup_mem_alloc)
+  // CHECK: omp.allocate_dir(%[[ARG0]], %[[ARG1]] : memref<i32>, memref<i32>) align(8) allocator(%[[VAL_3:.*]] : i64)
+  omp.allocate_dir (%arg0, %arg1 : memref<i32>, memref<i32>) align(8) allocator(%omp_cgroup_mem_alloc : i64)
 
   // Test with one data var and user defined allocator clause
   // CHECK: %[[VAL_4:.*]] = arith.constant 9 : i64
   %custom_allocator = arith.constant 9 : i64
   %custom_mem_alloc = func.call @omp_init_allocator(%custom_allocator) : (i64) -> (i64)
-  // CHECK: omp.allocate_dir(%[[ARG0]] : memref<i32>) allocator(%[[VAL_5:.*]])
-  omp.allocate_dir (%arg0 : memref<i32>) allocator(%custom_mem_alloc)
+  // CHECK: omp.allocate_dir(%[[ARG0]] : memref<i32>) allocator(%[[VAL_5:.*]] : i64)
+  omp.allocate_dir (%arg0 : memref<i32>) allocator(%custom_mem_alloc : i64)
 
   return
 }

>From 07293d33aa3e5d5422ba9a41fd167a017c09db37 Mon Sep 17 00:00:00 2001
From: Raghu Maddhipatla <Raghu.Maddhipatla at amd.com>
Date: Tue, 17 Mar 2026 21:28:49 -0500
Subject: [PATCH 2/6] Fix clang-formatting

---
 flang/lib/Lower/OpenMP/ClauseProcessor.cpp    |  7 +++--
 flang/lib/Lower/OpenMP/ClauseProcessor.h      |  3 +-
 flang/lib/Lower/OpenMP/OpenMP.cpp             | 31 ++++++++++---------
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       |  5 +--
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     |  5 +--
 .../mlir/Target/LLVMIR/ModuleTranslation.h    |  3 +-
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 21 +++++++------
 7 files changed, 41 insertions(+), 34 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index 47cdaf1829913..7d2fe869322f3 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -325,8 +325,7 @@ static void collectIteratorIVs(
 // ClauseProcessor unique clauses
 //===----------------------------------------------------------------------===//
 
-bool ClauseProcessor::processAlign(
-    mlir::omp::AlignClauseOps &result) const {
+bool ClauseProcessor::processAlign(mlir::omp::AlignClauseOps &result) const {
   if (auto *clause = findUniqueClause<omp::clause::Align>()) {
     fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
     const std::optional<std::int64_t> align = evaluate::ToInt64(clause->v);
@@ -336,7 +335,9 @@ bool ClauseProcessor::processAlign(
   return false;
 }
 
-bool ClauseProcessor::processAllocator(lower::StatementContext &stmtCtx, mlir::omp::AllocatorClauseOps &result) const {
+bool ClauseProcessor::processAllocator(
+    lower::StatementContext &stmtCtx,
+    mlir::omp::AllocatorClauseOps &result) const {
   if (auto *clause = findUniqueClause<omp::clause::Allocator>()) {
     result.allocator = fir::getBase(converter.genExprValue(clause->v, stmtCtx));
     return true;
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h
index 33323036cdc3a..29b5c29b8e33a 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.h
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h
@@ -58,7 +58,8 @@ class ClauseProcessor {
 
   // 'Unique' clauses: They can appear at most once in the clause list.
   bool processAlign(mlir::omp::AlignClauseOps &result) const;
-  bool processAllocator(lower::StatementContext &stmtCtx, mlir::omp::AllocatorClauseOps &result) const;
+  bool processAllocator(lower::StatementContext &stmtCtx,
+                        mlir::omp::AllocatorClauseOps &result) const;
   bool processBare(mlir::omp::BareClauseOps &result) const;
   bool processBind(mlir::omp::BindClauseOps &result) const;
   bool processCancelDirectiveName(
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index cce210b54dceb..3cc343925d8fa 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1508,12 +1508,12 @@ static OpTy genWrapperOp(lower::AbstractConverter &converter,
 //===----------------------------------------------------------------------===//
 
 static void genAllocateClauses(lower::AbstractConverter &converter,
-                            semantics::SemanticsContext &semaCtx,
-                            lower::StatementContext &stmtCtx,
-                            const ObjectList &objects,
-                            const List<Clause> &clauses, mlir::Location loc,
-                            llvm::SmallVectorImpl<mlir::Value> &operandRange,
-                            mlir::omp::AllocateDirOperands &clauseOps) {
+                               semantics::SemanticsContext &semaCtx,
+                               lower::StatementContext &stmtCtx,
+                               const ObjectList &objects,
+                               const List<Clause> &clauses, mlir::Location loc,
+                               llvm::SmallVectorImpl<mlir::Value> &operandRange,
+                               mlir::omp::AllocateDirOperands &clauseOps) {
   if (!objects.empty())
     genObjectList(objects, converter, operandRange);
 
@@ -1942,16 +1942,19 @@ static void genWsloopClauses(
 //===----------------------------------------------------------------------===//
 // Code generation functions for leaf constructs
 //===----------------------------------------------------------------------===//
-static mlir::omp::AllocateDirOp
-genAllocateDirOp(lower::AbstractConverter &converter,
-           semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, lower::pft::Evaluation &eval,
-           mlir::Location loc, const ObjectList &objects,  const ConstructQueue &queue, ConstructQueue::const_iterator item) {
+static mlir::omp::AllocateDirOp genAllocateDirOp(
+    lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
+    lower::StatementContext &stmtCtx, lower::pft::Evaluation &eval,
+    mlir::Location loc, const ObjectList &objects, const ConstructQueue &queue,
+    ConstructQueue::const_iterator item) {
   llvm::SmallVector<mlir::Value> operandRange;
   mlir::omp::AllocateDirOperands clauseOps;
   genAllocateClauses(converter, semaCtx, stmtCtx, objects, item->clauses, loc,
-                  operandRange, clauseOps);
+                     operandRange, clauseOps);
 
-  return mlir::omp::AllocateDirOp::create(converter.getFirOpBuilder(), loc, operandRange, clauseOps.align, clauseOps.allocator);
+  return mlir::omp::AllocateDirOp::create(converter.getFirOpBuilder(), loc,
+                                          operandRange, clauseOps.align,
+                                          clauseOps.allocator);
 }
 
 static mlir::omp::BarrierOp
@@ -3877,8 +3880,8 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
       converter.getFirOpBuilder().getModule(), semaCtx, eval, allocate.source,
       llvm::omp::Directive::OMPD_allocate, clauses)};
 
-  genAllocateDirOp(converter, semaCtx, stmtCtx, eval, loc, objects,
-             queue, queue.begin());
+  genAllocateDirOp(converter, semaCtx, stmtCtx, eval, loc, objects, queue,
+                   queue.begin());
 }
 
 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index fdf1e1f0b18ed..7c78f7a1d8f44 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -3201,8 +3201,9 @@ class OpenMPIRBuilder {
   ///
   /// \returns CallInst to the OMP_Align_Alloc call
   LLVM_ABI CallInst *createOMPAlignedAlloc(const LocationDescription &Loc,
-                                    Value *Align, Value *Size, Value *Allocator,
-                                    std::string Name = "");
+                                           Value *Align, Value *Size,
+                                           Value *Allocator,
+                                           std::string Name = "");
 
   /// Create a runtime call for kmpc_free
   ///
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index eecfc3c2ba251..edd181f7eea2e 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -7642,8 +7642,9 @@ CallInst *OpenMPIRBuilder::createOMPAlloc(const LocationDescription &Loc,
 }
 
 CallInst *OpenMPIRBuilder::createOMPAlignedAlloc(const LocationDescription &Loc,
-                                          Value *Align, Value *Size, Value *Allocator,
-                                          std::string Name) {
+                                                 Value *Align, Value *Size,
+                                                 Value *Allocator,
+                                                 std::string Name) {
   IRBuilder<>::InsertPointGuard IPG(Builder);
   updateToLocation(Loc);
 
diff --git a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
index f073081002719..243cca8831e37 100644
--- a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
+++ b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
@@ -518,8 +518,7 @@ class ModuleTranslation {
   DenseMap<BlockAddressAttr, llvm::BasicBlock *> blockAddressToLLVMMapping;
 
   /// Pending __kmpc_free calls per block, emitted before the terminator.
-  DenseMap<Block *,
-           llvm::SmallVector<std::pair<llvm::Value *, llvm::Value *>>>
+  DenseMap<Block *, llvm::SmallVector<std::pair<llvm::Value *, llvm::Value *>>>
       pendingOmpAllocateFrees;
 
   /// Stack of user-specified state elements, useful when translating operations
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 663d8274bd6fe..5f5b1150f9588 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -7492,7 +7492,7 @@ convertTargetAllocMemOp(Operation &opInst, llvm::IRBuilderBase &builder,
 
 static LogicalResult
 convertAllocateDirOp(Operation &opInst, llvm::IRBuilderBase &builder,
-                    LLVM::ModuleTranslation &moduleTranslation) {
+                     LLVM::ModuleTranslation &moduleTranslation) {
   auto allocateDirOp = cast<omp::AllocateDirOp>(opInst);
   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
 
@@ -7508,8 +7508,8 @@ convertAllocateDirOp(Operation &opInst, llvm::IRBuilderBase &builder,
     if (allocator->getType()->isIntegerTy())
       allocator = builder.CreateIntToPtr(allocator, builder.getPtrTy());
     else if (allocator->getType()->isPointerTy())
-      allocator =
-          builder.CreatePointerBitCastOrAddrSpaceCast(allocator, builder.getPtrTy());
+      allocator = builder.CreatePointerBitCastOrAddrSpaceCast(
+          allocator, builder.getPtrTy());
   } else {
     allocator = llvm::ConstantPointerNull::get(builder.getPtrTy());
   }
@@ -7526,8 +7526,7 @@ convertAllocateDirOp(Operation &opInst, llvm::IRBuilderBase &builder,
       Value baseVar = getBaseValueForTypeLookup(var);
       if (Operation *globalOp = getGlobalOpFromValue(baseVar)) {
         if (auto gop = dyn_cast<LLVM::GlobalOp>(globalOp))
-          typeToInspect =
-              moduleTranslation.convertType(gop.getGlobalType());
+          typeToInspect = moduleTranslation.convertType(gop.getGlobalType());
       }
     }
 
@@ -7541,8 +7540,8 @@ convertAllocateDirOp(Operation &opInst, llvm::IRBuilderBase &builder,
         currentType = nestedArrTy->getElementType();
       }
       uint64_t elemSizeInBits = dataLayout.getTypeSizeInBits(currentType);
-      size = builder.CreateMul(elementCount,
-                              builder.getInt64(elemSizeInBits / 8));
+      size =
+          builder.CreateMul(elementCount, builder.getInt64(elemSizeInBits / 8));
     } else {
       size = builder.getInt64(
           dataLayout.getTypeStoreSize(typeToInspect).getFixedValue());
@@ -7562,9 +7561,11 @@ convertAllocateDirOp(Operation &opInst, llvm::IRBuilderBase &builder,
     llvm::CallInst *allocCall;
     if (alignAttr.has_value()) {
       allocCall = ompBuilder->createOMPAlignedAlloc(
-          ompLoc, builder.getInt64(alignAttr.value()), size, allocator, allocName);
+          ompLoc, builder.getInt64(alignAttr.value()), size, allocator,
+          allocName);
     } else {
-      allocCall = ompBuilder->createOMPAlloc(ompLoc, size, allocator, allocName);
+      allocCall =
+          ompBuilder->createOMPAlloc(ompLoc, size, allocator, allocName);
     }
     allocatedVars.push_back({allocCall, allocator});
   }
@@ -7573,7 +7574,7 @@ convertAllocateDirOp(Operation &opInst, llvm::IRBuilderBase &builder,
   Block *block = allocateDirOp->getBlock();
   for (auto &alloc : allocatedVars)
     moduleTranslation.registerPendingOmpAllocateFree(block, alloc.first,
-                                                    alloc.second);
+                                                     alloc.second);
 
   return success();
 }

>From 18ea8e95e162186f99d5f8c56d1cdd3974a46431 Mon Sep 17 00:00:00 2001
From: Raghu Maddhipatla <Raghu.Maddhipatla at amd.com>
Date: Wed, 18 Mar 2026 00:14:58 -0500
Subject: [PATCH 3/6] Fix buildbot errors for the test program

---
 .../OpenMP/omp-declarative-allocate-align.f90 | 22 +++++++++----------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90 b/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90
index 50c6ab1f64002..a131573ca5375 100644
--- a/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90
+++ b/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90
@@ -1,22 +1,21 @@
 ! This test checks lowering of OpenMP allocate Directive with align and allocator
 ! clauses to LLVM IR. Verifies code generation for:
 !   - align(16) only (null allocator)
-!   - allocator(omp_default_mem_alloc) only (no align)
-!   - align(64) allocator(omp_cgroup_mem_alloc) (both clauses, array variable)
+!   - allocator(1) only (no align)
+!   - align(64) allocator(6) (both clauses, array variable)
 !   - align(32) allocator(3) (both clauses, multiple variables)
 
 ! RUN: %flang_fc1 -emit-llvm %openmp_flags -fopenmp-version=51 %s -o - 2>&1 | FileCheck %s
 
 program main
-  use omp_lib
   integer :: x, y
   integer :: z(10)
   character c
-  real(kind = 16) :: r
-  complex cmplx
+  real :: r
+  complex :: cmplx
   !$omp allocate(x) align(16)
-  !$omp allocate(y) allocator(omp_default_mem_alloc)
-  !$omp allocate(z) align(64) allocator(omp_cgroup_mem_alloc)
+  !$omp allocate(y) allocator(1)
+  !$omp allocate(z) align(64) allocator(6)
   !$omp allocate(c, r, cmplx) align(32) allocator(3)
   x = 1
   y = 2
@@ -24,12 +23,11 @@ program main
   print *, "z : ", z
 end program
 
-! CHECK: define void @_QQmain()
 ! CHECK: call i32 @__kmpc_global_thread_num(
 
 ! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 16, i64 {{.*}}, ptr null)
-! CHECK: call ptr @__kmpc_alloc(i32 {{.*}}, i64 {{.*}}, ptr inttoptr (i64 1 to ptr))
-! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 64, i64 {{.*}}, ptr inttoptr (i64 6 to ptr))
+! CHECK: call ptr @__kmpc_alloc(i32 {{.*}}, i64 {{.*}}, ptr inttoptr (i32 1 to ptr))
+! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 64, i64 {{.*}}, ptr inttoptr (i32 6 to ptr))
 ! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 32, i64 {{.*}}, ptr inttoptr (i32 3 to ptr))
 ! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 32, i64 {{.*}}, ptr inttoptr (i32 3 to ptr))
 ! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 32, i64 {{.*}}, ptr inttoptr (i32 3 to ptr))
@@ -37,8 +35,8 @@ program main
 ! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 3 to ptr))
 ! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 3 to ptr))
 ! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 3 to ptr))
-! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i64 6 to ptr))
-! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i64 1 to ptr))
+! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 6 to ptr))
+! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 1 to ptr))
 ! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr null)
 ! CHECK: ret void
 

>From f047c352d4c4615dc205f12a8719438652a0994a Mon Sep 17 00:00:00 2001
From: Raghu Maddhipatla <Raghu.Maddhipatla at amd.com>
Date: Tue, 31 Mar 2026 18:43:09 -0500
Subject: [PATCH 4/6] Address review comments. Moved implementation from
 ModuleTranslation.cpp to use LLVMTranslationInterface.h

---
 .../OpenMP/omp-declarative-allocate-align.f90 |  44 +++----
 .../Lower/OpenMP/omp-declarative-allocate.f90 |  18 ++-
 .../LLVMIR/LLVMTranslationDialectInterface.td |  10 ++
 .../Target/LLVMIR/LLVMTranslationInterface.h  |  11 ++
 .../mlir/Target/LLVMIR/ModuleTranslation.h    |  12 --
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      |  57 ++++++++-
 mlir/lib/Target/LLVMIR/ModuleTranslation.cpp  |  29 +----
 .../LLVMIR/openmp-allocate-directive.mlir     | 109 ++++++++++++++++++
 8 files changed, 213 insertions(+), 77 deletions(-)
 create mode 100644 mlir/test/Target/LLVMIR/openmp-allocate-directive.mlir

diff --git a/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90 b/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90
index a131573ca5375..0824d8bcb7e90 100644
--- a/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90
+++ b/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90
@@ -1,11 +1,11 @@
 ! This test checks lowering of OpenMP allocate Directive with align and allocator
-! clauses to LLVM IR. Verifies code generation for:
+! clauses to HLFIR. Verifies code generation for:
 !   - align(16) only (null allocator)
 !   - allocator(1) only (no align)
 !   - align(64) allocator(6) (both clauses, array variable)
 !   - align(32) allocator(3) (both clauses, multiple variables)
 
-! RUN: %flang_fc1 -emit-llvm %openmp_flags -fopenmp-version=51 %s -o - 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir %openmp_flags -fopenmp-version=51 %s -o - 2>&1 | FileCheck %s
 
 program main
   integer :: x, y
@@ -23,23 +23,23 @@ program main
   print *, "z : ", z
 end program
 
-! CHECK: call i32 @__kmpc_global_thread_num(
-
-! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 16, i64 {{.*}}, ptr null)
-! CHECK: call ptr @__kmpc_alloc(i32 {{.*}}, i64 {{.*}}, ptr inttoptr (i32 1 to ptr))
-! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 64, i64 {{.*}}, ptr inttoptr (i32 6 to ptr))
-! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 32, i64 {{.*}}, ptr inttoptr (i32 3 to ptr))
-! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 32, i64 {{.*}}, ptr inttoptr (i32 3 to ptr))
-! CHECK: call ptr @__kmpc_aligned_alloc(i32 {{.*}}, i64 32, i64 {{.*}}, ptr inttoptr (i32 3 to ptr))
-
-! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 3 to ptr))
-! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 3 to ptr))
-! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 3 to ptr))
-! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 6 to ptr))
-! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr inttoptr (i32 1 to ptr))
-! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr null)
-! CHECK: ret void
-
-! CHECK: declare noalias ptr @__kmpc_aligned_alloc(i32, i64, i64, ptr)
-! CHECK: declare noalias ptr @__kmpc_alloc(i32, i64, ptr)
-! CHECK: declare void @__kmpc_free(i32, ptr, ptr)
+! CHECK: %[[C1_IDX:.*]] = arith.constant 1 : index
+! CHECK: %[[C_ALLOC:.*]] = fir.alloca !fir.char<1> {bindc_name = "c", uniq_name = "_QFEc"}
+! CHECK: %[[C_DECL:.*]]:2 = hlfir.declare %[[C_ALLOC]] typeparams %[[C1_IDX]] {uniq_name = "_QFEc"} : (!fir.ref<!fir.char<1>>, index) -> (!fir.ref<!fir.char<1>>, !fir.ref<!fir.char<1>>)
+! CHECK: %[[CMPLX_ALLOC:.*]] = fir.alloca complex<f32> {bindc_name = "cmplx", uniq_name = "_QFEcmplx"}
+! CHECK: %[[CMPLX_DECL:.*]]:2 = hlfir.declare %[[CMPLX_ALLOC]] {uniq_name = "_QFEcmplx"} : (!fir.ref<complex<f32>>) -> (!fir.ref<complex<f32>>, !fir.ref<complex<f32>>)
+! CHECK: %[[R_ALLOC:.*]] = fir.alloca f32 {bindc_name = "r", uniq_name = "_QFEr"}
+! CHECK: %[[R_DECL:.*]]:2 = hlfir.declare %[[R_ALLOC]] {uniq_name = "_QFEr"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[X_ALLOC:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFEx"}
+! CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X_ALLOC]] {uniq_name = "_QFEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[Y_ALLOC:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFEy"}
+! CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y_ALLOC]] {uniq_name = "_QFEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[Z_REF:.*]] = fir.address_of(@_QFEz) : !fir.ref<!fir.array<10xi32>>
+! CHECK: %[[Z_DECL:.*]]:2 = hlfir.declare %[[Z_REF]]({{.*}}) {uniq_name = "_QFEz"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
+! CHECK: omp.allocate_dir(%[[X_DECL]]#0 : !fir.ref<i32>) align(16)
+! CHECK: %[[ALLOC1:.*]] = arith.constant 1 : i32
+! CHECK: omp.allocate_dir(%[[Y_DECL]]#0 : !fir.ref<i32>) allocator(%[[ALLOC1]] : i32)
+! CHECK: %[[ALLOC6:.*]] = arith.constant 6 : i32
+! CHECK: omp.allocate_dir(%[[Z_DECL]]#0 : !fir.ref<!fir.array<10xi32>>) align(64) allocator(%[[ALLOC6]] : i32)
+! CHECK: %[[ALLOC3:.*]] = arith.constant 3 : i32
+! CHECK: omp.allocate_dir(%[[C_DECL]]#0, %[[R_DECL]]#0, %[[CMPLX_DECL]]#0 : !fir.ref<!fir.char<1>>, !fir.ref<f32>, !fir.ref<complex<f32>>) align(32) allocator(%[[ALLOC3]] : i32)
diff --git a/flang/test/Lower/OpenMP/omp-declarative-allocate.f90 b/flang/test/Lower/OpenMP/omp-declarative-allocate.f90
index 7c8047ebf7f53..69da3f52b459f 100644
--- a/flang/test/Lower/OpenMP/omp-declarative-allocate.f90
+++ b/flang/test/Lower/OpenMP/omp-declarative-allocate.f90
@@ -1,19 +1,15 @@
-! This test checks lowering of OpenMP allocate Directive to LLVM IR.
+! This test checks lowering of OpenMP allocate Directive to HLFIR.
 ! Verifies code generation for default (no align, null allocator) case.
 
-! RUN: %flang_fc1 -emit-llvm -fopenmp %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
 
 program main
   integer :: x, y
   !$omp allocate(x, y)
 end program
 
-! CHECK: define void @_QQmain()
-! CHECK: call i32 @__kmpc_global_thread_num(
-! CHECK: call ptr @__kmpc_alloc(i32 {{.*}}, i64 8, ptr null)
-! CHECK: call ptr @__kmpc_alloc(i32 {{.*}}, i64 8, ptr null)
-! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr null)
-! CHECK: call void @__kmpc_free(i32 {{.*}}, ptr {{.*}}, ptr null)
-! CHECK: ret void
-! CHECK: declare noalias ptr @__kmpc_alloc(i32, i64, ptr)
-! CHECK: declare void @__kmpc_free(i32, ptr, ptr)
+! CHECK: %[[X_ALLOC:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFEx"}
+! CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X_ALLOC]] {uniq_name = "_QFEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[Y_ALLOC:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFEy"}
+! CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y_ALLOC]] {uniq_name = "_QFEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: omp.allocate_dir(%[[X_DECL]]#0, %[[Y_DECL]]#0 : !fir.ref<i32>, !fir.ref<i32>)
diff --git a/mlir/include/mlir/Target/LLVMIR/LLVMTranslationDialectInterface.td b/mlir/include/mlir/Target/LLVMIR/LLVMTranslationDialectInterface.td
index 6d8c7174bd2e3..b1e7f25b44c40 100644
--- a/mlir/include/mlir/Target/LLVMIR/LLVMTranslationDialectInterface.td
+++ b/mlir/include/mlir/Target/LLVMIR/LLVMTranslationDialectInterface.td
@@ -55,6 +55,16 @@ def LLVMTranslationDialectInterface : DialectInterface<"LLVMTranslationDialectIn
       [{
         return ::llvm::success();
       }]
+    >,
+    InterfaceMethod<[{
+        Hook called just before a block's terminator operation is translated.
+        Dialects can override this to inject IR that must appear at the end of
+        a basic block.
+      }],
+      "void", "preTranslateTerminator",
+      (ins "::mlir::Block &":$block, "::llvm::IRBuilderBase &":$builder,
+           "::mlir::LLVM::ModuleTranslation &":$moduleTranslation),
+      [{ }]
     >
   ];
 }
diff --git a/mlir/include/mlir/Target/LLVMIR/LLVMTranslationInterface.h b/mlir/include/mlir/Target/LLVMIR/LLVMTranslationInterface.h
index 58d3ee0ed2139..5bc4aa9a4e7e9 100644
--- a/mlir/include/mlir/Target/LLVMIR/LLVMTranslationInterface.h
+++ b/mlir/include/mlir/Target/LLVMIR/LLVMTranslationInterface.h
@@ -79,6 +79,17 @@ class LLVMTranslationInterface
                          attribute.getName().str() + "'");
     return success();
   }
+
+  /// Calls the `preTranslateTerminator` hook on every registered dialect
+  /// interface. This is broadcast to all interfaces because any dialect may
+  /// have registered deferred work for the given block, independent of which
+  /// dialect owns the terminator.
+  virtual void
+  preTranslateTerminator(Block &block, llvm::IRBuilderBase &builder,
+                         LLVM::ModuleTranslation &moduleTranslation) const {
+    for (const LLVMTranslationDialectInterface &iface : *this)
+      iface.preTranslateTerminator(block, builder, moduleTranslation);
+  }
 };
 
 } // namespace mlir
diff --git a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
index 243cca8831e37..c67bb57985bd0 100644
--- a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
+++ b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
@@ -272,11 +272,6 @@ class ModuleTranslation {
   /// constructed.
   llvm::OpenMPIRBuilder *getOpenMPBuilder();
 
-  /// Registers a pending __kmpc_free call for the given block. These are
-  /// emitted before the block's terminator during block conversion.
-  void registerPendingOmpAllocateFree(Block *block, llvm::Value *ptr,
-                                      llvm::Value *allocator);
-
   /// Returns the LLVM module in which the IR is being constructed.
   llvm::Module *getLLVMModule() { return llvmModule.get(); }
 
@@ -406,9 +401,6 @@ class ModuleTranslation {
                                  llvm::IRBuilderBase &builder,
                                  bool recordInsertions);
 
-  /// Emits pending __kmpc_free calls for the block, before its terminator.
-  void emitPendingOmpAllocateFrees(Block &bb, llvm::IRBuilderBase &builder);
-
   /// Returns the LLVM metadata corresponding to the given mlir LLVM dialect
   /// TBAATagAttr.
   llvm::MDNode *getTBAANode(TBAATagAttr tbaaAttr) const;
@@ -517,10 +509,6 @@ class ModuleTranslation {
   /// block.
   DenseMap<BlockAddressAttr, llvm::BasicBlock *> blockAddressToLLVMMapping;
 
-  /// Pending __kmpc_free calls per block, emitted before the terminator.
-  DenseMap<Block *, llvm::SmallVector<std::pair<llvm::Value *, llvm::Value *>>>
-      pendingOmpAllocateFrees;
-
   /// Stack of user-specified state elements, useful when translating operations
   /// with regions.
   StateStack stack;
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 5f5b1150f9588..a2ae28990e327 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -4636,7 +4636,11 @@ static Value getBaseValueForTypeLookup(Value value) {
   while (Operation *op = value.getDefiningOp()) {
     if (auto addrCast = dyn_cast_if_present<LLVM::AddrSpaceCastOp>(op))
       value = addrCast.getOperand();
-    else if (op->getName().getIdentifier()) {
+    // Trace through hlfir.declare and fir.declare to reach the base address
+    // used for the type lookup.
+    else if (op->getName().getIdentifier() &&
+             (op->getName().getIdentifier().str() == "hlfir.declare" ||
+              op->getName().getIdentifier().str() == "fir.declare")) {
       if (op->getNumOperands() > 0)
         value = op->getOperand(0);
       else
@@ -7313,10 +7317,50 @@ class OpenMPDialectLLVMIRTranslationInterface
   amendOperation(Operation *op, ArrayRef<llvm::Instruction *> instructions,
                  NamedAttribute attribute,
                  LLVM::ModuleTranslation &moduleTranslation) const final;
+
+  /// Emits pending __kmpc_free calls just before the block's terminator.
+  void preTranslateTerminator(
+      Block &block, llvm::IRBuilderBase &builder,
+      LLVM::ModuleTranslation &moduleTranslation) const final;
+
+  /// Registers a deferred __kmpc_free call to be emitted before the
+  /// terminator of the given block.
+  void registerPendingOmpAllocateFree(Block *block, llvm::Value *ptr,
+                                      llvm::Value *allocator) const {
+    pendingOmpAllocateFrees[block].push_back({ptr, allocator});
+  }
+
+private:
+  /// Pending __kmpc_free calls per block, emitted via preTranslateTerminator.
+  mutable DenseMap<Block *,
+                   llvm::SmallVector<std::pair<llvm::Value *, llvm::Value *>>>
+      pendingOmpAllocateFrees;
 };
 
 } // namespace
 
+void OpenMPDialectLLVMIRTranslationInterface::preTranslateTerminator(
+    Block &block, llvm::IRBuilderBase &builder,
+    LLVM::ModuleTranslation &moduleTranslation) const {
+  auto it = pendingOmpAllocateFrees.find(&block);
+  if (it == pendingOmpAllocateFrees.end() || it->second.empty())
+    return;
+  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+  if (!ompBuilder)
+    return;
+  llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(&block);
+  if (!llvmBB)
+    return;
+  if (!llvmBB->empty() && llvmBB->back().isTerminator())
+    builder.SetInsertPoint(&llvmBB->back());
+  else
+    builder.SetInsertPoint(llvmBB);
+  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+  for (auto it2 = it->second.rbegin(); it2 != it->second.rend(); ++it2)
+    ompBuilder->createOMPFree(ompLoc, it2->first, it2->second, "");
+  pendingOmpAllocateFrees.erase(it);
+}
+
 LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation(
     Operation *op, ArrayRef<llvm::Instruction *> instructions,
     NamedAttribute attribute,
@@ -7492,7 +7536,8 @@ convertTargetAllocMemOp(Operation &opInst, llvm::IRBuilderBase &builder,
 
 static LogicalResult
 convertAllocateDirOp(Operation &opInst, llvm::IRBuilderBase &builder,
-                     LLVM::ModuleTranslation &moduleTranslation) {
+                     LLVM::ModuleTranslation &moduleTranslation,
+                     const OpenMPDialectLLVMIRTranslationInterface &ompIface) {
   auto allocateDirOp = cast<omp::AllocateDirOp>(opInst);
   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
 
@@ -7570,11 +7615,11 @@ convertAllocateDirOp(Operation &opInst, llvm::IRBuilderBase &builder,
     allocatedVars.push_back({allocCall, allocator});
   }
 
-  // Register __kmpc_free calls to be emitted before the block terminator.
+  // Register __kmpc_free calls to be emitted before the block terminator via
+  // preTranslateTerminator().
   Block *block = allocateDirOp->getBlock();
   for (auto &alloc : allocatedVars)
-    moduleTranslation.registerPendingOmpAllocateFree(block, alloc.first,
-                                                     alloc.second);
+    ompIface.registerPendingOmpAllocateFree(block, alloc.first, alloc.second);
 
   return success();
 }
@@ -7825,7 +7870,7 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
             return convertTargetFreeMemOp(*op, builder, moduleTranslation);
           })
           .Case([&](omp::AllocateDirOp) {
-            return convertAllocateDirOp(*op, builder, moduleTranslation);
+            return convertAllocateDirOp(*op, builder, moduleTranslation, *this);
           })
           .Default([&](Operation *inst) {
             return inst->emitError()
diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
index 2f0345b67ea6b..699544d0565cb 100644
--- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
@@ -1009,30 +1009,6 @@ LogicalResult ModuleTranslation::convertOperation(Operation &op,
   return convertDialectAttributes(&op, scope.getCapturedInstructions());
 }
 
-void ModuleTranslation::registerPendingOmpAllocateFree(Block *block,
-                                                       llvm::Value *ptr,
-                                                       llvm::Value *allocator) {
-  pendingOmpAllocateFrees[block].push_back({ptr, allocator});
-}
-
-void ModuleTranslation::emitPendingOmpAllocateFrees(
-    Block &bb, llvm::IRBuilderBase &builder) {
-  auto it = pendingOmpAllocateFrees.find(&bb);
-  if (it == pendingOmpAllocateFrees.end() || it->second.empty())
-    return;
-  llvm::OpenMPIRBuilder *ompBuilder = getOpenMPBuilder();
-  llvm::BasicBlock *llvmBB = lookupBlock(&bb);
-  llvm::Instruction *term = llvmBB->getTerminator();
-  if (term)
-    builder.SetInsertPoint(term);
-  else
-    builder.SetInsertPoint(llvmBB);
-  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
-  for (auto it2 = it->second.rbegin(); it2 != it->second.rend(); ++it2)
-    ompBuilder->createOMPFree(ompLoc, it2->first, it2->second, "");
-  pendingOmpAllocateFrees.erase(it);
-}
-
 /// Convert block to LLVM IR.  Unless `ignoreArguments` is set, emit PHI nodes
 /// to define values corresponding to the MLIR block arguments.  These nodes
 /// are not connected to the source basic blocks, which may not exist yet.  Uses
@@ -1072,9 +1048,10 @@ LogicalResult ModuleTranslation::convertBlockImpl(Block &bb,
 
   // Traverse operations.
   for (auto &op : bb) {
-    // Emit pending OpenMP allocate frees before the terminator.
+    // Give registered dialect interfaces a chance to inject IR before the
+    // terminator.
     if (op.hasTrait<OpTrait::IsTerminator>())
-      emitPendingOmpAllocateFrees(bb, builder);
+      iface.preTranslateTerminator(bb, builder, *this);
 
     // Set the current debug location within the builder.
     builder.SetCurrentDebugLocation(
diff --git a/mlir/test/Target/LLVMIR/openmp-allocate-directive.mlir b/mlir/test/Target/LLVMIR/openmp-allocate-directive.mlir
new file mode 100644
index 0000000000000..1c05b20a83a61
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-allocate-directive.mlir
@@ -0,0 +1,109 @@
+// Tests for translation of omp.allocate_dir operations to LLVM IR,
+// covering all combinations of align and allocator clauses.
+
+// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
+
+// -----
+
+// CHECK-LABEL: define void @test_allocate_default
+// CHECK-SAME: (ptr %[[ARG0:.*]]) {
+// CHECK:   %[[TID:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK:   %[[ALLOC:.*]] = call ptr @__kmpc_alloc(i32 %[[TID]], i64 8, ptr null)
+// CHECK:   %[[TID_FREE:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK:   call void @__kmpc_free(i32 %[[TID_FREE]], ptr %[[ALLOC]], ptr null)
+// CHECK:   ret void
+// CHECK: }
+// CHECK: declare noalias ptr @__kmpc_alloc(i32, i64, ptr)
+// CHECK: declare void @__kmpc_free(i32, ptr, ptr)
+llvm.func @test_allocate_default(%arg0: !llvm.ptr) {
+  omp.allocate_dir (%arg0 : !llvm.ptr)
+  llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: define void @test_allocate_align_only
+// CHECK:   %[[TID:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK:   %[[ALLOC:.*]] = call ptr @__kmpc_aligned_alloc(i32 %[[TID]], i64 16, i64 16, ptr null)
+// CHECK:   %[[TID_FREE:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK:   call void @__kmpc_free(i32 %[[TID_FREE]], ptr %[[ALLOC]], ptr null)
+// CHECK:   ret void
+// CHECK: declare noalias ptr @__kmpc_aligned_alloc(i32, i64, i64, ptr)
+llvm.func @test_allocate_align_only(%arg0: !llvm.ptr) {
+  omp.allocate_dir (%arg0 : !llvm.ptr) align(16)
+  llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: define void @test_allocate_allocator_only
+// CHECK:   %[[TID:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK:   %[[ALLOC:.*]] = call ptr @__kmpc_alloc(i32 %[[TID]], i64 8, ptr inttoptr (i32 1 to ptr))
+// CHECK:   %[[TID_FREE:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK:   call void @__kmpc_free(i32 %[[TID_FREE]], ptr %[[ALLOC]], ptr inttoptr (i32 1 to ptr))
+// CHECK:   ret void
+llvm.func @test_allocate_allocator_only(%arg0: !llvm.ptr) {
+  %alloc1 = llvm.mlir.constant(1 : i32) : i32
+  omp.allocate_dir (%arg0 : !llvm.ptr) allocator(%alloc1 : i32)
+  llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: define void @test_allocate_align_and_allocator
+// CHECK:   %[[TID:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK:   %[[ALLOC:.*]] = call ptr @__kmpc_aligned_alloc(i32 %[[TID]], i64 64, i64 64, ptr inttoptr (i32 6 to ptr))
+// CHECK:   %[[TID_FREE:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK:   call void @__kmpc_free(i32 %[[TID_FREE]], ptr %[[ALLOC]], ptr inttoptr (i32 6 to ptr))
+// CHECK:   ret void
+llvm.func @test_allocate_align_and_allocator(%arg0: !llvm.ptr) {
+  %alloc6 = llvm.mlir.constant(6 : i32) : i32
+  omp.allocate_dir (%arg0 : !llvm.ptr) align(64) allocator(%alloc6 : i32)
+  llvm.return
+}
+
+// -----
+
+// Verifies that multiple variables each get their own __kmpc_aligned_alloc call
+// and that __kmpc_free calls are emitted in reverse allocation order.
+//
+// CHECK-LABEL: define void @test_allocate_multiple_vars
+// CHECK:   %[[TID0:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK:   %[[ALLOC0:.*]] = call ptr @__kmpc_aligned_alloc(i32 %[[TID0]], i64 32, i64 32, ptr inttoptr (i32 3 to ptr))
+// CHECK:   %[[TID1:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK:   %[[ALLOC1:.*]] = call ptr @__kmpc_aligned_alloc(i32 %[[TID1]], i64 32, i64 32, ptr inttoptr (i32 3 to ptr))
+// CHECK:   %[[TID2:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK:   %[[ALLOC2:.*]] = call ptr @__kmpc_aligned_alloc(i32 %[[TID2]], i64 32, i64 32, ptr inttoptr (i32 3 to ptr))
+// Free order is reversed relative to allocation order.
+// CHECK:   call void @__kmpc_free({{.*}}, ptr %[[ALLOC2]], ptr inttoptr (i32 3 to ptr))
+// CHECK:   call void @__kmpc_free({{.*}}, ptr %[[ALLOC1]], ptr inttoptr (i32 3 to ptr))
+// CHECK:   call void @__kmpc_free({{.*}}, ptr %[[ALLOC0]], ptr inttoptr (i32 3 to ptr))
+// CHECK:   ret void
+llvm.func @test_allocate_multiple_vars(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr) {
+  %alloc3 = llvm.mlir.constant(3 : i32) : i32
+  omp.allocate_dir (%arg0, %arg1, %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) align(32) allocator(%alloc3 : i32)
+  llvm.return
+}
+
+// -----
+
+// Verifies that array size is correctly calculated from the global's element
+// type: [10 x i32] = 40 bytes, rounded up to alignment 64 => 64 bytes.
+//
+// CHECK-LABEL: define void @test_allocate_array_global
+// CHECK:   %[[TID:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK:   %[[ALLOC:.*]] = call ptr @__kmpc_aligned_alloc(i32 %[[TID]], i64 64, i64 64, ptr inttoptr (i32 6 to ptr))
+// CHECK:   %[[TID_FREE:.*]] = call i32 @__kmpc_global_thread_num(
+// CHECK:   call void @__kmpc_free(i32 %[[TID_FREE]], ptr %[[ALLOC]], ptr inttoptr (i32 6 to ptr))
+// CHECK:   ret void
+llvm.mlir.global internal @arr_global() : !llvm.array<10 x i32> {
+  %0 = llvm.mlir.zero : !llvm.array<10 x i32>
+  llvm.return %0 : !llvm.array<10 x i32>
+}
+
+llvm.func @test_allocate_array_global() {
+  %z = llvm.mlir.addressof @arr_global : !llvm.ptr
+  %alloc6 = llvm.mlir.constant(6 : i32) : i32
+  omp.allocate_dir (%z : !llvm.ptr) align(64) allocator(%alloc6 : i32)
+  llvm.return
+}

>From d6d0a1b301cee0434229159465af7a06db974d7a Mon Sep 17 00:00:00 2001
From: Raghu Maddhipatla <Raghu.Maddhipatla at amd.com>
Date: Fri, 10 Apr 2026 17:02:58 -0500
Subject: [PATCH 5/6] Handling the matching kmpc_free call generation of an OMP
 ALLOCATE variable in Fortran frontend.

---
 flang/lib/Lower/OpenMP/OpenMP.cpp             | 16 +++-
 .../OpenMP/omp-declarative-allocate-align.f90 |  6 ++
 .../Lower/OpenMP/omp-declarative-allocate.f90 |  3 +
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 20 ++++
 .../LLVMIR/LLVMTranslationDialectInterface.td | 10 --
 .../Target/LLVMIR/LLVMTranslationInterface.h  | 11 ---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 95 ++++++++++---------
 mlir/lib/Target/LLVMIR/ModuleTranslation.cpp  |  5 -
 mlir/test/Dialect/OpenMP/ops.mlir             | 24 +++++
 .../LLVMIR/openmp-allocate-directive.mlir     | 16 +++-
 10 files changed, 130 insertions(+), 76 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 3cc343925d8fa..920fdfaafdf0d 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1952,9 +1952,19 @@ static mlir::omp::AllocateDirOp genAllocateDirOp(
   genAllocateClauses(converter, semaCtx, stmtCtx, objects, item->clauses, loc,
                      operandRange, clauseOps);
 
-  return mlir::omp::AllocateDirOp::create(converter.getFirOpBuilder(), loc,
-                                          operandRange, clauseOps.align,
-                                          clauseOps.allocator);
+  auto allocDirOp = mlir::omp::AllocateDirOp::create(
+      converter.getFirOpBuilder(), loc, operandRange, clauseOps.align,
+      clauseOps.allocator);
+
+  // Register a cleanup at the Fortran scope exit.
+  fir::FirOpBuilder *builder = &converter.getFirOpBuilder();
+  mlir::Value allocator = clauseOps.allocator;
+  converter.getFctCtx().attachCleanup([builder, loc, operandRange,
+                                       allocator]() {
+    mlir::omp::AllocateFreeOp::create(*builder, loc, operandRange, allocator);
+  });
+
+  return allocDirOp;
 }
 
 static mlir::omp::BarrierOp
diff --git a/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90 b/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90
index 0824d8bcb7e90..fdcc4ac1fef20 100644
--- a/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90
+++ b/flang/test/Lower/OpenMP/omp-declarative-allocate-align.f90
@@ -4,6 +4,7 @@
 !   - allocator(1) only (no align)
 !   - align(64) allocator(6) (both clauses, array variable)
 !   - align(32) allocator(3) (both clauses, multiple variables)
+! Each omp.allocate_dir must be paired with a matching omp.allocate_free
 
 ! RUN: %flang_fc1 -emit-hlfir %openmp_flags -fopenmp-version=51 %s -o - 2>&1 | FileCheck %s
 
@@ -43,3 +44,8 @@ program main
 ! CHECK: omp.allocate_dir(%[[Z_DECL]]#0 : !fir.ref<!fir.array<10xi32>>) align(64) allocator(%[[ALLOC6]] : i32)
 ! CHECK: %[[ALLOC3:.*]] = arith.constant 3 : i32
 ! CHECK: omp.allocate_dir(%[[C_DECL]]#0, %[[R_DECL]]#0, %[[CMPLX_DECL]]#0 : !fir.ref<!fir.char<1>>, !fir.ref<f32>, !fir.ref<complex<f32>>) align(32) allocator(%[[ALLOC3]] : i32)
+! CHECK: omp.allocate_free(%[[C_DECL]]#0, %[[R_DECL]]#0, %[[CMPLX_DECL]]#0 : !fir.ref<!fir.char<1>>, !fir.ref<f32>, !fir.ref<complex<f32>>) allocator(%[[ALLOC3]] : i32)
+! CHECK: omp.allocate_free(%[[Z_DECL]]#0 : !fir.ref<!fir.array<10xi32>>) allocator(%[[ALLOC6]] : i32)
+! CHECK: omp.allocate_free(%[[Y_DECL]]#0 : !fir.ref<i32>) allocator(%[[ALLOC1]] : i32)
+! CHECK: omp.allocate_free(%[[X_DECL]]#0 : !fir.ref<i32>)
+! CHECK: return
diff --git a/flang/test/Lower/OpenMP/omp-declarative-allocate.f90 b/flang/test/Lower/OpenMP/omp-declarative-allocate.f90
index 69da3f52b459f..77f211ccf0aeb 100644
--- a/flang/test/Lower/OpenMP/omp-declarative-allocate.f90
+++ b/flang/test/Lower/OpenMP/omp-declarative-allocate.f90
@@ -1,5 +1,6 @@
 ! This test checks lowering of OpenMP allocate Directive to HLFIR.
 ! Verifies code generation for default (no align, null allocator) case.
+! omp.allocate_free must be emitted at the exit (before return).
 
 ! RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
 
@@ -13,3 +14,5 @@ program main
 ! CHECK: %[[Y_ALLOC:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFEy"}
 ! CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y_ALLOC]] {uniq_name = "_QFEy"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK: omp.allocate_dir(%[[X_DECL]]#0, %[[Y_DECL]]#0 : !fir.ref<i32>, !fir.ref<i32>)
+! CHECK: omp.allocate_free(%[[X_DECL]]#0, %[[Y_DECL]]#0 : !fir.ref<i32>, !fir.ref<i32>)
+! CHECK: return
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 40ec8904a136f..00b58eb2b8c1c 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -2217,6 +2217,26 @@ def AllocateDirOp : OpenMP_Op<"allocate_dir", [AttrSizedOperandSegments], clause
   let hasVerifier = 1;
 }
 
+//===----------------------------------------------------------------------===//
+// AllocateFreeOp
+//===----------------------------------------------------------------------===//
+
+def AllocateFreeOp : OpenMP_Op<"allocate_free", [AttrSizedOperandSegments],
+    clauses = [OpenMP_AllocatorClause]> {
+  let summary = "free-op paired with allocate directive";
+  let description = [{
+    At the end of the scope, each list item allocated using the allocate
+    directive should be deallocated (using this free operation).
+  }] # clausesDescription;
+
+  let arguments = !con((ins Variadic<AnyType>:$varList),
+                       clausesArgs);
+
+  let assemblyFormat = " `(` $varList `:` type($varList) `)` oilist(" #
+                       clausesOptAssemblyFormat #
+                       ") attr-dict ";
+}
+
 //===----------------------------------------------------------------------===//
 // TargetAllocMemOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Target/LLVMIR/LLVMTranslationDialectInterface.td b/mlir/include/mlir/Target/LLVMIR/LLVMTranslationDialectInterface.td
index b1e7f25b44c40..01c1b3a3cfaa3 100644
--- a/mlir/include/mlir/Target/LLVMIR/LLVMTranslationDialectInterface.td
+++ b/mlir/include/mlir/Target/LLVMIR/LLVMTranslationDialectInterface.td
@@ -56,16 +56,6 @@ def LLVMTranslationDialectInterface : DialectInterface<"LLVMTranslationDialectIn
         return ::llvm::success();
       }]
     >,
-    InterfaceMethod<[{
-        Hook called just before a block's terminator operation is translated.
-        Dialects can override this to inject IR that must appear at the end of
-        a basic block.
-      }],
-      "void", "preTranslateTerminator",
-      (ins "::mlir::Block &":$block, "::llvm::IRBuilderBase &":$builder,
-           "::mlir::LLVM::ModuleTranslation &":$moduleTranslation),
-      [{ }]
-    >
   ];
 }
 
diff --git a/mlir/include/mlir/Target/LLVMIR/LLVMTranslationInterface.h b/mlir/include/mlir/Target/LLVMIR/LLVMTranslationInterface.h
index 5bc4aa9a4e7e9..58d3ee0ed2139 100644
--- a/mlir/include/mlir/Target/LLVMIR/LLVMTranslationInterface.h
+++ b/mlir/include/mlir/Target/LLVMIR/LLVMTranslationInterface.h
@@ -79,17 +79,6 @@ class LLVMTranslationInterface
                          attribute.getName().str() + "'");
     return success();
   }
-
-  /// Calls the `preTranslateTerminator` hook on every registered dialect
-  /// interface. This is broadcast to all interfaces because any dialect may
-  /// have registered deferred work for the given block, independent of which
-  /// dialect owns the terminator.
-  virtual void
-  preTranslateTerminator(Block &block, llvm::IRBuilderBase &builder,
-                         LLVM::ModuleTranslation &moduleTranslation) const {
-    for (const LLVMTranslationDialectInterface &iface : *this)
-      iface.preTranslateTerminator(block, builder, moduleTranslation);
-  }
 };
 
 } // namespace mlir
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index a2ae28990e327..0e78cfd3733c0 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -7318,49 +7318,29 @@ class OpenMPDialectLLVMIRTranslationInterface
                  NamedAttribute attribute,
                  LLVM::ModuleTranslation &moduleTranslation) const final;
 
-  /// Emits pending __kmpc_free calls just before the block's terminator.
-  void preTranslateTerminator(
-      Block &block, llvm::IRBuilderBase &builder,
-      LLVM::ModuleTranslation &moduleTranslation) const final;
+  /// Records the LLVM alloc pointer produced for an OMP ALLOCATE variable so
+  /// that the paired omp.allocate_free op can generate the matching
+  /// __kmpc_free call.
+  void registerAllocatedPtr(Value var, llvm::Value *ptr) const {
+    ompAllocatedPtrs[var] = ptr;
+  }
 
-  /// Registers a deferred __kmpc_free call to be emitted before the
-  /// terminator of the given block.
-  void registerPendingOmpAllocateFree(Block *block, llvm::Value *ptr,
-                                      llvm::Value *allocator) const {
-    pendingOmpAllocateFrees[block].push_back({ptr, allocator});
+  /// Returns the LLVM alloc pointer previously registered for var, or
+  /// nullptr if no allocation was recorded.
+  llvm::Value *lookupAllocatedPtr(Value var) const {
+    auto it = ompAllocatedPtrs.find(var);
+    return it != ompAllocatedPtrs.end() ? it->second : nullptr;
   }
 
 private:
-  /// Pending __kmpc_free calls per block, emitted via preTranslateTerminator.
-  mutable DenseMap<Block *,
-                   llvm::SmallVector<std::pair<llvm::Value *, llvm::Value *>>>
-      pendingOmpAllocateFrees;
+  /// Maps each MLIR variable value that appeared in an omp.allocate_dir op to
+  /// the LLVM pointer returned by the corresponding __kmpc_alloc call.  The
+  /// paired omp.allocate_free op looks up these pointers to emit __kmpc_free.
+  mutable DenseMap<Value, llvm::Value *> ompAllocatedPtrs;
 };
 
 } // namespace
 
-void OpenMPDialectLLVMIRTranslationInterface::preTranslateTerminator(
-    Block &block, llvm::IRBuilderBase &builder,
-    LLVM::ModuleTranslation &moduleTranslation) const {
-  auto it = pendingOmpAllocateFrees.find(&block);
-  if (it == pendingOmpAllocateFrees.end() || it->second.empty())
-    return;
-  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
-  if (!ompBuilder)
-    return;
-  llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(&block);
-  if (!llvmBB)
-    return;
-  if (!llvmBB->empty() && llvmBB->back().isTerminator())
-    builder.SetInsertPoint(&llvmBB->back());
-  else
-    builder.SetInsertPoint(llvmBB);
-  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
-  for (auto it2 = it->second.rbegin(); it2 != it->second.rend(); ++it2)
-    ompBuilder->createOMPFree(ompLoc, it2->first, it2->second, "");
-  pendingOmpAllocateFrees.erase(it);
-}
-
 LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation(
     Operation *op, ArrayRef<llvm::Instruction *> instructions,
     NamedAttribute attribute,
@@ -7559,8 +7539,6 @@ convertAllocateDirOp(Operation &opInst, llvm::IRBuilderBase &builder,
     allocator = llvm::ConstantPointerNull::get(builder.getPtrTy());
   }
 
-  SmallVector<std::pair<llvm::CallInst *, llvm::Value *>> allocatedVars;
-
   for (Value var : vars) {
     llvm::Type *llvmVarTy = moduleTranslation.convertType(var.getType());
 
@@ -7612,14 +7590,41 @@ convertAllocateDirOp(Operation &opInst, llvm::IRBuilderBase &builder,
       allocCall =
           ompBuilder->createOMPAlloc(ompLoc, size, allocator, allocName);
     }
-    allocatedVars.push_back({allocCall, allocator});
+    // Record the alloc pointer keyed by the MLIR variable value.
+    ompIface.registerAllocatedPtr(var, allocCall);
+  }
+
+  return success();
+}
+
+static LogicalResult
+convertAllocateFreeOp(Operation &opInst, llvm::IRBuilderBase &builder,
+                      LLVM::ModuleTranslation &moduleTranslation,
+                      const OpenMPDialectLLVMIRTranslationInterface &ompIface) {
+  auto freeOp = cast<omp::AllocateFreeOp>(opInst);
+  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+
+  llvm::Value *allocator;
+  if (auto allocatorVar = freeOp.getAllocator()) {
+    allocator = moduleTranslation.lookupValue(allocatorVar);
+    if (allocator->getType()->isIntegerTy())
+      allocator = builder.CreateIntToPtr(allocator, builder.getPtrTy());
+    else if (allocator->getType()->isPointerTy())
+      allocator = builder.CreatePointerBitCastOrAddrSpaceCast(
+          allocator, builder.getPtrTy());
+  } else {
+    allocator = llvm::ConstantPointerNull::get(builder.getPtrTy());
   }
 
-  // Register __kmpc_free calls to be emitted before the block terminator via
-  // preTranslateTerminator()
-  Block *block = allocateDirOp->getBlock();
-  for (auto &alloc : allocatedVars)
-    ompIface.registerPendingOmpAllocateFree(block, alloc.first, alloc.second);
+  // Emit __kmpc_free for each variable in reverse allocation order.
+  SmallVector<Value> vars = freeOp.getVarList();
+  for (Value var : llvm::reverse(vars)) {
+    llvm::Value *allocPtr = ompIface.lookupAllocatedPtr(var);
+    if (!allocPtr)
+      return opInst.emitError("omp.allocate_free: no allocation recorded");
+    ompBuilder->createOMPFree(ompLoc, allocPtr, allocator, "");
+  }
 
   return success();
 }
@@ -7872,6 +7877,10 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
           .Case([&](omp::AllocateDirOp) {
             return convertAllocateDirOp(*op, builder, moduleTranslation, *this);
           })
+          .Case([&](omp::AllocateFreeOp) {
+            return convertAllocateFreeOp(*op, builder, moduleTranslation,
+                                         *this);
+          })
           .Default([&](Operation *inst) {
             return inst->emitError()
                    << "not yet implemented: " << inst->getName();
diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
index 699544d0565cb..cf398f151ed0b 100644
--- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
@@ -1048,11 +1048,6 @@ LogicalResult ModuleTranslation::convertBlockImpl(Block &bb,
 
   // Traverse operations.
   for (auto &op : bb) {
-    // Give registered dialect interfaces a chance to inject IR before the
-    // terminator.
-    if (op.hasTrait<OpTrait::IsTerminator>())
-      iface.preTranslateTerminator(bb, builder, *this);
-
     // Set the current debug location within the builder.
     builder.SetCurrentDebugLocation(
         debugTranslation->translateLoc(op.getLoc(), subprogram));
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index 3d1133f4ba6e9..5fbc6f5fced67 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -3491,6 +3491,30 @@ func.func @omp_allocate_dir(%arg0 : memref<i32>, %arg1 : memref<i32>) -> () {
   return
 }
 
+// CHECK-LABEL: func.func @omp_allocate_free(
+// CHECK-SAME: %[[ARG0:.*]]: memref<i32>,
+// CHECK-SAME: %[[ARG1:.*]]: memref<i32>) {
+func.func @omp_allocate_free(%arg0 : memref<i32>, %arg1 : memref<i32>) -> () {
+
+  // Test free with no allocator
+  // CHECK: omp.allocate_free(%[[ARG0]] : memref<i32>)
+  omp.allocate_free (%arg0 : memref<i32>)
+
+  // Test free with allocator clause
+  // CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64
+  %omp_default_mem_alloc = arith.constant 1 : i64
+  // CHECK: omp.allocate_free(%[[ARG0]] : memref<i32>) allocator(%[[VAL_1]] : i64)
+  omp.allocate_free (%arg0 : memref<i32>) allocator(%omp_default_mem_alloc : i64)
+
+  // Test free with two variables and allocator clause
+  // CHECK: %[[VAL_3:.*]] = arith.constant 6 : i64
+  %omp_cgroup_mem_alloc = arith.constant 6 : i64
+  // CHECK: omp.allocate_free(%[[ARG0]], %[[ARG1]] : memref<i32>, memref<i32>) allocator(%[[VAL_3]] : i64)
+  omp.allocate_free (%arg0, %arg1 : memref<i32>, memref<i32>) allocator(%omp_cgroup_mem_alloc : i64)
+
+  return
+}
+
 // CHECK-LABEL: func.func @omp_workdistribute
 func.func @omp_workdistribute() {
   // CHECK: omp.teams
diff --git a/mlir/test/Target/LLVMIR/openmp-allocate-directive.mlir b/mlir/test/Target/LLVMIR/openmp-allocate-directive.mlir
index 1c05b20a83a61..d8975eb512abe 100644
--- a/mlir/test/Target/LLVMIR/openmp-allocate-directive.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-allocate-directive.mlir
@@ -1,5 +1,7 @@
-// Tests for translation of omp.allocate_dir operations to LLVM IR,
-// covering all combinations of align and allocator clauses.
+// Tests for translation of omp.allocate_dir / omp.allocate_free pairs to
+// LLVM IR, covering all combinations of align and allocator clauses.
+// The frontend is responsible for placing omp.allocate_free at the correct
+// Fortran scope exit; here each function pairs the ops manually.
 
 // RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
 
@@ -17,6 +19,7 @@
 // CHECK: declare void @__kmpc_free(i32, ptr, ptr)
 llvm.func @test_allocate_default(%arg0: !llvm.ptr) {
   omp.allocate_dir (%arg0 : !llvm.ptr)
+  omp.allocate_free (%arg0 : !llvm.ptr)
   llvm.return
 }
 
@@ -31,6 +34,7 @@ llvm.func @test_allocate_default(%arg0: !llvm.ptr) {
 // CHECK: declare noalias ptr @__kmpc_aligned_alloc(i32, i64, i64, ptr)
 llvm.func @test_allocate_align_only(%arg0: !llvm.ptr) {
   omp.allocate_dir (%arg0 : !llvm.ptr) align(16)
+  omp.allocate_free (%arg0 : !llvm.ptr)
   llvm.return
 }
 
@@ -45,6 +49,7 @@ llvm.func @test_allocate_align_only(%arg0: !llvm.ptr) {
 llvm.func @test_allocate_allocator_only(%arg0: !llvm.ptr) {
   %alloc1 = llvm.mlir.constant(1 : i32) : i32
   omp.allocate_dir (%arg0 : !llvm.ptr) allocator(%alloc1 : i32)
+  omp.allocate_free (%arg0 : !llvm.ptr) allocator(%alloc1 : i32)
   llvm.return
 }
 
@@ -59,13 +64,14 @@ llvm.func @test_allocate_allocator_only(%arg0: !llvm.ptr) {
 llvm.func @test_allocate_align_and_allocator(%arg0: !llvm.ptr) {
   %alloc6 = llvm.mlir.constant(6 : i32) : i32
   omp.allocate_dir (%arg0 : !llvm.ptr) align(64) allocator(%alloc6 : i32)
+  omp.allocate_free (%arg0 : !llvm.ptr) allocator(%alloc6 : i32)
   llvm.return
 }
 
 // -----
 
-// Verifies that multiple variables each get their own __kmpc_aligned_alloc call
-// and that __kmpc_free calls are emitted in reverse allocation order.
+// Verifies that multiple variables each get their own __kmpc_aligned_alloc
+// call, and that __kmpc_free calls are emitted in reverse allocation order.
 //
 // CHECK-LABEL: define void @test_allocate_multiple_vars
 // CHECK:   %[[TID0:.*]] = call i32 @__kmpc_global_thread_num(
@@ -82,6 +88,7 @@ llvm.func @test_allocate_align_and_allocator(%arg0: !llvm.ptr) {
 llvm.func @test_allocate_multiple_vars(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr) {
   %alloc3 = llvm.mlir.constant(3 : i32) : i32
   omp.allocate_dir (%arg0, %arg1, %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) align(32) allocator(%alloc3 : i32)
+  omp.allocate_free (%arg0, %arg1, %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) allocator(%alloc3 : i32)
   llvm.return
 }
 
@@ -105,5 +112,6 @@ llvm.func @test_allocate_array_global() {
   %z = llvm.mlir.addressof @arr_global : !llvm.ptr
   %alloc6 = llvm.mlir.constant(6 : i32) : i32
   omp.allocate_dir (%z : !llvm.ptr) align(64) allocator(%alloc6 : i32)
+  omp.allocate_free (%z : !llvm.ptr) allocator(%alloc6 : i32)
   llvm.return
 }

>From 3a6d8663aab0bbb5c5a5f79bafd6b15c7a8e5632 Mon Sep 17 00:00:00 2001
From: Raghu Maddhipatla <Raghu.Maddhipatla at amd.com>
Date: Fri, 10 Apr 2026 18:00:02 -0500
Subject: [PATCH 6/6] Added NULL pointer check for updateToLocation()

---
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index edd181f7eea2e..875c66aef7366 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -7628,7 +7628,8 @@ CallInst *OpenMPIRBuilder::createOMPAlloc(const LocationDescription &Loc,
                                           Value *Size, Value *Allocator,
                                           std::string Name) {
   IRBuilder<>::InsertPointGuard IPG(Builder);
-  updateToLocation(Loc);
+  if (!updateToLocation(Loc))
+    return nullptr;
 
   uint32_t SrcLocStrSize;
   Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
@@ -7646,7 +7647,8 @@ CallInst *OpenMPIRBuilder::createOMPAlignedAlloc(const LocationDescription &Loc,
                                                  Value *Allocator,
                                                  std::string Name) {
   IRBuilder<>::InsertPointGuard IPG(Builder);
-  updateToLocation(Loc);
+  if (!updateToLocation(Loc))
+    return nullptr;
 
   uint32_t SrcLocStrSize;
   Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
@@ -7663,7 +7665,8 @@ CallInst *OpenMPIRBuilder::createOMPFree(const LocationDescription &Loc,
                                          Value *Addr, Value *Allocator,
                                          std::string Name) {
   IRBuilder<>::InsertPointGuard IPG(Builder);
-  updateToLocation(Loc);
+  if (!updateToLocation(Loc))
+    return nullptr;
 
   uint32_t SrcLocStrSize;
   Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);



More information about the Mlir-commits mailing list